From b9376c7e42ca22644085332fd450b153cd4e9bde Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 18 Oct 2024 14:45:01 -0400 Subject: nfsd: new tracepoint for after op_func in compound processing Turn nfsd_compound_encode_err tracepoint into a class and add a new nfsd_compound_op_err tracepoint. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d32f2dfd148f..93089f7064f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2780,6 +2780,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_get_currentstateid) op->opdesc->op_get_currentstateid(cstate, &op->u); op->status = op->opdesc->op_func(rqstp, cstate, &op->u); + trace_nfsd_compound_op_err(rqstp, op->opnum, op->status); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { -- cgit v1.2.3 From 600020927b004f027e737e6bf57c450d48f2405e Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 5 Oct 2024 20:33:49 +0200 Subject: nfsd: Fill NFSv4.1 server implementation fields in OP_EXCHANGE_ID response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NFSv4.1 OP_EXCHANGE_ID response from server may contain server implementation details (domain, name and build time) in optional nfs_impl_id4 field. Currently nfsd does not fill this field. Send these information in NFSv4.1 OP_EXCHANGE_ID response. Fill them with the same values as what is Linux NFSv4.1 client doing. Domain is hardcoded to "kernel.org", name is composed in the same way as "uname -srvm" output and build time is hardcoded to zeros. NFSv4.1 client and server implementation fields are useful for statistic purposes or for identifying type of clients and servers. Signed-off-by: Pali Rohár Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/nfs4state.c | 31 +++++++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 24 +++++++++++++++++++++++- fs/nfsd/xdr4.h | 2 ++ 4 files changed, 57 insertions(+), 1 deletion(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 93089f7064f0..49b4cbfe914d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -3453,6 +3453,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { /* NFSv4.1 operations */ [OP_EXCHANGE_ID] = { .op_func = nfsd4_exchange_id, + .op_release = nfsd4_exchange_id_release, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP | OP_MODIFIES_SOMETHING, .op_name = "OP_EXCHANGE_ID", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e0862173cd9e..2f229d007b58 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3524,6 +3524,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); + exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + if (!exid->server_impl_name) + return nfserr_jukebox; + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; @@ -3661,6 +3667,23 @@ out_copy: exid->seqid = conf->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(conf, exid); + exid->nii_domain.len = sizeof("kernel.org") - 1; + exid->nii_domain.data = "kernel.org"; + + /* + * Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes. + */ + exid->nii_name.len = strlen(exid->server_impl_name); + if (exid->nii_name.len > NFS4_OPAQUE_LIMIT) + exid->nii_name.len = NFS4_OPAQUE_LIMIT; + exid->nii_name.data = exid->server_impl_name; + + /* just send zeros - the date is in nii_name */ + exid->nii_time.tv_sec = 0; + exid->nii_time.tv_nsec = 0; + dprintk("nfsd4_exchange_id seqid %d flags %x\n", conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; @@ -3677,6 +3700,14 @@ out_nolock: return status; } +void +nfsd4_exchange_id_release(union nfsd4_op_u *u) +{ + struct nfsd4_exchange_id *exid = &u->exchange_id; + + kfree(exid->server_impl_name); +} + static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse) { /* The slot is in use, and no response has been sent. */ diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index abbfd2b58c82..37f8301cfb8a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4825,6 +4825,25 @@ nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp) return nfsd4_encode_opaque(xdr, nn->nfsd_name, strlen(nn->nfsd_name)); } +static __be32 +nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid) +{ + __be32 status; + + /* nii_domain */ + status = nfsd4_encode_opaque(xdr, exid->nii_domain.data, + exid->nii_domain.len); + if (status != nfs_ok) + return status; + /* nii_name */ + status = nfsd4_encode_opaque(xdr, exid->nii_name.data, + exid->nii_name.len); + if (status != nfs_ok) + return status; + /* nii_time */ + return nfsd4_encode_nfstime4(xdr, &exid->nii_time); +} + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) @@ -4859,8 +4878,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* eir_server_impl_id<1> */ - if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) return nfserr_resource; + nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid); + if (nfserr != nfs_ok) + return nfserr; return nfs_ok; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2a21a7662e03..d5bb9a3c2da4 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -567,6 +567,7 @@ struct nfsd4_exchange_id { struct xdr_netobj nii_domain; struct xdr_netobj nii_name; struct timespec64 nii_time; + char *server_impl_name; }; struct nfsd4_sequence { @@ -930,6 +931,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); +void nfsd4_exchange_id_release(union nfsd4_op_u *u); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, -- cgit v1.2.3 From bb4f07f2409c26c01e97e6f9b432545f353e3b66 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 5 Oct 2024 18:40:39 +0200 Subject: nfsd: Fix NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT do not bypass only GSS, but bypass any method. This is a problem specially for NFS3 AUTH_NULL-only exports. The purpose of NFSD_MAY_BYPASS_GSS_ON_ROOT is described in RFC 2623, section 2.3.2, to allow mounting NFS2/3 GSS-only export without authentication. So few procedures which do not expose security risk used during mount time can be called also with AUTH_NONE or AUTH_SYS, to allow client mount operation to finish successfully. The problem with current implementation is that for AUTH_NULL-only exports, the NFSD_MAY_BYPASS_GSS_ON_ROOT is active also for NFS3 AUTH_UNIX mount attempts which confuse NFS3 clients, and make them think that AUTH_UNIX is enabled and is working. Linux NFS3 client never switches from AUTH_UNIX to AUTH_NONE on active mount, which makes the mount inaccessible. Fix the NFSD_MAY_BYPASS_GSS and NFSD_MAY_BYPASS_GSS_ON_ROOT implementation and really allow to bypass only exports which have enabled some real authentication (GSS, TLS, or any other). The result would be: For AUTH_NULL-only export if client attempts to do mount with AUTH_UNIX flavor then it will receive access errors, which instruct client that AUTH_UNIX flavor is not usable and will either try other auth flavor (AUTH_NULL if enabled) or fails mount procedure. Similarly if client attempt to do mount with AUTH_NULL flavor and only AUTH_UNIX flavor is enabled then the client will receive access error. This should fix problems with AUTH_NULL-only or AUTH_UNIX-only exports if client attempts to mount it with other auth flavor (e.g. with AUTH_NULL for AUTH_UNIX-only export, or with AUTH_UNIX for AUTH_NULL-only export). Signed-off-by: Pali Rohár Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 21 ++++++++++++++++++++- fs/nfsd/export.h | 3 ++- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsfh.c | 9 ++++++--- fs/nfsd/vfs.c | 2 +- 6 files changed, 31 insertions(+), 8 deletions(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c82d8e3e0d4f..0f40003d864f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1078,12 +1078,14 @@ static struct svc_export *exp_find(struct cache_detail *cd, * check_nfsd_access - check if access to export is allowed. * @exp: svc_export that is being accessed. * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO). + * @may_bypass_gss: reduce strictness of authorization check * * Return values: * %nfs_ok if access is granted, or * %nfserr_wrongsec if access is denied */ -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss) { struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; struct svc_xprt *xprt; @@ -1140,6 +1142,23 @@ ok: if (nfsd4_spo_must_allow(rqstp)) return nfs_ok; + /* Some calls may be processed without authentication + * on GSS exports. For example NFS2/3 calls on root + * directory, see section 2.3.2 of rfc 2623. + * For "may_bypass_gss" check that export has really + * enabled some flavor with authentication (GSS or any + * other) and also check that the used auth flavor is + * without authentication (none or sys). + */ + if (may_bypass_gss && ( + rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || + rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) { + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor >= RPC_AUTH_DES) + return 0; + } + } + denied: return nfserr_wrongsec; } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 3794ae253a70..4d92b99c1ffd 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -101,7 +101,8 @@ struct svc_expkey { struct svc_cred; int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp); -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss); /* * Function declarations diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 49b4cbfe914d..94cc7b2ac385 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2797,7 +2797,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (current_fh->fh_export && need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(current_fh->fh_export, rqstp); + op->status = check_nfsd_access(current_fh->fh_export, rqstp, false); } encode_op: if (op->status == nfserr_replay_me) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 37f8301cfb8a..5e6ef24d5450 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3767,7 +3767,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, nfserr = nfserrno(err); goto out_put; } - nfserr = check_nfsd_access(exp, cd->rd_rqstp); + nfserr = check_nfsd_access(exp, cd->rd_rqstp, false); if (nfserr) goto out_put; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 96e19c50a5d7..cbb046f88eec 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -320,6 +320,7 @@ __fh_verify(struct svc_rqst *rqstp, { struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *exp = NULL; + bool may_bypass_gss = false; struct dentry *dentry; __be32 error; @@ -367,8 +368,10 @@ __fh_verify(struct svc_rqst *rqstp, * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) + if (access & NFSD_MAY_LOCK) goto skip_pseudoflavor_check; + if (access & NFSD_MAY_BYPASS_GSS) + may_bypass_gss = true; /* * Clients may expect to be able to use auth_sys during mount, * even if they use gss for everything else; see section 2.3.2 @@ -376,9 +379,9 @@ __fh_verify(struct svc_rqst *rqstp, */ if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT && exp->ex_path.dentry == dentry) - goto skip_pseudoflavor_check; + may_bypass_gss = true; - error = check_nfsd_access(exp, rqstp); + error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e0692401da33..1f8540148c4a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -321,7 +321,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; - err = check_nfsd_access(exp, rqstp); + err = check_nfsd_access(exp, rqstp, false); if (err) goto out; /* -- cgit v1.2.3 From a4452e661bc8ee56b2a893df6f523607c63f6de8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:02 -0400 Subject: NFSD: Add a tracepoint to record canceled async COPY operations Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 1 + fs/nfsd/trace.h | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 94cc7b2ac385..ca383ebc1790 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1287,6 +1287,7 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) static void nfsd4_stop_copy(struct nfsd4_copy *copy) { + trace_nfsd_copy_async_cancel(copy); if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f318898cfc31..6b7bf8129e49 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2244,7 +2244,7 @@ TRACE_EVENT(nfsd_copy_done, ) ); -TRACE_EVENT(nfsd_copy_async_done, +DECLARE_EVENT_CLASS(nfsd_copy_async_done_class, TP_PROTO( const struct nfsd4_copy *copy ), @@ -2313,6 +2313,15 @@ TRACE_EVENT(nfsd_copy_async_done, ) ); +#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_async_done_class, \ + nfsd_copy_async_##name, \ + TP_PROTO(const struct nfsd4_copy *copy), \ + TP_ARGS(copy)) + +DEFINE_COPY_ASYNC_DONE_EVENT(done); +DEFINE_COPY_ASYNC_DONE_EVENT(cancel); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 62a8642ba00aa8ceb0a02ade942f5ec52e877c95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:03 -0400 Subject: NFSD: Fix nfsd4_shutdown_copy() nfsd4_shutdown_copy() is just this: while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); nfsd4_get_copy() bumps @copy's reference count, preventing nfsd4_stop_copy() from releasing @copy. A while loop like this usually works by removing the first element of the list, but neither nfsd4_get_copy() nor nfsd4_stop_copy() alters the async_copies list. Best I can tell, then, is that nfsd4_shutdown_copy() continues to loop until other threads manage to remove all the items from this list. The spinning loop blocks shutdown until these items are gone. Possibly the reason we haven't seen this issue in the field is because client_has_state() prevents __destroy_client() from calling nfsd4_shutdown_copy() if there are any items on this list. In a subsequent patch I plan to remove that restriction. Fixes: e0639dc5805a ("NFSD introduce async copy feature") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ca383ebc1790..5ad55b734541 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1293,7 +1293,7 @@ static void nfsd4_stop_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } -static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) +static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy = NULL; @@ -1302,6 +1302,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) copy = list_first_entry(&clp->async_copies, struct nfsd4_copy, copies); refcount_inc(©->refcount); + copy->cp_clp = NULL; + if (!list_empty(©->copies)) + list_del_init(©->copies); } spin_unlock(&clp->async_lock); return copy; @@ -1311,7 +1314,7 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy; - while ((copy = nfsd4_get_copy(clp)) != NULL) + while ((copy = nfsd4_unhash_copy(clp)) != NULL) nfsd4_stop_copy(copy); } #ifdef CONFIG_NFSD_V4_2_INTER_SSC -- cgit v1.2.3 From 409d6f52bd6b4fb43fbb4db9daeb5022e2e1d00c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:04 -0400 Subject: NFSD: Free async copy information in nfsd4_cb_offload_release() RFC 7862 Section 4.8 states: > A copy offload stateid will be valid until either (A) the client > or server restarts or (B) the client returns the resource by > issuing an OFFLOAD_CANCEL operation or the client replies to a > CB_OFFLOAD operation. Currently, NFSD purges the metadata for an async COPY operation as soon as the CB_OFFLOAD callback has been sent. It does not wait even for the client's CB_OFFLOAD response, as the paragraph above suggests that it should. This makes the OFFLOAD_STATUS operation ineffective during the window between the completion of an asynchronous COPY and the server's receipt of the corresponding CB_OFFLOAD response. This is important if, for example, the client responds with NFS4ERR_DELAY, or the transport is lost before the server receives the response. A client might use OFFLOAD_STATUS to query the server about the still pending asynchronous COPY, but NFSD will respond to OFFLOAD_STATUS as if it had never heard of the presented copy stateid. This patch starts to address this issue by extending the lifetime of struct nfsd4_copy at least until the server has seen the client's CB_OFFLOAD response, or the CB_OFFLOAD has timed out. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 17 ++++++++++------- fs/nfsd/xdr4.h | 3 +++ 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5ad55b734541..5c1013141095 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644); MODULE_PARM_DESC(inter_copy_offload_enable, "Enable inter server to server copy offload. Default: false"); +static void cleanup_async_copy(struct nfsd4_copy *copy); + #ifdef CONFIG_NFSD_V4_2_INTER_SSC static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ module_param(nfsd4_ssc_umount_timeout, int, 0644); @@ -1602,8 +1604,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { struct nfsd4_cb_offload *cbo = container_of(cb, struct nfsd4_cb_offload, co_cb); + struct nfsd4_copy *copy = + container_of(cbo, struct nfsd4_copy, cp_cb_offload); - kfree(cbo); + cleanup_async_copy(copy); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, @@ -1736,11 +1740,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) { - struct nfsd4_cb_offload *cbo; - - cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); - if (!cbo) - return; + struct nfsd4_cb_offload *cbo = ©->cp_cb_offload; memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); @@ -1790,10 +1790,13 @@ static int nfsd4_do_async_copy(void *data) } do_callback: + /* The kthread exits forthwith. Ensure that a subsequent + * OFFLOAD_CANCEL won't try to kill it again. */ + set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags); + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); trace_nfsd_copy_async_done(copy); nfsd4_send_cb_offload(copy); - cleanup_async_copy(copy); return 0; } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index d5bb9a3c2da4..08e5d280f887 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -700,6 +700,9 @@ struct nfsd4_copy { struct nfsd42_write_res cp_res; struct knfsd_fh fh; + /* offload callback */ + struct nfsd4_cb_offload cp_cb_offload; + struct nfs4_client *cp_clp; struct nfsd_file *nf_src; -- cgit v1.2.3 From 5c41f321470a5400641d3a271014ddd9b9868373 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:05 -0400 Subject: NFSD: Handle an NFS4ERR_DELAY response to CB_OFFLOAD RFC 7862 permits callback services to respond to CB_OFFLOAD with NFS4ERR_DELAY. Currently NFSD drops the CB_OFFLOAD in that case. To improve the reliability of COPY offload, NFSD should rather send another CB_OFFLOAD completion notification. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 8 ++++++++ fs/nfsd/xdr4.h | 1 + 2 files changed, 9 insertions(+) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5c1013141095..54db3979605f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1617,6 +1617,13 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, container_of(cb, struct nfsd4_cb_offload, co_cb); trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + if (cbo->co_retries--) { + rpc_delay(task, 1 * HZ); + return 0; + } + } return 1; } @@ -1745,6 +1752,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); cbo->co_nfserr = copy->nfserr; + cbo->co_retries = 5; nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 08e5d280f887..550f7e064f2b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -676,6 +676,7 @@ struct nfsd4_cb_offload { struct nfsd4_callback co_cb; struct nfsd42_write_res co_res; __be32 co_nfserr; + unsigned int co_retries; struct knfsd_fh co_fh; }; -- cgit v1.2.3 From b44ffa4c4f57ffe8a0967963538689fed169f1c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:06 -0400 Subject: NFSD: Block DESTROY_CLIENTID only when there are ongoing async COPY operations Currently __destroy_client() consults the nfs4_client's async_copies list to determine whether there are ongoing async COPY operations. However, NFSD now keeps copy state in that list even when the async copy has completed, to enable OFFLOAD_STATUS to find the COPY results for a while after the COPY has completed. DESTROY_CLIENTID should not be blocked if the client's async_copies list contains state for only completed copy operations. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 30 ++++++++++++++++++++++++++++++ fs/nfsd/nfs4state.c | 2 +- fs/nfsd/state.h | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 54db3979605f..67349eca55b1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1278,6 +1278,36 @@ out: return status; } +/** + * nfsd4_has_active_async_copies - Check for ongoing copy operations + * @clp: Client to be checked + * + * NFSD maintains state for async COPY operations after they complete, + * and this state remains in the nfs4_client's async_copies list. + * Ongoing copies should block the destruction of the nfs4_client, but + * completed copies should not. + * + * Return values: + * %true: At least one active async COPY is ongoing + * %false: No active async COPY operations were found + */ +bool nfsd4_has_active_async_copies(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy; + bool result = false; + + spin_lock(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { + if (!test_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags) && + !test_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) { + result = true; + break; + } + } + spin_unlock(&clp->async_lock); + return result; +} + static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a334f0a0e50a..b8bedc6a157f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3472,7 +3472,7 @@ static bool client_has_state(struct nfs4_client *clp) #endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions) - || !list_empty(&clp->async_copies); + || nfsd4_has_active_async_copies(clp); } static __be32 copy_impl_id(struct nfs4_client *clp, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 2f735492cf6c..4dd7ed7ae052 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -738,6 +738,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +bool nfsd4_has_active_async_copies(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); -- cgit v1.2.3 From ac0514f4d198b5d1d5ba367b122cdf5a68e711d4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:07 -0400 Subject: NFSD: Add a laundromat reaper for async copy state RFC 7862 Section 4.8 states: > A copy offload stateid will be valid until either (A) the client > or server restarts or (B) the client returns the resource by > issuing an OFFLOAD_CANCEL operation or the client replies to a > CB_OFFLOAD operation. Instead of releasing async copy state when the CB_OFFLOAD callback completes, now let it live until the next laundromat run after the callback completes. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 35 ++++++++++++++++++++++++++++++++++- fs/nfsd/nfs4state.c | 1 + fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 1 + 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 67349eca55b1..dfc118bf9aa5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1308,6 +1308,39 @@ bool nfsd4_has_active_async_copies(struct nfs4_client *clp) return result; } +/** + * nfsd4_async_copy_reaper - Purge completed copies + * @nn: Network namespace with possible active copy information + */ +void nfsd4_async_copy_reaper(struct nfsd_net *nn) +{ + struct nfs4_client *clp; + struct nfsd4_copy *copy; + LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + struct list_head *pos, *next; + + spin_lock(&clp->async_lock); + list_for_each_safe(pos, next, &clp->async_copies) { + copy = list_entry(pos, struct nfsd4_copy, copies); + if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { + list_del_init(©->copies); + list_add(©->copies, &reaplist); + } + } + spin_unlock(&clp->async_lock); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&reaplist)) { + copy = list_first_entry(&reaplist, struct nfsd4_copy, copies); + list_del_init(©->copies); + cleanup_async_copy(copy); + } +} + static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) @@ -1637,7 +1670,7 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) struct nfsd4_copy *copy = container_of(cbo, struct nfsd4_copy, cp_cb_offload); - cleanup_async_copy(copy); + set_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b8bedc6a157f..10aa7732e914 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6574,6 +6574,7 @@ nfs4_laundromat(struct nfsd_net *nn) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, <); nfs4_process_client_reaplist(&reaplist); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4dd7ed7ae052..dcbebd53e5f4 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -738,6 +738,7 @@ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +void nfsd4_async_copy_reaper(struct nfsd_net *nn); bool nfsd4_has_active_async_copies(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 550f7e064f2b..5951360f6d03 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -695,6 +695,7 @@ struct nfsd4_copy { #define NFSD4_COPY_F_SYNCHRONOUS (2) #define NFSD4_COPY_F_COMMITTED (3) #define NFSD4_COPY_F_COMPLETED (4) +#define NFSD4_COPY_F_OFFLOAD_DONE (5) /* response */ __be32 nfserr; -- cgit v1.2.3 From aa0ebd21df9c90a69f8bf00e3fa49d476be8e290 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 31 Oct 2024 09:40:08 -0400 Subject: NFSD: Add nfsd4_copy time-to-live Keep async copy state alive for a few lease cycles after the copy completes so that OFFLOAD_STATUS returns something meaningful. This means that NFSD's client shutdown processing needs to purge any of this state that happens to be waiting to die. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 +++++-- fs/nfsd/state.h | 15 +++++++++++++++ fs/nfsd/xdr4.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/nfsd/nfs4proc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dfc118bf9aa5..f8a10f90bc7a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1326,8 +1326,10 @@ void nfsd4_async_copy_reaper(struct nfsd_net *nn) list_for_each_safe(pos, next, &clp->async_copies) { copy = list_entry(pos, struct nfsd4_copy, copies); if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { - list_del_init(©->copies); - list_add(©->copies, &reaplist); + if (--copy->cp_ttl) { + list_del_init(©->copies); + list_add(©->copies, &reaplist); + } } } spin_unlock(&clp->async_lock); @@ -1921,6 +1923,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy->cp_nn = nn; INIT_LIST_HEAD(&async_copy->copies); refcount_set(&async_copy->refcount, 1); + async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL; /* Arbitrary cap on number of pending async copy operations */ if (atomic_inc_return(&nn->pending_async_copies) > (int)rqstp->rq_pool->sp_nrthreads) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dcbebd53e5f4..8092894e8f3c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -137,6 +137,21 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; +/* + * RFC 7862 Section 4.8 states: + * + * | A copy offload stateid will be valid until either (A) the client + * | or server restarts or (B) the client returns the resource by + * | issuing an OFFLOAD_CANCEL operation or the client replies to a + * | CB_OFFLOAD operation. + * + * Because a client might not reply to a CB_OFFLOAD, or a reply + * might get lost due to connection loss, NFSD purges async copy + * state after a short period to prevent it from accumulating + * over time. + */ +#define NFSD_COPY_INITIAL_TTL 10 + struct nfs4_cb_fattr { struct nfsd4_callback ncf_getattr; u32 ncf_cb_status; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 5951360f6d03..382cc1389396 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -715,6 +715,7 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; + unsigned int cp_ttl; struct nfsd4_ssc_umount_item *ss_nsui; struct nfs_fh c_fh; -- cgit v1.2.3