From 64a38e840ce5940253208eaba40265c73decc4ee Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 26 Jul 2019 18:33:01 -0400 Subject: SUNRPC: Track writers of the 'channel' file to improve cache_listeners_exist The sunrpc cache interface is susceptible to being fooled by a rogue process just reading a 'channel' file. If this happens the kernel may think a valid daemon exists to service the cache when it does not. For example, the following may fool the kernel: cat /proc/net/rpc/auth.unix.gid/channel Change the tracking of readers to writers when considering whether a listener exists as all valid daemon processes either open a channel file O_RDWR or O_WRONLY. While this does not prevent a rogue process from "stealing" a message from the kernel, it does at least improve the kernel's perception of whether a valid process servicing the cache exists. Signed-off-by: Dave Wysochanski Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c7f38e897174..f7d086b77a21 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -107,9 +107,9 @@ struct cache_detail { /* fields for communication over channel */ struct list_head queue; - atomic_t readers; /* how many time is /chennel open */ - time_t last_close; /* if no readers, when did last close */ - time_t last_warn; /* when we last warned about no readers */ + atomic_t writers; /* how many time is /channel open */ + time_t last_close; /* if no writers, when did last close */ + time_t last_warn; /* when we last warned about no writers */ union { struct proc_dir_entry *procfs; -- cgit v1.2.3 From d6dfe43ec6062beea5ba1172b957e74a13c95b86 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 16 Aug 2019 17:48:36 -0400 Subject: svcrdma: Remove svc_rdma_wq Clean up: the system workqueue will work just as well. 
Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 981f0d726ad4..edb39900fe04 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -200,7 +200,6 @@ extern struct svc_xprt_class svc_rdma_bc_class; #endif /* svc_rdma.c */ -extern struct workqueue_struct *svc_rdma_wq; extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); -- cgit v1.2.3 From 4866073e6ddf03066c925d3237903d7f4ca68982 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 16 Aug 2019 17:49:38 -0400 Subject: svcrdma: Use llist for managing cache of recv_ctxts Use a wait-free mechanism for managing the svc_rdma_recv_ctxts free list. Subsequently, sc_recv_lock can be eliminated. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index edb39900fe04..40f65888dd38 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -42,6 +42,7 @@ #ifndef SVC_RDMA_H #define SVC_RDMA_H +#include #include #include #include @@ -107,8 +108,7 @@ struct svcxprt_rdma { struct list_head sc_read_complete_q; struct work_struct sc_work; - spinlock_t sc_recv_lock; - struct list_head sc_recv_ctxts; + struct llist_head sc_recv_ctxts; }; /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 @@ -125,6 +125,7 @@ enum { #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD struct svc_rdma_recv_ctxt { + struct llist_node rc_node; struct list_head rc_list; struct ib_recv_wr rc_recv_wr; struct ib_cqe rc_cqe; -- cgit v1.2.3 From f69d6d8eef7807f8d937b81da24bebd2e926e4d2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 18 Aug 2019 14:18:44 -0400 Subject: sunrpc: add a new cache_detail operation for 
when a cache is flushed When the exports table is changed, exportfs will usually write a new time to the "flush" file in the nfsd.export cache procfile. This tells the kernel to flush any entries that are older than that value. This gives us a mechanism to tell whether an unexport might have occurred. Add a new ->flush cache_detail operation that is called after flushing the cache whenever someone writes to a "flush" file. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index f7d086b77a21..f8603724fbee 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -87,6 +87,7 @@ struct cache_detail { int has_died); struct cache_head * (*alloc)(void); + void (*flush)(void); int (*match)(struct cache_head *orig, struct cache_head *new); void (*init)(struct cache_head *orig, struct cache_head *new); void (*update)(struct cache_head *orig, struct cache_head *new); -- cgit v1.2.3 From 18f6622ebbdea56a83f8e553c159ce2d62d3ad0c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 18 Aug 2019 14:18:45 -0400 Subject: locks: create a new notifier chain for lease attempts With the new file caching infrastructure in nfsd, we can end up holding files open for an indefinite period of time, even when they are still idle. This may prevent the kernel from handing out leases on the file, which is something we don't want to block. Fix this by running an SRCU notifier call chain on any lease attempt. nfsd can then purge the cache for that inode before returning. Since SRCU is only conditionally compiled in, we must only define the new chain if it's enabled, and users of the chain must ensure that SRCU is enabled. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 56b8e358af5c..0f106c7f4bb9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1155,6 +1155,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); extern int vfs_setlease(struct file *, long, struct file_lock **, void **); extern int lease_modify(struct file_lock *, int, struct list_head *); + +struct notifier_block; +extern int lease_register_notifier(struct notifier_block *); +extern void lease_unregister_notifier(struct notifier_block *); + struct files_struct; extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); -- cgit v1.2.3 From b72679ee89a0a0ecd26f7b6fcae96cdaababff94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 18 Aug 2019 14:18:46 -0400 Subject: notify: export symbols for use by the knfsd file cache The knfsd file cache will need to detect when files are unlinked, so that it can close the associated cached files. Export a minimal set of notifier functions to allow it to do so. Signed-off-by: Trond Myklebust Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/fsnotify_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 2de3b2ddd19a..1915bdba2fad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); +/* Wait until all marks queued for destruction are destroyed */ +extern void fsnotify_wait_marks_destroyed(void); /* run all the marks in a group, and clear all of the marks attached to given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ -- cgit v1.2.3 From 11a60d159259dbadf9188534b508e5003769af61 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 9 Sep 2019 16:10:30 -0400 Subject: nfsd: add a "GetVersion" upcall for nfsdcld Add a "GetVersion" upcall to allow nfsd to determine the maximum upcall version that the nfsdcld userspace daemon supports. If the daemon responds with -EOPNOTSUPP, then we know it only supports v1. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- include/uapi/linux/nfsd/cld.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h index b1e9de4f07d5..c5aad16d10c0 100644 --- a/include/uapi/linux/nfsd/cld.h +++ b/include/uapi/linux/nfsd/cld.h @@ -36,7 +36,8 @@ enum cld_command { Cld_Remove, /* remove record of this cm_id */ Cld_Check, /* is this cm_id allowed? 
*/ Cld_GraceDone, /* grace period is complete */ - Cld_GraceStart, + Cld_GraceStart, /* grace start (upload client records) */ + Cld_GetVersion, /* query max supported upcall version */ }; /* representation of long-form NFSv4 client ID */ @@ -54,7 +55,15 @@ struct cld_msg { union { __s64 cm_gracetime; /* grace period start time */ struct cld_name cm_name; + __u8 cm_version; /* for getting max version */ } __attribute__((packed)) cm_u; } __attribute__((packed)); +struct cld_msg_hdr { + __u8 cm_vers; /* upcall version */ + __u8 cm_cmd; /* upcall command */ + __s16 cm_status; /* return code */ + __u32 cm_xid; /* transaction id */ +} __attribute__((packed)); + #endif /* !_NFSD_CLD_H */ -- cgit v1.2.3 From 6ee95d1c899186c0798cafd25998d436bcdb9618 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 9 Sep 2019 16:10:31 -0400 Subject: nfsd: add support for upcall version 2 Version 2 upcalls will allow the nfsd to include a hash of the kerberos principal string in the Cld_Create upcall. If a principal is present in the svc_cred, then the hash will be included in the Cld_Create upcall. We attempt to use the svc_cred.cr_raw_principal (which is returned by gssproxy) first, and then fall back to using the svc_cred.cr_principal (which is returned by both gssproxy and rpc.svcgssd). Upon a subsequent restart, the hash will be returned in the Cld_GraceStart downcall and stored in the reclaim_str_hashtbl so it can be used when handling reclaim opens. Signed-off-by: Scott Mayhew Signed-off-by: J. 
Bruce Fields --- include/uapi/linux/nfsd/cld.h | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h index c5aad16d10c0..a519313af953 100644 --- a/include/uapi/linux/nfsd/cld.h +++ b/include/uapi/linux/nfsd/cld.h @@ -26,11 +26,15 @@ #include /* latest upcall version available */ -#define CLD_UPCALL_VERSION 1 +#define CLD_UPCALL_VERSION 2 /* defined by RFC3530 */ #define NFS4_OPAQUE_LIMIT 1024 +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + enum cld_command { Cld_Create, /* create a record for this cm_id */ Cld_Remove, /* remove record of this cm_id */ @@ -46,6 +50,17 @@ struct cld_name { unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ } __attribute__((packed)); +/* sha256 hash of the kerberos principal */ +struct cld_princhash { + __u8 cp_len; /* length of cp_data */ + unsigned char cp_data[SHA256_DIGEST_SIZE]; /* hash of principal */ +} __attribute__((packed)); + +struct cld_clntinfo { + struct cld_name cc_name; + struct cld_princhash cc_princhash; +} __attribute__((packed)); + /* message struct for communication with userspace */ struct cld_msg { __u8 cm_vers; /* upcall version */ @@ -59,6 +74,19 @@ struct cld_msg { } __attribute__((packed)) cm_u; } __attribute__((packed)); +/* version 2 message can include hash of kerberos principal */ +struct cld_msg_v2 { + __u8 cm_vers; /* upcall version */ + __u8 cm_cmd; /* upcall command */ + __s16 cm_status; /* return code */ + __u32 cm_xid; /* transaction id */ + union { + struct cld_name cm_name; + __u8 cm_version; /* for getting max version */ + struct cld_clntinfo cm_clntinfo; /* name & princ hash */ + } __attribute__((packed)) cm_u; +} __attribute__((packed)); + struct cld_msg_hdr { __u8 cm_vers; /* upcall version */ __u8 cm_cmd; /* upcall command */ -- cgit v1.2.3