From a76580fbf09e6e19c2040c08969af5137e064eda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 20 May 2013 23:00:18 -0400 Subject: SUNRPC: Fix a potential race in rpc_execute If the rpc_task is asynchronous, it could theoretically finish executing on the workqueue it was assigned by rpc_make_runnable() before we get round to testing RPC_IS_ASYNC() in rpc_execute. In practice, however, all the existing callers hold a reference to the rpc_task, so this can't happen today... Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 5356b120dbf8..849ca413522c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -825,9 +825,11 @@ static void __rpc_execute(struct rpc_task *task) */ void rpc_execute(struct rpc_task *task) { + bool is_async = RPC_IS_ASYNC(task); + rpc_set_active(task); rpc_make_runnable(task); - if (!RPC_IS_ASYNC(task)) + if (!is_async) __rpc_execute(task); } -- cgit v1.2.3 From 0053a8e65c0b949fd230488e5be871755f3f860f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 May 2013 12:51:32 -0400 Subject: SUNRPC: Remove unused function rpc_queue_empty Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 849ca413522c..dcbd69cb1cbd 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -445,20 +445,6 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r } } -/* - * Tests whether rpc queue is empty - */ -int rpc_queue_empty(struct rpc_wait_queue *queue) -{ - int res; - - spin_lock_bh(&queue->lock); - res = queue->qlen; - spin_unlock_bh(&queue->lock); - return res == 0; -} -EXPORT_SYMBOL_GPL(rpc_queue_empty); - /* * Wake up a task on a specific queue */ -- cgit v1.2.3 From 9ec2ef53b92fdbb1b5f24af000fc2ba0b18221ea Mon Sep 17 00:00:00 2001 From: 
Trond Myklebust Date: Wed, 22 May 2013 18:52:18 -0400 Subject: SUNRPC: Remove redundant call to rpc_set_running() in __rpc_execute() The RPC_TASK_RUNNING flag will always have been set in rpc_make_runnable() once we get past the test for out_of_line_wait_on_bit() returning ERESTARTSYS. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index dcbd69cb1cbd..b7b32c34c18d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -790,7 +790,6 @@ static void __rpc_execute(struct rpc_task *task) task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); } - rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); } -- cgit v1.2.3 From e401452d923de5b27f61f707773ec38f5593d985 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Jun 2013 09:10:29 -0400 Subject: rpc_pipefs: only set rpc_dentry_ops if d_op isn't already set We had a report of a reproducible WARNING: [ 1360.039358] ------------[ cut here ]------------ [ 1360.043978] WARNING: at fs/dcache.c:1355 d_set_d_op+0x8d/0xc0() [ 1360.049880] Hardware name: HP Z200 Workstation [ 1360.054308] Modules linked in: nfsv4 nfs dns_resolver fscache nfsd auth_rpcgss nfs_acl lockd sunrpc sg acpi_cpufreq mperf coretemp kvm_intel kvm snd_hda_codec_realtek snd_hda_intel snd_hda_codec hp_wmi crc32c_intel snd_hwdep e1000e snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer snd sparse_keymap rfkill soundcore serio_raw ptp iTCO_wdt pps_core pcspkr iTCO_vendor_support mei microcode lpc_ich mfd_core wmi xfs libcrc32c sr_mod sd_mod cdrom crc_t10dif radeon i2c_algo_bit drm_kms_helper ttm ahci libahci drm i2c_core libata dm_mirror dm_region_hash dm_log dm_mod [last unloaded: auth_rpcgss] [ 1360.107406] Pid: 8814, comm: mount.nfs4 Tainted: G I -------------- 3.9.0-0.55.el7.x86_64 #1 [ 1360.116771] Call Trace: [ 1360.119219] [] warn_slowpath_common+0x70/0xa0 [ 1360.125208] [] 
warn_slowpath_null+0x1a/0x20 [ 1360.131025] [] d_set_d_op+0x8d/0xc0 [ 1360.136159] [] __rpc_lookup_create_exclusive+0x4f/0x80 [sunrpc] [ 1360.143710] [] rpc_mkpipe_dentry+0x86/0x170 [sunrpc] [ 1360.150311] [] nfs_idmap_new+0x96/0x130 [nfsv4] [ 1360.156475] [] nfs4_init_client+0xad/0x2d0 [nfsv4] [ 1360.162902] [] ? idr_get_empty_slot+0x16f/0x3c0 [ 1360.169062] [] ? idr_mark_full+0x52/0x60 [ 1360.174615] [] ? idr_alloc+0x79/0xe0 [ 1360.179826] [] ? __rpc_init_priority_wait_queue+0x81/0xc0 [sunrpc] [ 1360.187635] [] ? rpc_init_wait_queue+0x13/0x20 [sunrpc] [ 1360.194493] [] nfs_get_client+0x27a/0x350 [nfs] [ 1360.200666] [] nfs4_set_client.isra.8+0x78/0x100 [nfsv4] [ 1360.207624] [] nfs4_create_server+0xf3/0x3a0 [nfsv4] [ 1360.214222] [] nfs4_remote_mount+0x2e/0x60 [nfsv4] [ 1360.220644] [] mount_fs+0x39/0x1b0 [ 1360.225691] [] ? __alloc_percpu+0x10/0x20 [ 1360.231348] [] vfs_kern_mount+0x5f/0xf0 [ 1360.236822] [] nfs_do_root_mount+0x86/0xc0 [nfsv4] [ 1360.243246] [] nfs4_try_mount+0x44/0xc0 [nfsv4] [ 1360.249410] [] ? get_nfs_version+0x27/0x80 [nfs] [ 1360.255659] [] nfs_fs_mount+0x5c5/0xd10 [nfs] [ 1360.261650] [] ? nfs_clone_super+0x140/0x140 [nfs] [ 1360.268074] [] ? param_set_portnr+0x60/0x60 [nfs] [ 1360.274406] [] mount_fs+0x39/0x1b0 [ 1360.279443] [] ? __alloc_percpu+0x10/0x20 [ 1360.285088] [] vfs_kern_mount+0x5f/0xf0 [ 1360.290556] [] do_mount+0x1fd/0xa00 [ 1360.295677] [] ? __get_free_pages+0xe/0x50 [ 1360.301405] [] ? copy_mount_options+0x36/0x170 [ 1360.307479] [] sys_mount+0x83/0xc0 [ 1360.312515] [] system_call_fastpath+0x16/0x1b [ 1360.318503] ---[ end trace 8fa1f4cbc36094a7 ]--- The problem is that we're ending up in __rpc_lookup_create_exclusive with a negative dentry that already has d_op set. A little debugging has shown that when we hit this, the d_ops are already set to simple_dentry_operations. I believe that what's happening is that during a mount, idmapd is racing in and doing a lookup of /var/lib/nfs/rpc_pipefs/nfs/clnt???/idmap. 
Before that dentry reference is released, the kernel races in to create that file and finds the new negative dentry, which already has the d_op set. This patch just avoids setting the d_op if it's already set. simple_dentry_operations and rpc_dentry_operations are functionally equivalent so it shouldn't matter which one it's set to. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e7ce4b3eb0bd..a816b3a69059 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -667,7 +667,8 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, return ERR_PTR(-ENOMEM); } if (dentry->d_inode == NULL) { - d_set_d_op(dentry, &rpc_dentry_operations); + if (!dentry->d_op) + d_set_d_op(dentry, &rpc_dentry_operations); return dentry; } dput(dentry); -- cgit v1.2.3 From 384816051ca9125cd54750e59c780c2a2655fa4f Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:38 +0400 Subject: SUNRPC: fix races on PipeFS MOUNT notifications Below is a race in which an RPC client can be created without PipeFS dentries: CPU#0 CPU#1 ----------------------------- ----------------------------- rpc_new_client rpc_fill_super rpc_setup_pipedir mutex_lock(&sn->pipefs_sb_lock) rpc_get_sb_net == NULL (no per-net PipeFS superblock) sn->pipefs_sb = sb; notifier_call_chain(MOUNT) (client is not in the list) rpc_register_client (client without pipes dentries) To fix this, this patch: 1) makes the PipeFS mount notification call with pipefs_sb_lock being held. 2) releases pipefs_sb_lock on new SUNRPC client creation only after registration.
Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 26 +++++++++++++++----------- net/sunrpc/rpc_pipe.c | 3 +++ 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c3640..b827a4b91975 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -157,20 +157,15 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, } static int -rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) +rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name, + struct super_block *pipefs_sb) { - struct net *net = rpc_net_ns(clnt); - struct super_block *pipefs_sb; struct dentry *dentry; clnt->cl_dentry = NULL; if (dir_name == NULL) return 0; - pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) - return 0; dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); - rpc_put_sb_net(net); if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; @@ -296,6 +291,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; + struct super_block *pipefs_sb; /* sanity check the name before trying to print it */ dprintk("RPC: creating %s client for %s (xprt %p)\n", @@ -354,9 +350,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru atomic_set(&clnt->cl_count, 1); - err = rpc_setup_pipedir(clnt, program->pipe_dir_name); - if (err < 0) - goto out_no_path; + pipefs_sb = rpc_get_sb_net(rpc_net_ns(clnt)); + if (pipefs_sb) { + err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + if (err) + goto out_no_path; + } auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { @@ -369,11 +368,16 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); rpc_register_client(clnt); + if 
(pipefs_sb) + rpc_put_sb_net(rpc_net_ns(clnt)); return clnt; out_no_auth: - rpc_clnt_remove_pipedir(clnt); + if (pipefs_sb) + __rpc_clnt_remove_pipedir(clnt); out_no_path: + if (pipefs_sb) + rpc_put_sb_net(rpc_net_ns(clnt)); kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a816b3a69059..e02823bdfe98 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1127,6 +1127,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, NET_NAME(net)); + mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, @@ -1134,6 +1135,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_depopulate; sb->s_fs_info = get_net(net); + mutex_unlock(&sn->pipefs_sb_lock); return 0; err_depopulate: @@ -1142,6 +1144,7 @@ err_depopulate: sb); sn->pipefs_sb = NULL; __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); + mutex_unlock(&sn->pipefs_sb_lock); return err; } -- cgit v1.2.3 From adb6fa7ffe9031857ec14b8aab75c9ab65556cbc Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Jun 2013 10:15:14 +0400 Subject: SUNRPC: fix races on PipeFS UMOUNT notifications CPU#0 CPU#1 ----------------------------- ----------------------------- rpc_kill_sb sn->pipefs_sb = NULL rpc_release_client (UMOUNT_EVENT) rpc_free_auth rpc_pipefs_event rpc_get_client_for_event !atomic_inc_not_zero(cl_count) atomic_inc(cl_count) rpc_free_client rpc_clnt_remove_pipedir To fix this, this patch does the following: 1) Calls RPC_PIPEFS_UMOUNT notification with sn->pipefs_sb_lock being held. 2) Removes the SUNRPC client from the list AFTER destroying its pipes. 3) Doesn't hold a reference to the RPC client on notification: if the client is in the list, then it can't be destroyed while sn->pipefs_sb_lock is held by the notification caller.
Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +---- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b827a4b91975..41f180c5a498 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -236,8 +236,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) continue; if (rpc_clnt_skip_event(clnt, event)) continue; - if (atomic_inc_not_zero(&clnt->cl_count) == 0) - continue; spin_unlock(&sn->rpc_client_lock); return clnt; } @@ -254,7 +252,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clnt, event, sb); - rpc_release_client(clnt); if (error) break; } @@ -641,8 +638,8 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - rpc_unregister_client(clnt); rpc_clnt_remove_pipedir(clnt); + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e02823bdfe98..4679df5a6d50 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1166,12 +1166,12 @@ static void rpc_kill_sb(struct super_block *sb) goto out; } sn->pipefs_sb = NULL; - mutex_unlock(&sn->pipefs_sb_lock); dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); + mutex_unlock(&sn->pipefs_sb_lock); put_net(net); out: kill_litter_super(sb); -- cgit v1.2.3 From e73f4cc051199799aee4320f300f28ffb82f3eb1 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:52 +0400 Subject: SUNRPC: split client creation routine into setup and 
registration This helper moves all "registration" code to the new rpc_client_register() helper. This helper will be used later in the series to synchronize against PipeFS MOUNT/UMOUNT events. Signed-off-by: Stanislav Kinsbursky Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 64 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 41f180c5a498..b4f17117b779 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -281,14 +281,47 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); } +static int rpc_client_register(const struct rpc_create_args *args, + struct rpc_clnt *clnt) +{ + const struct rpc_program *program = args->program; + struct rpc_auth *auth; + struct net *net = rpc_net_ns(clnt); + struct super_block *pipefs_sb; + int err = 0; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + if (err) + goto out; + } + + auth = rpcauth_create(args->authflavor, clnt); + if (IS_ERR(auth)) { + dprintk("RPC: Couldn't create auth handle (flavor %u)\n", + args->authflavor); + err = PTR_ERR(auth); + goto err_auth; + } + + rpc_register_client(clnt); +out: + if (pipefs_sb) + rpc_put_sb_net(net); + return err; + +err_auth: + __rpc_clnt_remove_pipedir(clnt); + goto out; +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { const struct rpc_program *program = args->program; const struct rpc_version *version; struct rpc_clnt *clnt = NULL; - struct rpc_auth *auth; int err; - struct super_block *pipefs_sb; /* sanity check the name before trying to print it */ dprintk("RPC: creating %s client for %s (xprt %p)\n", @@ -347,34 +380,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru atomic_set(&clnt->cl_count, 1); 
- pipefs_sb = rpc_get_sb_net(rpc_net_ns(clnt)); - if (pipefs_sb) { - err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); - if (err) - goto out_no_path; - } - - auth = rpcauth_create(args->authflavor, clnt); - if (IS_ERR(auth)) { - dprintk("RPC: Couldn't create auth handle (flavor %u)\n", - args->authflavor); - err = PTR_ERR(auth); - goto out_no_auth; - } - /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); - rpc_register_client(clnt); - if (pipefs_sb) - rpc_put_sb_net(rpc_net_ns(clnt)); + + err = rpc_client_register(args, clnt); + if (err) + goto out_no_path; return clnt; -out_no_auth: - if (pipefs_sb) - __rpc_clnt_remove_pipedir(clnt); out_no_path: - if (pipefs_sb) - rpc_put_sb_net(rpc_net_ns(clnt)); kfree(clnt->cl_principal); out_no_principal: rpc_free_iostats(clnt->cl_metrics); -- cgit v1.2.3 From 4f6bb246f69443549fbbd0f2abaf863243cb35e9 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Mon, 24 Jun 2013 11:52:59 +0400 Subject: SUNRPC: PipeFS MOUNT notification optimization for dying clients There is no need to create pipes for a dying client, so just skip such clients. Note: we can safely dereference the client structure, because the notification caller is holding sn->pipefs_sb_lock. Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b4f17117b779..f0339ae9bf37 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -177,6 +177,8 @@ static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) return 1; + if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0) + return 1; return 0; } -- cgit v1.2.3