summaryrefslogtreecommitdiffstats
path: root/fs/afs
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2018-10-20 00:57:59 +0100
committerDavid Howells <dhowells@redhat.com>2018-10-24 00:41:09 +0100
commit3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e (patch)
treedf215e6a6ad11b6ac8158461144667e168591d28 /fs/afs
parent18ac61853cc4e44eb30e125fc8344a3b25c7b6fe (diff)
afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list on all addresses simultaneously in an attempt to find out the fastest route whilst not getting stuck for 20s on any server or address that we don't get a reply from. This alleviates the problem whereby attempting to access a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds. Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs')
-rw-r--r--fs/afs/Makefile4
-rw-r--r--fs/afs/addr_list.c40
-rw-r--r--fs/afs/cmservice.c129
-rw-r--r--fs/afs/fs_probe.c270
-rw-r--r--fs/afs/fsclient.c27
-rw-r--r--fs/afs/internal.h98
-rw-r--r--fs/afs/proc.c6
-rw-r--r--fs/afs/rotate.c174
-rw-r--r--fs/afs/rxrpc.c44
-rw-r--r--fs/afs/server.c109
-rw-r--r--fs/afs/server_list.c6
-rw-r--r--fs/afs/vl_list.c6
-rw-r--r--fs/afs/vl_probe.c273
-rw-r--r--fs/afs/vl_rotate.c159
-rw-r--r--fs/afs/vlclient.c35
-rw-r--r--fs/afs/volume.c16
16 files changed, 1047 insertions, 349 deletions
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cc942b790cff..0738e2bf5193 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -17,6 +17,7 @@ kafs-y := \
file.o \
flock.o \
fsclient.o \
+ fs_probe.o \
inode.o \
main.o \
misc.o \
@@ -29,8 +30,9 @@ kafs-y := \
super.o \
netdevices.o \
vlclient.o \
- vl_rotate.o \
vl_list.o \
+ vl_probe.o \
+ vl_rotate.o \
volume.o \
write.o \
xattr.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 1536d1d21c33..967db336d11a 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin);
srx->transport.sin.sin_family = AF_INET;
srx->transport.sin.sin_port = htons(port);
@@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
srx = &alist->addrs[i];
+ srx->srx_family = AF_RXRPC;
+ srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
srx->transport.sin6.sin6_family = AF_INET6;
srx->transport.sin6.sin6_port = htons(port);
@@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
*/
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
{
- _enter("%hu+%hd", ac->start, (short)ac->index);
+ unsigned long set, failed;
+ int index;
if (!ac->alist)
return false;
+ set = ac->alist->responded;
+ failed = ac->alist->failed;
+ _enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
+
ac->nr_iterations++;
- if (ac->begun) {
- ac->index++;
- if (ac->index == ac->alist->nr_addrs)
- ac->index = 0;
+ set &= ~(failed | ac->tried);
- if (ac->index == ac->start)
- return false;
- }
+ if (!set)
+ return false;
+
+ index = READ_ONCE(ac->alist->preferred);
+ if (test_bit(index, &set))
+ goto selected;
+
+ index = __ffs(set);
- ac->begun = true;
+selected:
+ ac->index = index;
+ set_bit(index, &ac->tried);
ac->responded = false;
return true;
}
@@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
alist = ac->alist;
if (alist) {
- if (ac->responded && ac->index != ac->start)
- WRITE_ONCE(alist->index, ac->index);
+ if (ac->responded &&
+ ac->index != alist->preferred &&
+ test_bit(ac->alist->preferred, &ac->tried))
+ WRITE_ONCE(alist->preferred, ac->index);
afs_put_addrlist(alist);
+ ac->alist = NULL;
}
- ac->alist = NULL;
- ac->begun = false;
return ac->error;
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 8cf8d10daa6c..8ee5972893ed 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
{
_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
+ call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
+
switch (call->operation_ID) {
case CBCallBack:
call->type = &afs_SRXCBCallBack;
@@ -152,6 +154,91 @@ bool afs_cm_incoming_call(struct afs_call *call)
}
/*
+ * Record a probe to the cache manager from a server.
+ */
+static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
+{
+ _enter("");
+
+ if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
+ !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+ if (server->cm_epoch == call->epoch)
+ return 0;
+
+ if (!server->probe.said_rebooted) {
+ pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
+ server->probe.said_rebooted = true;
+ }
+ }
+
+ spin_lock(&server->probe_lock);
+
+ if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+ server->cm_epoch = call->epoch;
+ server->probe.cm_epoch = call->epoch;
+ goto out;
+ }
+
+ if (server->probe.cm_probed &&
+ call->epoch != server->probe.cm_epoch &&
+ !server->probe.said_inconsistent) {
+ pr_notice("kAFS: FS endpoints inconsistent %pU\n",
+ &server->uuid);
+ server->probe.said_inconsistent = true;
+ }
+
+ if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
+ server->probe.cm_epoch = server->cm_epoch;
+
+out:
+ server->probe.cm_probed = true;
+ spin_unlock(&server->probe_lock);
+ return 0;
+}
+
+/*
+ * Find the server record by peer address and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+ struct sockaddr_rxrpc srx;
+ struct afs_server *server;
+
+ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+
+ server = afs_find_server(call->net, &srx);
+ if (!server) {
+ trace_afs_cm_no_server(call, &srx);
+ return 0;
+ }
+
+ call->cm_server = server;
+ return afs_record_cm_probe(call, server);
+}
+
+/*
+ * Find the server record by server UUID and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+ struct afs_uuid *uuid)
+{
+ struct afs_server *server;
+
+ rcu_read_lock();
+ server = afs_find_server_by_uuid(call->net, call->request);
+ rcu_read_unlock();
+ if (!server) {
+ trace_afs_cm_no_server_u(call, call->request);
+ return 0;
+ }
+
+ call->cm_server = server;
+ return afs_record_cm_probe(call, server);
+}
+
+/*
* Clean up a cache manager call.
*/
static void afs_cm_destructor(struct afs_call *call)
@@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
static int afs_deliver_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
- struct sockaddr_rxrpc srx;
__be32 *bp;
int ret, loop;
@@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- call->cm_server = afs_find_server(call->net, &srx);
- if (!call->cm_server)
- trace_afs_cm_no_server(call, &srx);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -305,13 +386,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
*/
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
- struct sockaddr_rxrpc srx;
int ret;
_enter("");
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
afs_extract_discard(call, 0);
ret = afs_extract_data(call, false);
if (ret < 0)
@@ -319,11 +397,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- call->cm_server = afs_find_server(call->net, &srx);
- if (!call->cm_server)
- trace_afs_cm_no_server(call, &srx);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- rcu_read_lock();
- call->cm_server = afs_find_server_by_uuid(call->net, call->request);
- rcu_read_unlock();
- if (!call->cm_server)
- trace_afs_cm_no_server_u(call, call->request);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_uuid(call, call->request);
}
/*
@@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_uuid(call, call->request);
}
/*
@@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
return afs_io_error(call, afs_io_error_cm_reply);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
/*
@@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
static int afs_deliver_yfs_cb_callback(struct afs_call *call)
{
struct afs_callback_break *cb;
- struct sockaddr_rxrpc srx;
struct yfs_xdr_YFSFid *bp;
size_t size;
int ret, loop;
@@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
/* We'll need the file server record as that tells us which set of
* vnodes to operate upon.
*/
- rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
- call->cm_server = afs_find_server(call->net, &srx);
- if (!call->cm_server)
- trace_afs_cm_no_server(call, &srx);
-
- return afs_queue_call_work(call);
+ return afs_find_cm_server_by_peer(call);
}
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 000000000000..d049cb459742
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,270 @@
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_fs_probe_done(struct afs_server *server)
+{
+ if (!atomic_dec_and_test(&server->probe_outstanding))
+ return false;
+
+ wake_up_var(&server->probe_outstanding);
+ clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
+ wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
+ return true;
+}
+
+/*
+ * Process the result of probing a fileserver. This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+ struct afs_addr_list *alist = call->alist;
+ struct afs_server *server = call->reply[0];
+ unsigned int server_index = (long)call->reply[1];
+ unsigned int index = call->addr_ix;
+ unsigned int rtt = UINT_MAX;
+ bool have_result = false;
+ u64 _rtt;
+ int ret = call->error;
+
+ _enter("%pU,%u", &server->uuid, index);
+
+ spin_lock(&server->probe_lock);
+
+ switch (ret) {
+ case 0:
+ server->probe.error = 0;
+ goto responded;
+ case -ECONNABORTED:
+ if (!server->probe.responded) {
+ server->probe.abort_code = call->abort_code;
+ server->probe.error = ret;
+ }
+ goto responded;
+ case -ENOMEM:
+ case -ENONET:
+ server->probe.local_failure = true;
+ afs_io_error(call, afs_io_error_fs_probe_fail);
+ goto out;
+ case -ECONNRESET: /* Responded, but call expired. */
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ case -ETIMEDOUT:
+ case -ETIME:
+ default:
+ clear_bit(index, &alist->responded);
+ set_bit(index, &alist->failed);
+ if (!server->probe.responded &&
+ (server->probe.error == 0 ||
+ server->probe.error == -ETIMEDOUT ||
+ server->probe.error == -ETIME))
+ server->probe.error = ret;
+ afs_io_error(call, afs_io_error_fs_probe_fail);
+ goto out;
+ }
+
+responded:
+ set_bit(index, &alist->responded);
+ clear_bit(index, &alist->failed);
+
+ if (call->service_id == YFS_FS_SERVICE) {
+ server->probe.is_yfs = true;
+ set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ } else {
+ server->probe.not_yfs = true;
+ if (!server->probe.is_yfs) {
+ clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+ alist->addrs[index].srx_service = call->service_id;
+ }
+ }
+
+ /* Get the RTT and scale it to fit into a 32-bit value that represents
+ * over a minute of time so that we can access it with one instruction
+ * on a 32-bit system.
+ */
+ _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+ _rtt /= 64;
+ rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+ if (rtt < server->probe.rtt) {
+ server->probe.rtt = rtt;
+ alist->preferred = index;
+ have_result = true;
+ }
+
+ smp_wmb(); /* Set rtt before responded. */
+ server->probe.responded = true;
+ set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+out:
+ spin_unlock(&server->probe_lock);
+
+ _debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+ server_index, index, &alist->addrs[index].transport,
+ (unsigned int)rtt, ret);
+
+ have_result |= afs_fs_probe_done(server);
+ if (have_result) {
+ server->probe.have_result = true;
+ wake_up_var(&server->probe.have_result);
+ wake_up_all(&server->probe_wq);
+ }
+}
+
+/*
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_fileserver(struct afs_net *net,
+ struct afs_server *server,
+ struct key *key,
+ unsigned int server_index)
+{
+ struct afs_addr_cursor ac = {
+ .index = 0,
+ };
+ int ret;
+
+ _enter("%pU", &server->uuid);
+
+ read_lock(&server->fs_lock);
+ ac.alist = rcu_dereference_protected(server->addresses,
+ lockdep_is_held(&server->fs_lock));
+ read_unlock(&server->fs_lock);
+
+ atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+ memset(&server->probe, 0, sizeof(server->probe));
+ server->probe.rtt = UINT_MAX;
+
+ for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+ ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+ true);
+ if (ret != -EINPROGRESS) {
+ afs_fs_probe_done(server);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_probe_fileservers(struct afs_net *net, struct key *key,
+ struct afs_server_list *list)
+{
+ struct afs_server *server;
+ int i, ret;
+
+ for (i = 0; i < list->nr_servers; i++) {
+ server = list->servers[i].server;
+ if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
+ continue;
+
+ if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
+ ret = afs_do_probe_fileserver(net, server, key, i);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond.
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+ struct wait_queue_entry *waits;
+ struct afs_server *server;
+ unsigned int rtt = UINT_MAX;
+ bool have_responders = false;
+ int pref = -1, i;
+
+ _enter("%u,%lx", slist->nr_servers, untried);
+
+ /* Only wait for servers that have a probe outstanding. */
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ __clear_bit(i, &untried);
+ if (server->probe.responded)
+ have_responders = true;
+ }
+ }
+ if (have_responders || !untried)
+ return 0;
+
+ waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+ if (!waits)
+ return -ENOMEM;
+
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ init_waitqueue_entry(&waits[i], current);
+ add_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ for (;;) {
+ bool still_probing = false;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (server->probe.responded)
+ goto stop;
+ if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+ still_probing = true;
+ }
+ }
+
+ if (!still_probing || unlikely(signal_pending(current)))
+ goto stop;
+ schedule();
+ }
+
+stop:
+ set_current_state(TASK_RUNNING);
+
+ for (i = 0; i < slist->nr_servers; i++) {
+ if (test_bit(i, &untried)) {
+ server = slist->servers[i].server;
+ if (server->probe.responded &&
+ server->probe.rtt < rtt) {
+ pref = i;
+ rtt = server->probe.rtt;
+ }
+
+ remove_wait_queue(&server->probe_wq, &waits[i]);
+ }
+ }
+
+ kfree(waits);
+
+ if (pref == -1 && signal_pending(current))
+ return -ERESTARTSYS;
+
+ if (pref >= 0)
+ slist->preferred = pref;
+ return 0;
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7c75a1813321..ca08c83168f5 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2006,7 +2006,6 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
*/
static int afs_deliver_fs_get_capabilities(struct afs_call *call)
{
- struct afs_server *server = call->reply[0];
u32 count;
int ret;
@@ -2042,15 +2041,18 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
break;
}
- if (call->service_id == YFS_FS_SERVICE)
- set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
- else
- clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
-
_leave(" = 0 [done]");
return 0;
}
+static void afs_destroy_fs_get_capabilities(struct afs_call *call)
+{
+ struct afs_server *server = call->reply[0];
+
+ afs_put_server(call->net, server);
+ afs_flat_call_destructor(call);
+}
+
/*
* FS.GetCapabilities operation type
*/
@@ -2058,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.name = "FS.GetCapabilities",
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
- .destructor = afs_flat_call_destructor,
+ .done = afs_fileserver_probe_result,
+ .destructor = afs_destroy_fs_get_capabilities,
};
/*
@@ -2068,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
int afs_fs_get_capabilities(struct afs_net *net,
struct afs_server *server,
struct afs_addr_cursor *ac,
- struct key *key)
+ struct key *key,
+ unsigned int server_index,
+ bool async)
{
struct afs_call *call;
__be32 *bp;
@@ -2080,8 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
return -ENOMEM;
call->key = key;
- call->reply[0] = server;
+ call->reply[0] = afs_get_server(server);
+ call->reply[1] = (void *)(long)server_index;
call->upgrade = true;
+ call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -2089,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
/* Can't take a ref on server */
trace_afs_make_fs_call(call, NULL);
- return afs_make_call(ac, call, GFP_NOFS, false);
+ return afs_make_call(ac, call, GFP_NOFS, async);
}
/*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b60d15212975..5da3b09b7518 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -76,12 +76,13 @@ struct afs_addr_list {
u32 version; /* Version */
unsigned char max_addrs;
unsigned char nr_addrs;
- unsigned char index; /* Address currently in use */
+ unsigned char preferred; /* Preferred address */
unsigned char nr_ipv4; /* Number of IPv4 addresses */
enum dns_record_source source:8;
enum dns_lookup_status status:8;
unsigned long probed; /* Mask of servers that have been probed */
- unsigned long yfs; /* Mask of servers that are YFS */
+ unsigned long failed; /* Mask of addrs that failed locally/ICMP */
+ unsigned long responded; /* Mask of addrs that responded */
struct sockaddr_rxrpc addrs[];
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};
@@ -91,6 +92,7 @@ struct afs_addr_list {
*/
struct afs_call {
const struct afs_call_type *type; /* type of call */
+ struct afs_addr_list *alist; /* Address is alist[addr_ix] */
wait_queue_head_t waitq; /* processes awaiting completion */
struct work_struct async_work; /* async I/O processor */
struct work_struct work; /* actual work processor */
@@ -116,6 +118,7 @@ struct afs_call {
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
+ u32 epoch;
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
@@ -125,13 +128,14 @@ struct afs_call {
unsigned count2; /* count used in unmarshalling */
};
unsigned char unmarshall; /* unmarshalling phase */
+ unsigned char addr_ix; /* Address in ->alist */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
- bool want_reply_time; /* T if want reply_time */
+ bool want_reply_time; /* T if want reply_time */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
@@ -162,6 +166,9 @@ struct afs_call_type {
/* Work function */
void (*work)(struct work_struct *work);
+
+ /* Call done function (gets called immediately on success or failure) */
+ void (*done)(struct afs_call *call);
};
/*
@@ -376,10 +383,27 @@ struct afs_vlserver {
unsigned long flags;
#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
rwlock_t lock; /* Lock on addresses */
atomic_t usage;
- u16 name_len; /* Length of name */
+
+ /* Probe state */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT as ktime/64 */
+ u32 abort_code;
+ short error;
+ bool have_result;
+ bool responded:1;
+ bool is_yfs:1;
+ bool not_yfs:1;
+ bool local_failure:1;
+ } probe;
+
u16 port;
+ u16 name_len; /* Length of name */
char name[]; /* Server name, case-flattened */
};
@@ -399,6 +423,7 @@ struct afs_vlserver_list {
atomic_t usage;
u8 nr_servers;
u8 index; /* Server currently in use */
+ u8 preferred; /* Preferred server */
enum dns_record_source source:8;
enum dns_lookup_status status:8;
rwlock_t lock;
@@ -461,8 +486,10 @@ struct afs_server {
#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */
#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */
atomic_t usage;
u32 addr_version; /* Address list version */
+ u32 cm_epoch; /* Server RxRPC epoch */
/* file service access */
rwlock_t fs_lock; /* access lock */
@@ -471,6 +498,26 @@ struct afs_server {
struct hlist_head cb_volumes; /* List of volume interests on this server */
unsigned cb_s_break; /* Break-everything counter. */
rwlock_t cb_break_lock; /* Volume finding lock */
+
+ /* Probe state */
+ wait_queue_head_t probe_wq;
+ atomic_t probe_outstanding;
+ spinlock_t probe_lock;
+ struct {
+ unsigned int rtt; /* RTT as ktime/64 */
+ u32 abort_code;
+ u32 cm_epoch;
+ short error;
+ bool have_result;
+ bool responded:1;
+ bool is_yfs:1;
+ bool not_yfs:1;
+ bool local_failure:1;
+ bool no_epoch:1;
+ bool cm_probed:1;
+ bool said_rebooted:1;
+ bool said_inconsistent:1;
+ } probe;
};
/*
@@ -505,8 +552,8 @@ struct afs_server_entry {
struct afs_server_list {
refcount_t usage;
- unsigned short nr_servers;
- unsigned short index; /* Server currently in use */
+ unsigned char nr_servers;
+ unsigned char preferred; /* Preferred server */
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
unsigned int seq; /* Set to ->servers_seq when installed */
rwlock_t lock;
@@ -653,13 +700,12 @@ struct afs_interface {
*/
struct afs_addr_cursor {
struct afs_addr_list *alist; /* Current address list (pins ref) */
- u32 abort_code;
- unsigned short start; /* Starting point in alist->addrs[] */
- unsigned short index; /* Wrapping offset from start to current addr */
- short error;
- bool begun; /* T if we've begun iteration */
+ unsigned long tried; /* Tried addresses */
+ signed char index; /* Current address */
bool responded; /* T if the current address responded */
unsigned short nr_iterations; /* Number of address iterations */
+ short error;
+ u32 abort_code;
};
/*
@@ -669,9 +715,10 @@ struct afs_vl_cursor {
struct afs_addr_cursor ac;
struct afs_cell *cell; /* The cell we're querying */
struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
+ struct afs_vlserver *server; /* Server on which this resides */
struct key *key; /* Key for the server */
- unsigned char start; /* Initial index in server list */
- unsigned char index; /* Number of servers tried beyond start */
+ unsigned long untried; /* Bitmask of untried servers */
+ short index; /* Current server */
short error;
unsigned short flags;
#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
@@ -689,10 +736,10 @@ struct afs_fs_cursor {
struct afs_server_list *server_list; /* Current server list (pins ref) */
struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
struct key *key; /* Key for the server */
+ unsigned long untried; /* Bitmask of untried servers */
unsigned int cb_break; /* cb_break + cb_s_break before the call */
unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
- unsigned char start; /* Initial index in server list */
- unsigned char index; /* Number of servers tried beyond start */
+ short index; /* Current server */
short error;
unsigned short flags;
#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
@@ -888,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *);
extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
- struct afs_addr_cursor *, struct key *);
+ struct afs_addr_cursor *, struct key *, unsigned int, bool);
extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
struct afs_fid *, struct afs_file_status *,
struct afs_callback *, unsigned int,
@@ -898,6 +945,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
struct afs_callback *, struct afs_volsync *);
/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+
+/*
* inode.c
*/
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
@@ -1013,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
extern void __net_exit afs_close_socket(struct afs_net *);
extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
-extern int afs_queue_call_work(struct afs_call *);
extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
@@ -1130,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
extern void afs_manage_servers(struct work_struct *);
extern void afs_servers_timer(struct timer_list *);
extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_probe_fileserver(struct afs_fs_cursor *);
extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
/*
@@ -1160,10 +1212,18 @@ extern void afs_fs_exit(void);
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
const char *, int);
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
+ struct afs_vlserver *, unsigned int, bool);
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
+/*
* vl_rotate.c
*/
extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index d887f822f4eb..be2ee3bbd0a9 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -312,7 +312,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
if (alist) {
for (i = 0; i < alist->nr_addrs; i++)
seq_printf(m, " %c %pISpc\n",
- alist->index == i ? '>' : '-',
+ alist->preferred == i ? '>' : '-',
&alist->addrs[i].transport);
}
return 0;
@@ -391,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
&server->uuid,
atomic_read(&server->usage),
&alist->addrs[0].transport,
- alist->index == 0 ? "*" : "");
+ alist->preferred == 0 ? "*" : "");
for (i = 1; i < alist->nr_addrs; i++)
seq_printf(m, " %pISpc%s\n",
&alist->addrs[i].transport,
- alist->index == i ? "*" : "");
+ alist->preferred == i ? "*" : "");
return 0;
}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 7c4487781637..00504254c1c2 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -19,14 +19,