author     Alexander Duyck <alexander.h.duyck@redhat.com>   2015-03-04 15:02:44 -0800
committer  David S. Miller <davem@davemloft.net>            2015-03-04 23:35:18 -0500
commit     a7e53531234dc206bb75abb5305a72665dd4d75d (patch)
tree       015a31ddf3a8d54491a04549adf3e3471dbb8943 /net/ipv4
parent     41b489fd6ce03e96e90fcffdb69b168065ae2e40 (diff)
fib_trie: Make fib_table rcu safe
The fib_table was wrapped in several places with rcu_read_lock/rcu_read_unlock; however, after looking over the code I found several spots where the tables were being accessed as plain pointers without any protection. This change fixes that so that all of the proper protections are in place when accessing the table, taking RCU replacement or removal of the table into account.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
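For context, a minimal sketch of the RCU pointer lifecycle the patch enforces for the fib tables: publish with rcu_assign_pointer(), read under rcu_read_lock() with rcu_dereference(), and retire with RCU_INIT_POINTER() followed by call_rcu(). The demo_* names below are hypothetical illustrations, not part of the patch; only the RCU primitives themselves are real kernel APIs.

/*
 * Hypothetical sketch of the publish/read/retire pattern applied
 * by this patch; demo_* names are illustrative only.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_table {
	struct rcu_head rcu;
	int data;
};

static struct demo_table __rcu *demo_slot;

/* Writer: publish a fully initialized table to readers. */
static void demo_publish(struct demo_table *tb)
{
	rcu_assign_pointer(demo_slot, tb);
}

/* Reader: the rcu_read_lock() critical section keeps the table alive. */
static int demo_read(void)
{
	struct demo_table *tb;
	int val = -1;

	rcu_read_lock();
	tb = rcu_dereference(demo_slot);
	if (tb)
		val = tb->data;
	rcu_read_unlock();

	return val;
}

/* Deferred free: runs only after all pre-existing readers are done. */
static void demo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_table, rcu));
}

/* Writer: unpublish, then free once no reader can still see the table. */
static void demo_retire(void)
{
	struct demo_table *tb = rcu_dereference_protected(demo_slot, 1);

	RCU_INIT_POINTER(demo_slot, NULL);
	if (tb)
		call_rcu(&tb->rcu, demo_free_rcu);
}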
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/fib_frontend.c  52
-rw-r--r--  net/ipv4/fib_trie.c      21
2 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..220c4b4af4cf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -89,17 +89,14 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
switch (id) {
case RT_TABLE_LOCAL:
- net->ipv4.fib_local = tb;
+ rcu_assign_pointer(net->ipv4.fib_local, tb);
break;
-
case RT_TABLE_MAIN:
- net->ipv4.fib_main = tb;
+ rcu_assign_pointer(net->ipv4.fib_main, tb);
break;
-
case RT_TABLE_DEFAULT:
- net->ipv4.fib_default = tb;
+ rcu_assign_pointer(net->ipv4.fib_default, tb);
break;
-
default:
break;
}
@@ -132,13 +129,14 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
static void fib_flush(struct net *net)
{
int flushed = 0;
- struct fib_table *tb;
- struct hlist_head *head;
unsigned int h;
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
- head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist)
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
flushed += fib_table_flush(tb);
}
@@ -665,10 +663,12 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
s_h = cb->args[0];
s_e = cb->args[1];
+ rcu_read_lock();
+
for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
- hlist_for_each_entry(tb, head, tb_hlist) {
+ hlist_for_each_entry_rcu(tb, head, tb_hlist) {
if (e < s_e)
goto next;
if (dumped)
@@ -682,6 +682,8 @@ next:
}
}
out:
+ rcu_read_unlock();
+
cb->args[1] = e;
cb->args[0] = h;
@@ -1117,14 +1119,34 @@ static void ip_fib_net_exit(struct net *net)
rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
- struct fib_table *tb;
- struct hlist_head *head;
+ struct hlist_head *head = &net->ipv4.fib_table_hash[i];
struct hlist_node *tmp;
+ struct fib_table *tb;
+
+ /* this is done in two passes as flushing the table could
+ * cause it to be reallocated in order to accommodate new
+ * tnodes at the root as the table shrinks.
+ */
+ hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
+ fib_table_flush(tb);
- head = &net->ipv4.fib_table_hash[i];
hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+ switch (tb->tb_id) {
+ case RT_TABLE_LOCAL:
+ RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
+ break;
+ case RT_TABLE_MAIN:
+ RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
+ break;
+ case RT_TABLE_DEFAULT:
+ RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
+ break;
+ default:
+ break;
+ }
+#endif
hlist_del(&tb->tb_hlist);
- fib_table_flush(tb);
fib_free_table(tb);
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2233ebf2aae8..3642b17c8726 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -193,6 +193,13 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn,
return rcu_dereference_rtnl(tn->tnode[i]);
}
+static inline struct fib_table *trie_get_table(struct trie *t)
+{
+ unsigned long *tb_data = (unsigned long *)t;
+
+ return container_of(tb_data, struct fib_table, tb_data[0]);
+}
+
/* To understand this stuff, an understanding of keys and all their bits is
* necessary. Every node in the trie has a key associated with it, but not
* all of the bits in that key are significant.
@@ -1593,8 +1600,9 @@ flush_complete:
return found;
}
-void fib_free_table(struct fib_table *tb)
+static void __trie_free_rcu(struct rcu_head *head)
{
+ struct fib_table *tb = container_of(head, struct fib_table, rcu);
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie *t = (struct trie *)tb->tb_data;
@@ -1603,6 +1611,11 @@ void fib_free_table(struct fib_table *tb)
kfree(tb);
}
+void fib_free_table(struct fib_table *tb)
+{
+ call_rcu(&tb->rcu, __trie_free_rcu);
+}
+
static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -1639,6 +1652,7 @@ static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
return skb->len;
}
+/* rcu_read_lock needs to be held by caller from the read side */
int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1650,15 +1664,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
int count = cb->args[2];
t_key key = cb->args[3];
- rcu_read_lock();
-
tp = rcu_dereference_rtnl(t->trie);
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
cb->args[3] = key;
cb->args[2] = count;
- rcu_read_unlock();
return -1;
}
@@ -1673,8 +1684,6 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
break;
}
- rcu_read_unlock();
-
cb->args[3] = key;
cb->args[2] = count;
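The inet_dump_fib() hunk above also converts the table-hash walk to hlist_for_each_entry_rcu() inside an rcu_read_lock() section. A minimal standalone sketch of that iteration pattern, again with hypothetical demo_* names:

/*
 * Hypothetical sketch of an RCU-protected hlist walk, the pattern
 * inet_dump_fib() now uses for net->ipv4.fib_table_hash.
 */
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

struct demo_entry {
	struct hlist_node node;
	u32 id;
};

static bool demo_lookup(struct hlist_head *head, u32 id)
{
	struct demo_entry *e;
	bool found = false;

	rcu_read_lock();
	hlist_for_each_entry_rcu(e, head, node) {
		if (e->id == id) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}

On the writer side, the corresponding mutators are hlist_add_head_rcu() and hlist_del_rcu(), so concurrent readers never observe a half-updated list.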