summaryrefslogtreecommitdiffstats
path: root/include/net/nexthop.h
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-05-26 16:06:07 -0700
committerDavid S. Miller <davem@davemloft.net>2020-05-26 16:06:07 -0700
commit4d5c32ec7512ac17986288ccbb158a14b21fc937 (patch)
treeb70ecc0b84cb9c9db0c4e40ba2af7f12eacdf969 /include/net/nexthop.h
parent963bdfc75cfa20c53bde64928af57a6ca175fc01 (diff)
parent1fd1c768f3624a5e66766e7b4ddb9b607cd834a5 (diff)
Merge branch 'nexthop-group-fixes'
David Ahern says: ==================== nexthops: Fix 2 fundamental flaws with nexthop groups Nik's torture tests have exposed 2 fundamental mistakes with the initial nexthop code for groups. First, the nexthops entries and num_nh in the nh_grp struct should not be modified once the struct is set under rcu. Doing so has major affects on the datapath seeing valid nexthop entries. Second, the helpers in the header file were convenient for not repeating code, but they cause datapath walks to potentially see 2 different group structs after an rcu replace, disrupting a walk of the path objects. This second problem applies solely to IPv4 as I re-used too much of the existing code in walking legs of a multipath route. Patches 1 is refactoring change to simplify the overhead of reviewing and understanding the change in patch 2 which fixes the update of nexthop groups when a compnent leg is removed. Patches 3-5 address the second problem. Patch 3 inlines the multipath check such that the mpath lookup and subsequent calls all use the same nh_grp struct. Patches 4 and 5 fix datapath uses of fib_info_num_path with iterative calls to fib_info_nhc. fib_info_num_path can be used in control plane path in a 'for loop' with subsequent fib_info_nhc calls to get each leg since the nh_grp struct is only changed while holding the rtnl; the combination can not be used in the data plane with external nexthops as it involves repeated dereferences of nh_grp struct which can change between calls. Similarly, nexthop_is_multipath can be used for branching decisions in the datapath since the nexthop type can not be changed (a group can not be converted to standalone and vice versa). Patch set developed in coordination with Nikolay Aleksandrov. He did a lot of work creating a good reproducer, discussing options to fix it and testing iterations. I have adapted Nik's commands into additional tests in the nexthops selftest script which I will send against -next. v2 - fixed whitespace errors ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/nexthop.h')
-rw-r--r--include/net/nexthop.h100
1 files changed, 84 insertions, 16 deletions
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index c440ccc861fc..8c9f1a718859 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -70,6 +70,7 @@ struct nh_grp_entry {
};
struct nh_group {
+ struct nh_group *spare; /* spare group for removals */
u16 num_nh;
bool mpath;
bool has_v4;
@@ -136,21 +137,20 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
{
unsigned int rc = 1;
- if (nexthop_is_multipath(nh)) {
+ if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
- rc = nh_grp->num_nh;
+ if (nh_grp->mpath)
+ rc = nh_grp->num_nh;
}
return rc;
}
static inline
-struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
+struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
{
- const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
-
/* for_nexthops macros in fib_semantics.c grabs a pointer to
* the nexthop before checking nhsel
*/
@@ -185,12 +185,14 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{
const struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- if (nexthop_num_path(nh) > 1)
- return false;
- nh = nexthop_mpath_select(nh, 0);
- if (!nh)
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->num_nh > 1)
return false;
+
+ nh = nh_grp->nh_entries[0].nh;
}
nhi = rcu_dereference_rtnl(nh->nh_info);
@@ -216,16 +218,79 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, nhsel);
- if (!nh)
- return NULL;
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ if (nh_grp->mpath) {
+ nh = nexthop_mpath_select(nh_grp, nhsel);
+ if (!nh)
+ return NULL;
+ }
}
nhi = rcu_dereference_rtnl(nh->nh_info);
return &nhi->fib_nhc;
}
+/* called from fib_table_lookup with rcu_lock */
+static inline
+struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
+ int fib_flags,
+ const struct flowi4 *flp,
+ int *nhsel)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rcu_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+
+ nhi = rcu_dereference(nhe->nh_info);
+ if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
+ *nhsel = i;
+ return &nhi->fib_nhc;
+ }
+ }
+ } else {
+ nhi = rcu_dereference(nh->nh_info);
+ if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
+ *nhsel = 0;
+ return &nhi->fib_nhc;
+ }
+ }
+
+ return NULL;
+}
+
+static inline bool nexthop_uses_dev(const struct nexthop *nh,
+ const struct net_device *dev)
+{
+ struct nh_info *nhi;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rcu_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+
+ nhi = rcu_dereference(nhe->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+ } else {
+ nhi = rcu_dereference(nh->nh_info);
+ if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
+ return true;
+ }
+
+ return false;
+}
+
static inline unsigned int fib_info_num_path(const struct fib_info *fi)
{
if (unlikely(fi->nh))
@@ -263,8 +328,11 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
{
struct nh_info *nhi;
- if (nexthop_is_multipath(nh)) {
- nh = nexthop_mpath_select(nh, 0);
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ nh = nexthop_mpath_select(nh_grp, 0);
if (!nh)
return NULL;
}