summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-09-07 13:16:48 -0700
committerJakub Kicinski <kuba@kernel.org>2020-09-07 13:18:15 -0700
commit6af52ae2ed14a6bc756d5606b29097dfd76740b8 (patch)
treea23abc847500aee38959cbcf556b0392da42adc0
parentbb1416adb8a084f2979ac9536dca3e4c44d45821 (diff)
parente12cec65b5546f19217e26aafb8add6e2fadca18 (diff)
Merge branch 'net-bridge-mcast-initial-IGMPv3-MLDv2-support-part-1'
Nikolay Aleksandrov says: ==================== net: bridge: mcast: initial IGMPv3/MLDv2 support (part 1) This patch-set implements the control plane for initial IGMPv3/MLDv2 support which takes care of include/exclude sets and state transitions based on the different report types. Patch 01 arranges the structure better by moving the frequently used fields together, patch 02 factors out the port group deletion code which is used in a few places. Patches 03 and 04 add support for source lists and group modes per port group which are dumped. Patch 05 adds support for group-and-source specific queries required for IGMPv3/MLDv2. Then patch 06 adds support for group and group-and-source query retransmissions via a new rexmit timer. Patches 07 and 08 make use of the already present mdb fill functions when sending notifications so we can have the full mdb entries' state filled in (with sources, mode etc). Patch 09 takes care of port group expiration, it switches the group mode to include and deletes it if there are no sources with active timers. Patches 10-13 are the core changes which add support for IGMPv3/MLDv2 reports and handle the source list set operations as per RFCs 3376 and 3810, all IGMPv3/MLDv2 report types with their transitions should be supported after these patches. I've used RFCs 3376, 3810 and FRR as a reference implementation. The source lists are capped at 32 entries, we can remove that limitation at a later point which would require a better data structure to hold them. IGMPv3 processing is hidden behind the bridge's multicast_igmp_version option which must be set to 3 in order to enable it. MLDv2 processing is hidden behind the bridge's multicast_mld_version which must be set to 2 in order to enable it. Patch 14 improves other querier processing a bit (more about this below). And finally patch 15 transforms the src gc so it can be used with all mcast objects since now we have multiple timers that can be running and we need to make sure they have all finished before freeing the objects. This is part 1, it only adds control plane support and doesn't change the fast path. A following patch-set will take care of that. Here're the sets that will come next (in order): - Fast path patch-set which adds support for (S, G) mdb entries needed for IGMPv3/MLDv2 forwarding, entry add source (kernel, user-space etc) needed for IGMPv3/MLDv2 entry management, entry block mode needed for IGMPv3/MLDv2 exclude mode. This set will also add iproute2 support for manipulating and showing all the new state. - Selftests patches which will verify all state transitions and forwarding - Explicit host tracking patch-set, needed for proper fast leave and with it fast leave will be enabled for IGMPv3/MLDv2 Not implemented yet: - Host IGMPv3/MLDv2 filter support (currently we handle only join/leave as before) - Proper other querier source timer and value updates - IGMPv3/v2 MLDv2/v1 compat (I have a few rough patches for this one) v4: move old patch 05 to 02 (group del patch), before src lists patch 02: set pg's fast leave flag when deleting due to fast leave patch 03: now can use the new port del function add igmpv2/mldv1 bool which are set when the entry is added in those modes (later will be passed as update_timer) patch 10: rename update_timer to igmpv2_mldv1 and use the passed value from br_multicast_add_group's callers v3: add IPv6/MLDv2 support, most patches are changed v2: patches 03-04: make src lists RCU friendly so they can be traversed when dumping, reduce limit to a more conservative 32 src group entries for a start patches 11-13: remove helper and directly do bitops patch 15: force mcast gc on bridge port del to make sure port group timers have finished before freeing the port ==================== Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/uapi/linux/if_bridge.h21
-rw-r--r--net/bridge/br_mdb.c256
-rw-r--r--net/bridge/br_multicast.c1290
-rw-r--r--net/bridge/br_private.h70
4 files changed, 1415 insertions, 222 deletions
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c1227aecd38f..75a2ac479247 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -455,10 +455,31 @@ enum {
enum {
MDBA_MDB_EATTR_UNSPEC,
MDBA_MDB_EATTR_TIMER,
+ MDBA_MDB_EATTR_SRC_LIST,
+ MDBA_MDB_EATTR_GROUP_MODE,
__MDBA_MDB_EATTR_MAX
};
#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
+/* per mdb entry source */
+enum {
+ MDBA_MDB_SRCLIST_UNSPEC,
+ MDBA_MDB_SRCLIST_ENTRY,
+ __MDBA_MDB_SRCLIST_MAX
+};
+#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1)
+
+/* per mdb entry per source attributes
+ * these are embedded in MDBA_MDB_SRCLIST_ENTRY
+ */
+enum {
+ MDBA_MDB_SRCATTR_UNSPEC,
+ MDBA_MDB_SRCATTR_ADDRESS,
+ MDBA_MDB_SRCATTR_TIMER,
+ __MDBA_MDB_SRCATTR_MAX
+};
+#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1)
+
/* multicast router types */
enum {
MDB_RTR_TYPE_DISABLED,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index da5ed4cf9233..67e0976aeed2 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -77,10 +77,67 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
#endif
}
+static int __mdb_fill_srcs(struct sk_buff *skb,
+ struct net_bridge_port_group *p)
+{
+ struct net_bridge_group_src *ent;
+ struct nlattr *nest, *nest_ent;
+
+ if (hlist_empty(&p->src_list))
+ return 0;
+
+ nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
+ if (!nest)
+ return -EMSGSIZE;
+
+ hlist_for_each_entry_rcu(ent, &p->src_list, node,
+ lockdep_is_held(&p->port->br->multicast_lock)) {
+ nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
+ if (!nest_ent)
+ goto out_cancel_err;
+ switch (ent->addr.proto) {
+ case htons(ETH_P_IP):
+ if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+ ent->addr.u.ip4)) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+ &ent->addr.u.ip6)) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ break;
+#endif
+ default:
+ nla_nest_cancel(skb, nest_ent);
+ continue;
+ }
+ if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER,
+ br_timer_value(&ent->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ goto out_cancel_err;
+ }
+ nla_nest_end(skb, nest_ent);
+ }
+
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+out_cancel_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int __mdb_fill_info(struct sk_buff *skb,
struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *p)
{
+ bool dump_srcs_mode = false;
struct timer_list *mtimer;
struct nlattr *nest_ent;
struct br_mdb_entry e;
@@ -119,6 +176,23 @@ static int __mdb_fill_info(struct sk_buff *skb,
nla_nest_cancel(skb, nest_ent);
return -EMSGSIZE;
}
+ switch (mp->addr.proto) {
+ case htons(ETH_P_IP):
+ dump_srcs_mode = !!(p && mp->br->multicast_igmp_version == 3);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ dump_srcs_mode = !!(p && mp->br->multicast_mld_version == 2);
+ break;
+#endif
+ }
+ if (dump_srcs_mode &&
+ (__mdb_fill_srcs(skb, p) ||
+ nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) {
+ nla_nest_cancel(skb, nest_ent);
+ return -EMSGSIZE;
+ }
+
nla_nest_end(skb, nest_ent);
return 0;
@@ -127,7 +201,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
- int idx = 0, s_idx = cb->args[1], err = 0;
+ int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2];
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
@@ -152,7 +226,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
break;
}
- if (mp->host_joined) {
+ if (!s_pidx && mp->host_joined) {
err = __mdb_fill_info(skb, mp, NULL);
if (err) {
nla_nest_cancel(skb, nest2);
@@ -164,13 +238,19 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
pp = &p->next) {
if (!p->port)
continue;
+ if (pidx < s_pidx)
+ goto skip_pg;
err = __mdb_fill_info(skb, mp, p);
if (err) {
nla_nest_cancel(skb, nest2);
goto out;
}
+skip_pg:
+ pidx++;
}
+ pidx = 0;
+ s_pidx = 0;
nla_nest_end(skb, nest2);
skip:
idx++;
@@ -178,6 +258,7 @@ skip:
out:
cb->args[1] = idx;
+ cb->args[2] = pidx;
nla_nest_end(skb, nest);
return err;
}
@@ -263,14 +344,15 @@ out:
static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
struct net_device *dev,
- struct br_mdb_entry *entry, u32 pid,
- u32 seq, int type, unsigned int flags)
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
{
struct nlmsghdr *nlh;
struct br_port_msg *bpm;
struct nlattr *nest, *nest2;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
+ nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0);
if (!nlh)
return -EMSGSIZE;
@@ -285,7 +367,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
if (nest2 == NULL)
goto end;
- if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry))
+ if (__mdb_fill_info(skb, mp, pg))
goto end;
nla_nest_end(skb, nest2);
@@ -300,10 +382,49 @@ cancel:
return -EMSGSIZE;
}
-static inline size_t rtnl_mdb_nlmsg_size(void)
+static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
{
- return NLMSG_ALIGN(sizeof(struct br_port_msg))
- + nla_total_size(sizeof(struct br_mdb_entry));
+ size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ nla_total_size(sizeof(struct br_mdb_entry)) +
+ nla_total_size(sizeof(u32));
+ struct net_bridge_group_src *ent;
+ size_t addr_size = 0;
+
+ if (!pg)
+ goto out;
+
+ switch (pg->addr.proto) {
+ case htons(ETH_P_IP):
+ if (pg->port->br->multicast_igmp_version == 2)
+ goto out;
+ addr_size = sizeof(__be32);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (pg->port->br->multicast_mld_version == 1)
+ goto out;
+ addr_size = sizeof(struct in6_addr);
+ break;
+#endif
+ }
+
+ /* MDBA_MDB_EATTR_GROUP_MODE */
+ nlmsg_size += nla_total_size(sizeof(u8));
+
+ /* MDBA_MDB_EATTR_SRC_LIST nested attr */
+ if (!hlist_empty(&pg->src_list))
+ nlmsg_size += nla_total_size(0);
+
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ /* MDBA_MDB_SRCLIST_ENTRY nested attr +
+ * MDBA_MDB_SRCATTR_ADDRESS + MDBA_MDB_SRCATTR_TIMER
+ */
+ nlmsg_size += nla_total_size(0) +
+ nla_total_size(addr_size) +
+ nla_total_size(sizeof(u32));
+ }
+out:
+ return nlmsg_size;
}
struct br_mdb_complete_info {
@@ -341,21 +462,22 @@ err:
static void br_mdb_switchdev_host_port(struct net_device *dev,
struct net_device *lower_dev,
- struct br_mdb_entry *entry, int type)
+ struct net_bridge_mdb_entry *mp,
+ int type)
{
struct switchdev_obj_port_mdb mdb = {
.obj = {
.id = SWITCHDEV_OBJ_ID_HOST_MDB,
.flags = SWITCHDEV_F_DEFER,
},
- .vid = entry->vid,
+ .vid = mp->addr.vid,
};
- if (entry->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+ if (mp->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(mp->addr.u.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
else
- ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+ ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr);
#endif
mdb.obj.orig_dev = dev;
@@ -370,17 +492,19 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
}
static void br_mdb_switchdev_host(struct net_device *dev,
- struct br_mdb_entry *entry, int type)
+ struct net_bridge_mdb_entry *mp, int type)
{
struct net_device *lower_dev;
struct list_head *iter;
netdev_for_each_lower_dev(dev, lower_dev, iter)
- br_mdb_switchdev_host_port(dev, lower_dev, entry, type);
+ br_mdb_switchdev_host_port(dev, lower_dev, mp, type);
}
-static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
- struct br_mdb_entry *entry, int type)
+void br_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
{
struct br_mdb_complete_info *complete_info;
struct switchdev_obj_port_mdb mdb = {
@@ -388,44 +512,45 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
.id = SWITCHDEV_OBJ_ID_PORT_MDB,
.flags = SWITCHDEV_F_DEFER,
},
- .vid = entry->vid,
+ .vid = mp->addr.vid,
};
- struct net_device *port_dev;
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- port_dev = __dev_get_by_index(net, entry->ifindex);
- if (entry->addr.proto == htons(ETH_P_IP))
- ip_eth_mc_map(entry->addr.u.ip4, mdb.addr);
+ if (pg) {
+ if (mp->addr.proto == htons(ETH_P_IP))
+ ip_eth_mc_map(mp->addr.u.ip4, mdb.addr);
#if IS_ENABLED(CONFIG_IPV6)
- else
- ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr);
+ else
+ ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr);
#endif
-
- mdb.obj.orig_dev = port_dev;
- if (p && port_dev && type == RTM_NEWMDB) {
- complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
- if (complete_info) {
- complete_info->port = p;
- __mdb_entry_to_br_ip(entry, &complete_info->ip);
+ mdb.obj.orig_dev = pg->port->dev;
+ switch (type) {
+ case RTM_NEWMDB:
+ complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC);
+ if (!complete_info)
+ break;
+ complete_info->port = pg->port;
+ complete_info->ip = mp->addr;
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL))
+ if (switchdev_port_obj_add(pg->port->dev, &mdb.obj, NULL))
kfree(complete_info);
+ break;
+ case RTM_DELMDB:
+ switchdev_port_obj_del(pg->port->dev, &mdb.obj);
+ break;
}
- } else if (p && port_dev && type == RTM_DELMDB) {
- switchdev_port_obj_del(port_dev, &mdb.obj);
+ } else {
+ br_mdb_switchdev_host(dev, mp, type);
}
- if (!p)
- br_mdb_switchdev_host(dev, entry, type);
-
- skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC);
+ skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
if (!skb)
goto errout;
- err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_mdb_fill(skb, dev, mp, pg, type);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -437,26 +562,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
-void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type, u8 flags)
-{
- struct br_mdb_entry entry;
-
- memset(&entry, 0, sizeof(entry));
- if (port)
- entry.ifindex = port->dev->ifindex;
- else
- entry.ifindex = dev->ifindex;
- entry.addr.proto = group->proto;
- entry.addr.u.ip4 = group->u.ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- entry.addr.u.ip6 = group->u.ip6;
-#endif
- entry.vid = group->vid;
- __mdb_entry_fill_flags(&entry, flags);
- __br_mdb_notify(dev, port, &entry, type);
-}
-
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
int ifindex, u32 pid,
@@ -600,7 +705,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
}
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, unsigned char state)
+ struct br_ip *group, struct br_mdb_entry *entry)
{
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
@@ -619,12 +724,13 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
/* host join */
if (!port) {
/* don't allow any flags for host-joined groups */
- if (state)
+ if (entry->state)
return -EINVAL;
if (mp->host_joined)
return -EEXIST;
br_multicast_host_join(mp, false);
+ br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
return 0;
}
@@ -638,12 +744,14 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
break;
}
- p = br_multicast_new_port_group(port, group, *pp, state, NULL);
+ p = br_multicast_new_port_group(port, group, *pp, entry->state, NULL,
+ MCAST_EXCLUDE);
if (unlikely(!p))
return -ENOMEM;
rcu_assign_pointer(*pp, p);
- if (state == MDB_TEMPORARY)
+ if (entry->state == MDB_TEMPORARY)
mod_timer(&p->timer, now + br->multicast_membership_interval);
+ br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
return 0;
}
@@ -672,7 +780,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
__mdb_entry_to_br_ip(entry, &ip);
spin_lock_bh(&br->multicast_lock);
- ret = br_mdb_add_group(br, p, &ip, entry->state);
+ ret = br_mdb_add_group(br, p, &ip, entry);
spin_unlock_bh(&br->multicast_lock);
return ret;
}
@@ -717,12 +825,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = __br_mdb_add(net, br, entry);
if (err)
break;
- __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
}
} else {
err = __br_mdb_add(net, br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_NEWMDB);
}
return err;
@@ -750,6 +855,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) {
br_multicast_host_leave(mp, false);
err = 0;
+ br_mdb_notify(br->dev, mp, NULL, RTM_DELMDB);
if (!mp->ports && netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
goto unlock;
@@ -764,16 +870,8 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (p->port->state == BR_STATE_DISABLED)
goto unlock;
- __mdb_entry_fill_flags(entry, p->flags);
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- kfree_rcu(p, rcu);
+ br_multicast_del_pg(mp, p, pp);
err = 0;
-
- if (!mp->ports && !mp->host_joined &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
break;
}
@@ -820,13 +918,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
list_for_each_entry(v, &vg->vlan_list, vlist) {
entry->vid = v->vid;
err = __br_mdb_del(br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_DELMDB);
}
} else {
err = __br_mdb_del(br, entry);
- if (!err)
- __br_mdb_notify(dev, p, entry, RTM_DELMDB);
}
return err;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4c4a93abde68..b83f11228948 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -50,6 +50,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
__be32 group,
__u16 vid,
const unsigned char *src);
+static void br_multicast_port_group_rexmit(struct timer_list *t);
static void __del_port_router(struct net_bridge_port *p);
#if IS_ENABLED(CONFIG_IPV6)
@@ -139,6 +140,29 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_mdb_entry *mp;
+
+ mp = container_of(gc, struct net_bridge_mdb_entry, mcast_gc);
+ WARN_ON(!hlist_unhashed(&mp->mdb_node));
+ WARN_ON(mp->ports);
+
+ del_timer_sync(&mp->timer);
+ kfree_rcu(mp, rcu);
+}
+
+static void br_multicast_del_mdb_entry(struct net_bridge_mdb_entry *mp)
+{
+ struct net_bridge *br = mp->br;
+
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_init_rcu(&mp->mdb_node);
+ hlist_add_head(&mp->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+}
+
static void br_multicast_group_expired(struct timer_list *t)
{
struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
@@ -152,23 +176,71 @@ static void br_multicast_group_expired(struct timer_list *t)
if (mp->ports)
goto out;
+ br_multicast_del_mdb_entry(mp);
+out:
+ spin_unlock(&br->multicast_lock);
+}
- rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
- br_mdb_rht_params);
- hlist_del_rcu(&mp->mdb_node);
+static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_group_src *src;
- kfree_rcu(mp, rcu);
+ src = container_of(gc, struct net_bridge_group_src, mcast_gc);
+ WARN_ON(!hlist_unhashed(&src->node));
-out:
- spin_unlock(&br->multicast_lock);
+ del_timer_sync(&src->timer);
+ kfree_rcu(src, rcu);
}
-static void br_multicast_del_pg(struct net_bridge *br,
- struct net_bridge_port_group *pg)
+static void br_multicast_del_group_src(struct net_bridge_group_src *src)
{
+ struct net_bridge *br = src->pg->port->br;
+
+ hlist_del_init_rcu(&src->node);
+ src->pg->src_ents--;
+ hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+}
+
+static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
+{
+ struct net_bridge_port_group *pg;
+
+ pg = container_of(gc, struct net_bridge_port_group, mcast_gc);
+ WARN_ON(!hlist_unhashed(&pg->mglist));
+ WARN_ON(!hlist_empty(&pg->src_list));
+
+ del_timer_sync(&pg->rexmit_timer);
+ del_timer_sync(&pg->timer);
+ kfree_rcu(pg, rcu);
+}
+
+void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ struct net_bridge_port_group __rcu **pp)
+{
+ struct net_bridge *br = pg->port->br;
+ struct net_bridge_group_src *ent;
+ struct hlist_node *tmp;
+
+ rcu_assign_pointer(*pp, pg->next);
+ hlist_del_init(&pg->mglist);
+ hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node)
+ br_multicast_del_group_src(ent);
+ br_mdb_notify(br->dev, mp, pg, RTM_DELMDB);
+ hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
+ queue_work(system_long_wq, &br->mcast_gc_work);
+
+ if (!mp->ports && !mp->host_joined && netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
+}
+
+static void br_multicast_find_del_pg(struct net_bridge *br,
+ struct net_bridge_port_group *pg)
+{
+ struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
- struct net_bridge_port_group __rcu **pp;
mp = br_mdb_ip_get(br, &pg->addr);
if (WARN_ON(!mp))
@@ -180,17 +252,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
if (p != pg)
continue;
- rcu_assign_pointer(*pp, p->next);
- hlist_del_init(&p->mglist);
- del_timer(&p->timer);
- br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
- p->flags);
- kfree_rcu(p, rcu);
-
- if (!mp->ports && !mp->host_joined &&
- netif_running(br->dev))
- mod_timer(&mp->timer, jiffies);
-
+ br_multicast_del_pg(mp, pg, pp);
return;
}
@@ -200,35 +262,95 @@ static void br_multicast_del_pg(struct net_bridge *br,
static void br_multicast_port_group_expired(struct timer_list *t)
{
struct net_bridge_port_group *pg = from_timer(pg, t, timer);
+ struct net_bridge_group_src *src_ent;
struct net_bridge *br = pg->port->br;
+ struct hlist_node *tmp;
+ bool changed;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT)
goto out;
- br_multicast_del_pg(br, pg);
+ changed = !!(pg->filter_mode == MCAST_EXCLUDE);
+ pg->filter_mode = MCAST_INCLUDE;
+ hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) {
+ if (!timer_pending(&src_ent->timer)) {
+ br_multicast_del_group_src(src_ent);
+ changed = true;
+ }
+ }
+
+ if (hlist_empty(&pg->src_list)) {
+ br_multicast_find_del_pg(br, pg);
+ } else if (changed) {
+ struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->addr);
+ if (WARN_ON(!mp))
+ goto out;
+ br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB);
+ }
out:
spin_unlock(&br->multicast_lock);
}
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
- __be32 group,
- u8 *igmp_type)
+static void br_multicast_gc(struct hlist_head *head)
{
+ struct net_bridge_mcast_gc *gcent;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(gcent, tmp, head, gc_node) {
+ hlist_del_init(&gcent->gc_node);
+ gcent->destroy(gcent);
+ }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+ struct net_bridge_port_group *pg,
+ __be32 ip_dst, __be32 group,
+ bool with_srcs, bool over_lmqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
+{
+ struct net_bridge_port *p = pg ? pg->port : NULL;
+ struct net_bridge_group_src *ent;
+ size_t pkt_size, igmp_hdr_size;
+ unsigned long now = jiffies;
struct igmpv3_query *ihv3;
- size_t igmp_hdr_size;
+ void *csum_start = NULL;
+ __sum16 *csum = NULL;
struct sk_buff *skb;
struct igmphdr *ih;
struct ethhdr *eth;
+ unsigned long lmqt;
struct iphdr *iph;
+ u16 lmqt_srcs = 0;
igmp_hdr_size = sizeof(*ih);
- if (br->multicast_igmp_version == 3)
+ if (br->multicast_igmp_version == 3) {
igmp_hdr_size = sizeof(*ihv3);
- skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
- igmp_hdr_size + 4);
+ if (pg && with_srcs) {
+ lmqt = now + (br->multicast_last_member_interval *
+ br->multicast_last_member_count);
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_lmqt == time_after(ent->timer.expires,
+ lmqt) &&
+ ent->src_query_rexmit_cnt > 0)
+ lmqt_srcs++;
+ }
+
+ if (!lmqt_srcs)
+ return NULL;
+ igmp_hdr_size += lmqt_srcs * sizeof(__be32);
+ }
+ }
+
+ pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
+ if ((p && pkt_size > p->dev->mtu) ||
+ pkt_size > br->dev->mtu)
+ return NULL;
+
+ skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
if (!skb)
goto out;
@@ -238,29 +360,24 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
eth = eth_hdr(skb);
ether_addr_copy(eth->h_source, br->dev->dev_addr);
- eth->h_dest[0] = 1;
- eth->h_dest[1] = 0;
- eth->h_dest[2] = 0x5e;
- eth->h_dest[3] = 0;
- eth->h_dest[4] = 0;
- eth->h_dest[5] = 1;
+ ip_eth_mc_map(ip_dst, eth->h_dest);
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
skb_set_network_header(skb, skb->len);
iph = ip_hdr(skb);
+ iph->tot_len = htons(pkt_size - sizeof(*eth));
iph->version = 4;
iph->ihl = 6;
iph->tos = 0xc0;
- iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
iph->id = 0;
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
- iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
+ iph->daddr = ip_dst;
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -280,7 +397,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
- ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
+ csum = &ih->csum;
+ csum_start = (void *)ih;
break;
case 3:
ihv3 = igmpv3_query_hdr(skb);
@@ -290,15 +408,40 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
ihv3->qqic = br->multicast_query_interval / HZ;
- ihv3->nsrcs = 0;
+ ihv3->nsrcs = htons(lmqt_srcs);
ihv3->resv = 0;
- ihv3->suppress = 0;
+ ihv3->suppress = sflag;
ihv3->qrv = 2;
ihv3->csum = 0;
- ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
+ csum = &ihv3->csum;
+ csum_start = (void *)ihv3;
+ if (!pg || !with_srcs)
+ break;
+
+ lmqt_srcs = 0;
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_lmqt == time_after(ent->timer.expires,
+ lmqt) &&
+ ent->src_query_rexmit_cnt > 0) {
+ ihv3->srcs[lmqt_srcs++] = ent->addr.u.ip4;
+ ent->src_query_rexmit_cnt--;
+ if (need_rexmit && ent->src_query_rexmit_cnt)
+ *need_rexmit = true;
+ }
+ }
+ if (WARN_ON(lmqt_srcs != ntohs(ihv3->nsrcs))) {
+ kfree_skb(skb);
+ return NULL;
+ }
break;
}
+ if (WARN_ON(!csum || !csum_start)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ *csum = ip_compute_csum(csum_start, igmp_hdr_size);
skb_put(skb, igmp_hdr_size);
__skb_pull(skb, sizeof(*eth));
@@ -308,23 +451,54 @@ out:
#if IS_ENABLED(CONFIG_IPV6)
static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
- const struct in6_addr *grp,
- u8 *igmp_type)
-{
+ struct net_bridge_port_group *pg,
+ const struct in6_addr *ip6_dst,
+ const struct in6_addr *group,
+ bool with_srcs, bool over_llqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
+{
+ struct net_bridge_port *p = pg ? pg->port : NULL;
+ struct net_bridge_group_src *ent;
+ size_t pkt_size, mld_hdr_size;
+ unsigned long now = jiffies;
struct mld2_query *mld2q;
+ void *csum_start = NULL;
unsigned long interval;
+ __sum16 *csum = NULL;
struct ipv6hdr *ip6h;
struct mld_msg *mldq;
- size_t mld_hdr_size;
struct sk_buff *skb;
+ unsigned long llqt;
struct ethhdr *eth;
+ u16 llqt_srcs = 0;
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
- if (br->multicast_mld_version == 2)
+ if (br->multicast_mld_version == 2) {
mld_hdr_size = sizeof(*mld2q);
- skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
- 8 + mld_hdr_size);
+ if (pg && with_srcs) {
+ llqt = now + (br->multicast_last_member_interval *
+ br->multicast_last_member_count);
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_llqt == time_after(ent->timer.expires,
+ llqt) &&
+ ent->src_query_rexmit_cnt > 0)
+ llqt_srcs++;
+ }
+
+ if (!llqt_srcs)
+ return NULL;
+ mld_hdr_size += llqt_srcs * sizeof(struct in6_addr);
+ }
+ }
+
+ pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
+ if ((p && pkt_size > p->dev->mtu) ||
+ pkt_size > br->dev->mtu)
+ return NULL;
+
+ skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
if (!skb)
goto out;
@@ -346,7 +520,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->payload_len = htons(8 + mld_hdr_size);
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
- ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+ ip6h->daddr = *ip6_dst;
if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
&ip6h->saddr)) {
kfree_skb(skb);
@@ -371,7 +545,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
- interval = ipv6_addr_any(grp) ?
+ interval = ipv6_addr_any(group) ?
br->multicast_query_response_interval :
br->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
@@ -383,12 +557,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mldq->mld_cksum = 0;
mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
mldq->mld_reserved = 0;
- mldq->mld_mca = *grp;
- mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- sizeof(*mldq), IPPROTO_ICMPV6,
- csum_partial(mldq,
- sizeof(*mldq),
- 0));
+ mldq->mld_mca = *group;
+ csum = &mldq->mld_cksum;
+ csum_start = (void *)mldq;
break;
case 2:
mld2q = (struct mld2_query *)icmp6_hdr(skb);
@@ -398,21 +569,43 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mld2q->mld2q_cksum = 0;
mld2q->mld2q_resv1 = 0;
mld2q->mld2q_resv2 = 0;
- mld2q->mld2q_suppress = 0;
+ mld2q->mld2q_suppress = sflag;
mld2q->mld2q_qrv = 2;
- mld2q->mld2q_nsrcs = 0;
+ mld2q->mld2q_nsrcs = htons(llqt_srcs);
mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
- mld2q->mld2q_mca = *grp;
- mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
- sizeof(*mld2q),
- IPPROTO_ICMPV6,
- csum_partial(mld2q,
- sizeof(*mld2q),
- 0));
+ mld2q->mld2q_mca = *group;
+ csum = &mld2q->mld2q_cksum;
+ csum_start = (void *)mld2q;
+ if (!pg || !with_srcs)
+ break;
+
+ llqt_srcs = 0;
+ hlist_for_each_entry(ent, &pg->src_list, node) {
+ if (over_llqt == time_after(ent->timer.expires,
+ llqt) &&
+ ent->src_query_rexmit_cnt > 0) {
+ mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.u.ip6;
+ ent->src_query_rexmit_cnt--;
+ if (need_rexmit && ent->src_query_rexmit_cnt)
+ *need_rexmit = true;
+ }
+ }
+ if (WARN_ON(llqt_srcs != ntohs(mld2q->mld2q_nsrcs))) {
+ kfree_skb(skb);
+ return NULL;
+ }
break;
}
- skb_put(skb, mld_hdr_size);
+ if (WARN_ON(!csum || !csum_start)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ *csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, mld_hdr_size,
+ IPPROTO_ICMPV6,
+ csum_partial(csum_start, mld_hdr_size, 0));
+ skb_put(skb, mld_hdr_size);
__skb_pull(skb, sizeof(*eth));
out:
@@ -421,16 +614,39 @@ out:
#endif
static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
- struct br_ip *addr,
- u8 *igmp_type)
+ struct net_bridge_port_group *pg,
+ struct br_ip *ip_dst,
+ struct br_ip *group,
+ bool with_srcs, bool over_lmqt,
+ u8 sflag, u8 *igmp_type,
+ bool *need_rexmit)
{
- switch (addr->proto) {
+ __be32 ip4_dst;
+
+ switch (group->proto) {
case htons(ETH_P_IP):
- return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type);
+ ip4_dst = ip_dst ? ip_dst->u.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
+ return br_ip4_multicast_alloc_query(br, pg,
+ ip4_dst, group->u.ip4,
+ with_srcs, over_lmqt,
+ sflag, igmp_type,
+ need_rexmit);
#if IS_ENABLED(CONFIG_IPV6)
- case htons(ETH_P_IPV6):
- return br_ip6_multicast_alloc_query(br, &addr->u.ip6,
- igmp_type);
+ case htons(ET