diff options
Diffstat (limited to 'net/ipv4')
45 files changed, 705 insertions, 515 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9d97bace13c8..9e1a186a3671 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o +obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2fe295432c24..cf58e29cf746 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,7 +872,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for EPOLLHUP, even on eg. unconnected UDP sockets -- RR */ - /* fall through */ + fallthrough; default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -886,7 +886,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* fall through */ + fallthrough; case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; @@ -1793,6 +1793,10 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); free_percpu(net->mib.tcp_statistics); +#ifdef CONFIG_MPTCP + /* allocated on demand, see mptcp_init_sock() */ + free_percpu(net->mib.mptcp_statistics); +#endif } static __net_initdata struct pernet_operations ipv4_mib_ops = { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 974179b3b314..d99e1be94019 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -107,7 +107,7 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) if (optlen < 6) return -EINVAL; memcpy(daddr, optptr+optlen-4, 4); - /* Fall through */ + fallthrough; default: memset(optptr, 0, optlen); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 05eb42f347e8..687971d83b4e 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1181,7 +1181,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - /* fall through */ + fallthrough; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 2bf3abeb1456..e3939f76b024 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -7,6 +7,7 @@ #include <linux/btf.h> #include <linux/filter.h> #include <net/tcp.h> +#include <net/bpf_sk_storage.h> static u32 optional_ops[] = { offsetof(struct tcp_congestion_ops, init), @@ -27,6 +28,27 @@ static u32 unsupported_ops[] = { static const struct btf_type *tcp_sock_type; static u32 tcp_sock_id, sock_id; +static int btf_sk_storage_get_ids[5]; +static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly; + +static int btf_sk_storage_delete_ids[5]; +static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly; + +static void convert_sk_func_proto(struct bpf_func_proto *to, int *to_btf_ids, + const struct bpf_func_proto *from) +{ + int i; + + *to = *from; + to->btf_id = to_btf_ids; + for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) { + if (to->arg_type[i] == ARG_PTR_TO_SOCKET) { + to->arg_type[i] = ARG_PTR_TO_BTF_ID; + to->btf_id[i] = tcp_sock_id; + } + } +} + static int bpf_tcp_ca_init(struct btf *btf) { s32 type_id; @@ -42,6 +64,13 @@ static int bpf_tcp_ca_init(struct btf *btf) tcp_sock_id = type_id; tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); + convert_sk_func_proto(&btf_sk_storage_get_proto, + btf_sk_storage_get_ids, + &bpf_sk_storage_get_proto); + convert_sk_func_proto(&btf_sk_storage_delete_proto, + btf_sk_storage_delete_ids, + &bpf_sk_storage_delete_proto); + return 0; } @@ -167,6 +196,10 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, switch (func_id) { case BPF_FUNC_tcp_send_ack: return &bpf_tcp_send_ack_proto; + case BPF_FUNC_sk_storage_get: + return &btf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &btf_sk_storage_delete_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e4632bd2026d..30fa42f5997d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1566,11 +1566,11 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } } ip_mc_up(in_dev); - /* fall through */ + fallthrough; case NETDEV_CHANGEADDR: if (!IN_DEV_ARP_NOTIFY(in_dev)) break; - /* fall through */ + fallthrough; case NETDEV_NOTIFY_PEERS: /* Send gratuitous ARP to notify of link change */ inetdev_send_gratuitous_arp(dev, in_dev); @@ -1588,7 +1588,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ - /* fall through */ + fallthrough; case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 103c7d599a3c..8b07f3a4f2db 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -341,22 +341,6 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) esp_output_done(base, err); } -static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) -{ - /* Fill padding... */ - if (tfclen) { - memset(tail, 0, tfclen); - tail += tfclen; - } - do { - int i; - for (i = 0; i < plen - 2; i++) - tail[i] = i + 1; - } while (0); - tail[plen - 2] = plen - 2; - tail[plen - 1] = proto; -} - static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb, int encap_type, struct esp_info *esp, diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e2e219c7854a..731022cff600 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -132,6 +132,36 @@ static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, return segs; } +static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + struct sk_buff *segs = ERR_PTR(-EINVAL); + const struct net_offload *ops; + int proto = xo->proto; + + skb->transport_header += x->props.header_len; + + if (proto == IPPROTO_BEETPH) { + struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; + + skb->transport_header += ph->hdrlen * 8; + proto = ph->nexthdr; + } else if (x->sel.family != AF_INET6) { + skb->transport_header -= IPV4_BEET_PHMAXLEN; + } else if (proto == IPPROTO_TCP) { + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + } + + __skb_pull(skb, skb_transport_offset(skb)); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) @@ -141,6 +171,8 @@ static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, return xfrm4_tunnel_gso_segment(x, skb, features); case XFRM_MODE_TRANSPORT: return xfrm4_transport_gso_segment(x, skb, features); + case XFRM_MODE_BEET: + return xfrm4_beet_gso_segment(x, skb, features); } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c092e9a55790..818916b2a04d 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -35,7 +35,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) void fib_release_info(struct fib_info *); struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a803cdd9400a..6ed8c9317179 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -570,8 +570,9 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } -int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, - u16 encap_type, void *cfg, gfp_t gfp_flags, +int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, + struct nlattr *encap, u16 encap_type, + void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { int err; @@ -589,8 +590,9 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, err = -EINVAL; goto lwt_failure; } - err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, - cfg, &lwtstate, extack); + err = lwtunnel_build_state(net, encap_type, encap, + nhc->nhc_family, cfg, &lwtstate, + extack); if (err) goto lwt_failure; @@ -614,7 +616,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_family = AF_INET; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) return err; @@ -814,7 +816,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #endif /* CONFIG_IP_ROUTE_MULTIPATH */ -static int fib_encap_match(u16 encap_type, +static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, const struct fib_nh *nh, const struct fib_config *cfg, @@ -826,7 +828,7 @@ static int fib_encap_match(u16 encap_type, if (encap_type == LWTUNNEL_ENCAP_NONE) return 0; - ret = lwtunnel_build_state(encap_type, encap, AF_INET, + ret = lwtunnel_build_state(net, encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); @@ -836,7 +838,7 @@ static int fib_encap_match(u16 encap_type, return result; } -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, +int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack) { #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -857,8 +859,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { - if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, - nh, cfg, extack)) + if (fib_encap_match(net, cfg->fc_encap_type, + cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1962,7 +1964,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; @@ -1984,7 +1986,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) case NETDEV_DOWN: case NETDEV_UNREGISTER: fi->fib_flags |= RTNH_F_DEAD; - /* fall through */ + fallthrough; case NETDEV_CHANGE: fi->fib_flags |= RTNH_F_LINKDOWN; break; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ff0c24371e33..4f334b425538 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -35,9 +35,6 @@ * Paul E. McKenney <paulmck@us.ibm.com> * Patrick McHardy <kaber@trash.net> */ - -#define VERSION "0.409" - #include <linux/cache.h> #include <linux/uaccess.h> #include <linux/bitops.h> @@ -304,8 +301,6 @@ static inline void alias_free_mem_rcu(struct fib_alias *fa) call_rcu(&fa->rcu, __alias_free_mem); } -#define TNODE_KMALLOC_MAX \ - ilog2((PAGE_SIZE - TNODE_SIZE(0)) / sizeof(struct key_vector *)) #define TNODE_VMALLOC_MAX \ ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *)) @@ -1684,7 +1679,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi, extack) == 0 && + fib_nh_match(net, cfg, fi, extack) == 0 && fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; @@ -2577,6 +2572,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0)); + rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; @@ -2596,7 +2592,9 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v) trie_show_usage(seq, t->stats); #endif } + cond_resched_rcu(); } + rcu_read_unlock(); return 0; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f369e7ce685b..fc61f51d87a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -865,7 +865,7 @@ static bool icmp_unreach(struct sk_buff *skb) case 3: if (!icmp_tag_validation(iph->protocol)) goto out; - /* fall through */ + fallthrough; case 0: info = ntohs(icmph->un.frag.mtu); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3b9c7a2725a9..47f0502b2101 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -107,8 +107,6 @@ #ifdef CONFIG_IP_MULTICAST /* Parameter names and values are taken from igmp-v2-06 draft */ -#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ) -#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ) #define IGMP_QUERY_INTERVAL (125*HZ) #define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d545fb99a8a1..5f34eb951627 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -131,7 +131,7 @@ static int inet_csk_bind_conflict(const struct sock *sk, { struct sock *sk2; bool reuse = sk->sk_reuse; - bool reuseport = !!sk->sk_reuseport && reuseport_ok; + bool reuseport = !!sk->sk_reuseport; kuid_t uid = sock_i_uid((struct sock *)sk); /* @@ -146,17 +146,21 @@ static int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - (!reuseport || !sk2->sk_reuseport || - rcu_access_pointer(sk->sk_reuseport_cb) || - (sk2->sk_state != TCP_TIME_WAIT && - !uid_eq(uid, sock_i_uid(sk2))))) { - if (inet_rcv_saddr_equal(sk, sk2, true)) - break; - } - if (!relax && reuse && sk2->sk_reuse && + if (reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { + if ((!relax || + (!reuseport_ok && + reuseport && sk2->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + (sk2->sk_state == TCP_TIME_WAIT || + uid_eq(uid, sock_i_uid(sk2))))) && + inet_rcv_saddr_equal(sk, sk2, true)) + break; + } else if (!reuseport_ok || + !reuseport || !sk2->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2)))) { if (inet_rcv_saddr_equal(sk, sk2, true)) break; } @@ -176,12 +180,14 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int * int port = 0; struct inet_bind_hashbucket *head; struct net *net = sock_net(sk); + bool relax = false; int i, low, high, attempt_half; struct inet_bind_bucket *tb; u32 remaining, offset; int l3mdev; l3mdev = inet_sk_bound_l3mdev(sk); +ports_exhausted: attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; other_half_scan: inet_get_local_port_range(net, &low, &high); @@ -219,7 +225,7 @@ other_parity_scan: inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && tb->port == port) { - if (!inet_csk_bind_conflict(sk, tb, false, false)) + if (!inet_csk_bind_conflict(sk, tb, relax, false)) goto success; goto next_port; } @@ -239,6 +245,12 @@ next_port: attempt_half = 2; goto other_half_scan; } + + if (net->ipv4.sysctl_ip_autobind_reuse && !relax) { + /* We still have a chance to connect to different destinations */ + relax = true; + goto ports_exhausted; + } return NULL; success: *port_ret = port; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8c8377568a78..5d50aad3cdbf 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -23,6 +23,7 @@ #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/inet6_hashtables.h> +#include <net/bpf_sk_storage.h> #include <net/netlink.h> #include <linux/inet.h> @@ -170,26 +171,28 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); +#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, - struct sk_buff *skb, const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) + struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *req, + u16 nlmsg_flags, bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; + struct inet_diag_dump_data *cb_data; int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + cb_data = cb->data; handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(!handler); - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -201,7 +204,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, + sk_user_ns(NETLINK_CB(cb->skb).sk), + net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -298,6 +303,48 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } + /* Keep it at the end for potential retry with a larger skb, + * or else do best-effort fitting, which is only done for the + * first_nlmsg. + */ + if (cb_data->bpf_stg_diag) { + bool first_nlmsg = ((unsigned char *)nlh == skb->data); + unsigned int prev_min_dump_alloc; + unsigned int total_nla_size = 0; + unsigned int msg_len; + int err; + + msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, + INET_DIAG_SK_BPF_STORAGES, + &total_nla_size); + + if (!err) + goto out; + + total_nla_size += msg_len; + prev_min_dump_alloc = cb->min_dump_alloc; + if (total_nla_size > prev_min_dump_alloc) + cb->min_dump_alloc = min_t(u32, total_nla_size, + MAX_DUMP_ALLOC_SIZE); + + if (!first_nlmsg) + goto errout; + + if (cb->min_dump_alloc > prev_min_dump_alloc) + /* Retry with pskb_expand_head() with + * __GFP_DIRECT_RECLAIM + */ + goto errout; + + WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); + + /* Send what we have for this sk + * and move on to the next sk in the following + * dump() + */ + } + out: nlmsg_end(skb, nlh); return 0; @@ -308,30 +355,19 @@ errout: } EXPORT_SYMBOL_GPL(inet_sk_diag_fill); -static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - const struct inet_diag_req_v2 *req, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, - bool net_admin) -{ - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, - portid, seq, nlmsg_flags, unlh, net_admin); -} - static int inet_twsk_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + struct netlink_callback *cb, + u16 nlmsg_flags) { struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, + sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -355,16 +391,16 @@ static int inet_twsk_diag_fill(struct sock *sk, } static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + struct netlink_callback *cb, + u16 nlmsg_flags, bool net_admin) { struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), - nlmsg_flags); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -393,21 +429,18 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - struct user_namespace *user_ns, - u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh, bool net_admin) + u16 nlmsg_flags, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) - return inet_twsk_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags); if (sk->sk_state == TCP_NEW_SYN_RECV) - return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin); - return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh, net_admin); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags, + net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -455,10 +488,10 @@ struct sock *inet_diag_find_one_icsk(struct net *net, EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, - str |