diff options
Diffstat (limited to 'net/ipv4')
46 files changed, 1637 insertions, 785 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a07b7dd06def..eec9569ffa5c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,7 +13,10 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ - inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ + metrics.o netlink.o + +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8a59428e63ab..15e125558c76 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -994,7 +994,9 @@ const struct proto_ops inet_stream_ops = { .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, - .mmap = sock_no_mmap, +#ifdef CONFIG_MMU + .mmap = tcp_mmap, +#endif .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, @@ -1006,6 +1008,7 @@ const struct proto_ops inet_stream_ops = { .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif + .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL(inet_stream_ops); diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile new file mode 100644 index 000000000000..ce262d76cc48 --- /dev/null +++ b/net/ipv4/bpfilter/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BPFILTER) += sockopt.o + diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c new file mode 100644 index 000000000000..5e04ed25bc0e --- /dev/null +++ b/net/ipv4/bpfilter/sockopt.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/uaccess.h> +#include <linux/bpfilter.h> +#include <uapi/linux/bpf.h> +#include <linux/wait.h> +#include <linux/kmod.h> + +int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int 
optlen, bool is_set); +EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); + +static int bpfilter_mbox_request(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set) +{ + if (!bpfilter_process_sockopt) { + int err = request_module("bpfilter"); + + if (err) + return err; + if (!bpfilter_process_sockopt) + return -ECHILD; + } + return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); +} + +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) +{ + return bpfilter_mbox_request(sk, optname, optval, optlen, true); +} + +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) +{ + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + return bpfilter_mbox_request(sk, optname, optval, len, false); +} diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 40f001782c1b..d7585ab1a77a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, [IFA_FLAGS] = { .type = NLA_U32 }, + [IFA_RT_PRIORITY] = { .type = NLA_U32 }, }; #define IN4_ADDR_HSIZE_SHIFT 8 @@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); + if (tb[IFA_RT_PRIORITY]) + ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]); + if (tb[IFA_CACHEINFO]) { struct ifa_cacheinfo *ci; @@ -906,12 +910,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, extack); } else { + u32 new_metric = ifa->ifa_rt_priority; + inet_free_ifa(ifa); if (nlh->nlmsg_flags & NLM_F_EXCL || !(nlh->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; ifa = ifa_existing; + + if (ifa->ifa_rt_priority != new_metric) { + 
fib_modify_prefix_metric(ifa, new_metric); + ifa->ifa_rt_priority = new_metric; + } + set_ifa_lifetime(ifa, valid_lft, prefered_lft); cancel_delayed_work(&check_lifetime_work); queue_delayed_work(system_power_efficient_wq, @@ -1549,6 +1561,7 @@ static size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_BROADCAST */ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */ + nla_total_size(4) /* IFA_FLAGS */ + + nla_total_size(4) /* IFA_RT_PRIORITY */ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */ } @@ -1618,6 +1631,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, (ifa->ifa_label[0] && nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || + (ifa->ifa_rt_priority && + nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, preferred, valid)) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e66172aaf241..63aa39b3af03 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.fl4_dport = 0; } - trace_fib_validate_source(dev, &fl4); - if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && @@ -650,6 +648,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_IP_PROTO] = { .type = NLA_U8 }, + [RTA_SPORT] = { .type = NLA_U16 }, + [RTA_DPORT] = { .type = NLA_U16 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, @@ -846,7 +847,8 @@ out_err: * to fib engine. It is legal, because all events occur * only when netlink is already locked. 
*/ -static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, __be32 dst, int dst_len, + struct in_ifaddr *ifa, u32 rt_priority) { struct net *net = dev_net(ifa->ifa_dev->dev); u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); @@ -856,6 +858,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad .fc_type = type, .fc_dst = dst, .fc_dst_len = dst_len, + .fc_priority = rt_priority, .fc_prefsrc = ifa->ifa_local, .fc_oif = ifa->ifa_dev->dev->ifindex, .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, @@ -901,31 +904,57 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) } } - fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0); if (!(dev->flags & IFF_UP)) return; /* Add broadcast address, if it is explicitly assigned. */ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) - fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, + prim, 0); if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) fib_magic(RTM_NEWROUTE, dev->flags & IFF_LOOPBACK ? 
RTN_LOCAL : RTN_UNICAST, - prefix, ifa->ifa_prefixlen, prim); + prefix, ifa->ifa_prefixlen, prim, + ifa->ifa_rt_priority); /* Add network specific broadcasts, when it takes a sense */ if (ifa->ifa_prefixlen < 31) { - fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); + fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, + prim, 0); fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, - 32, prim); + 32, prim, 0); } } } +void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric) +{ + __be32 prefix = ifa->ifa_address & ifa->ifa_mask; + struct in_device *in_dev = ifa->ifa_dev; + struct net_device *dev = in_dev->dev; + + if (!(dev->flags & IFF_UP) || + ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || + ipv4_is_zeronet(prefix) || + prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) + return; + + /* add the new */ + fib_magic(RTM_NEWROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + prefix, ifa->ifa_prefixlen, ifa, new_metric); + + /* delete the old */ + fib_magic(RTM_DELROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority); +} + /* Delete primary or secondary address. * Optionally, on secondary address promotion consider the addresses * from subnet iprim as deleted, even if they are in device list. @@ -967,7 +996,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) fib_magic(RTM_DELROUTE, dev->flags & IFF_LOOPBACK ? 
RTN_LOCAL : RTN_UNICAST, - any, ifa->ifa_prefixlen, prim); + any, ifa->ifa_prefixlen, prim, 0); subnet = 1; } @@ -1051,17 +1080,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) no_promotions: if (!(ok & BRD_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); + fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, + prim, 0); if (subnet && ifa->ifa_prefixlen < 31) { if (!(ok & BRD1_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); + fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, + prim, 0); if (!(ok & BRD0_OK)) - fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); + fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, + prim, 0); } if (!(ok & LOCAL_OK)) { unsigned int addr_type; - fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0); /* Check, that this local address finally disappeared. */ addr_type = inet_addr_type_dev_table(dev_net(dev), dev, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 737d11bc8838..f8eb78d042a4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, - struct nlattr **tb) + struct nlattr **tb, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; - if (frh->tos & ~IPTOS_TOS_MASK) + if (frh->tos & ~IPTOS_TOS_MASK) { + NL_SET_ERR_MSG(extack, "Invalid tos"); goto errout; + } /* split local/main if they are not already split */ err = fib_unmerge(net); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c27122f01b87..f3c89ccf14c5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) 
nla_strlcpy(tmp, nla, sizeof(tmp)); val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); } else { + if (nla_len(nla) != sizeof(u32)) + return false; val = nla_get_u32(nla); } @@ -1019,47 +1021,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) static int fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) { - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - - if (!cfg->fc_mx) - return 0; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (type > RTAX_MAX) - return -EINVAL; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - return -EINVAL; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_ADVMSS && val > 65535 - 40) - val = 65535 - 40; - if (type == RTAX_MTU && val > 65535 - 15) - val = 65535 - 15; - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - return -EINVAL; - fi->fib_metrics->metrics[type - 1] = val; - } - - if (ecn_ca) - fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - - return 0; + return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, + fi->fib_metrics->metrics); } struct fib_info *fib_create_info(struct fib_config *cfg, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 99c23a0cb8ca..5bc0c89e81e4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, unsigned long index; t_key cindex; - trace_fib_table_lookup(tb->tb_id, flp); - pn = t->kv; cindex = 0; n = get_child_rcu(pn, cindex); - if (!n) + if (!n) { + trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN); return -EAGAIN; + } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->gets); @@ -1416,8 +1416,11 @@ 
backtrace: * nothing for us to do as we do not have any * further nodes to parse. */ - if (IS_TRIE(pn)) + if (IS_TRIE(pn)) { + trace_fib_table_lookup(tb->tb_id, flp, + NULL, -EAGAIN); return -EAGAIN; + } #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif @@ -1459,6 +1462,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif + trace_fib_table_lookup(tb->tb_id, flp, NULL, err); return err; } if (fi->fib_flags & RTNH_F_DEAD) @@ -1494,7 +1498,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup_nh(nh); + trace_fib_table_lookup(tb->tb_id, flp, nh, err); return err; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 881ac6d046f2..33a88e045efd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -27,11 +27,6 @@ #include <net/sock_reuseport.h> #include <net/addrconf.h> -#ifdef INET_CSK_DEBUG -const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; -EXPORT_SYMBOL(inet_csk_timer_bug_msg); -#endif - #if IS_ENABLED(CONFIG_IPV6) /* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 * only, and any IPv4 addresses if not IPv6 only diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f200b304f76c..2d8efeecf619 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -578,6 +578,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, int tunnel_hlen; int version; __be16 df; + int nhoff; + int thoff; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@ -605,6 +607,16 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } + nhoff = skb_network_header(skb) - skb_mac_header(skb); + if (skb->protocol == htons(ETH_P_IP) && + (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff)) + truncate = true; + + thoff = skb_transport_header(skb) - 
skb_mac_header(skb); + if (skb->protocol == htons(ETH_P_IPV6) && + (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) + truncate = true; + if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d54abc097800..af5a830ff6ad 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -878,11 +878,14 @@ static int __ip_append_data(struct sock *sk, struct rtable *rt = (struct rtable *)cork->dst; unsigned int wmem_alloc_delta = 0; u32 tskey = 0; + bool paged; skb = skb_peek_tail(queue); exthdrlen = !skb ? rt->dst.header_len : 0; - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; + paged = !!cork->gso_size; + if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP && sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) tskey = sk->sk_tskey++; @@ -906,8 +909,8 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM) && - !(flags & MSG_MORE) && - !exthdrlen) + (!(flags & MSG_MORE) || cork->gso_size) && + (!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM))) csummode = CHECKSUM_PARTIAL; cork->length += length; @@ -933,6 +936,7 @@ static int __ip_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; + unsigned int pagedlen = 0; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; @@ -953,8 +957,12 @@ alloc_new_skb: if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; - else + else if (!paged) alloclen = fraglen; + else { + alloclen = min_t(int, fraglen, MAX_HEADER); + pagedlen = fraglen - alloclen; + } alloclen += exthdrlen; @@ -998,7 +1006,7 @@ alloc_new_skb: /* * Find where to start putting bytes. 
*/ - data = skb_put(skb, fraglen + exthdrlen); + data = skb_put(skb, fraglen + exthdrlen - pagedlen); skb_set_network_header(skb, exthdrlen); skb->transport_header = (skb->network_header + fragheaderlen); @@ -1014,7 +1022,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } - copy = datalen - transhdrlen - fraggap; + copy = datalen - transhdrlen - fraggap - pagedlen; if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); @@ -1022,7 +1030,7 @@ alloc_new_skb: } offset += copy; - length -= datalen - fraggap; + length -= copy + transhdrlen; transhdrlen = 0; exthdrlen = 0; csummode = CHECKSUM_NONE; @@ -1136,6 +1144,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, *rtp = NULL; cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : rt->dst.dev->mtu; + + cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; @@ -1215,7 +1225,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EOPNOTSUPP; hh_len = LL_RESERVED_SPACE(rt->dst.dev); - mtu = cork->fragsize; + mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; fragheaderlen = sizeof(struct iphdr) + (opt ? 
opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; @@ -1471,9 +1481,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) + struct inet_cork *cork, unsigned int flags) { - struct inet_cork cork; struct sk_buff_head queue; int err; @@ -1482,22 +1491,22 @@ struct sk_buff *ip_make_skb(struct sock *sk, __skb_queue_head_init(&queue); - cork.flags = 0; - cork.addr = 0; - cork.opt = NULL; - err = ip_setup_cork(sk, &cork, ipc, rtp); + cork->flags = 0; + cork->addr = 0; + cork->opt = NULL; + err = ip_setup_cork(sk, cork, ipc, rtp); if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, + err = __ip_append_data(sk, fl4, &queue, cork, &current->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { - __ip_flush_pending_frames(sk, &queue, &cork); + __ip_flush_pending_frames(sk, &queue, cork); return ERR_PTR(err); } - return __ip_make_skb(sk, fl4, &queue, &cork); + return __ip_make_skb(sk, fl4, &queue, cork); } /* @@ -1553,7 +1562,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, oif = skb->skb_iif; flowi4_init_output(&fl4, oif, - IP4_REPLY_MARK(net, skb->mark), + IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark, RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 57bbb060faaf..fc32fdbeefa6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,6 +47,8 @@ #include <linux/errqueue.h> #include <linux/uaccess.h> +#include <linux/bpfilter.h> + /* * SOL_IP control messages. 
*/ @@ -1242,6 +1244,11 @@ int ip_setsockopt(struct sock *sk, int level, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_SET_REPLACE && + optname < BPFILTER_IPT_SET_MAX) + err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1550,6 +1557,11 @@ int ip_getsockopt(struct sock *sk, int level, int err; err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1582,6 +1594,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETF |