From 318892ac068397f40ff81d9155898da01493b1d2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 19 Jul 2019 10:29:14 -0700
Subject: net/tls: don't arm strparser immediately in tls_set_sw_offload()

In tls_set_device_offload_rx() we prepare the software context
for RX fallback and proceed to add the connection to the device.
Unfortunately, software context prep includes arming strparser
so in case of a later error we have to release the socket lock
to call strp_done().

In preparation for not releasing the socket lock half way through
callbacks move arming strparser into a separate function.
Following patches will make use of that.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 584609174fe0..43f551cd508b 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -355,6 +355,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
-- 
cgit v1.2.3


From f87e62d45e51b12d48d2cb46b5cde8f83b866bc4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:16 -0700
Subject: net/tls: remove close callback sock unlock/lock around TX work flush

The tls close() callback currently drops the sock lock, makes a
cancel_delayed_work_sync() call, and then relocks the sock.

By restructuring the code we can avoid droping lock and then
reclaiming it. To simplify this we do the following,

 tls_sk_proto_close
 set_bit(CLOSING)
 set_bit(SCHEDULE)
 cancel_delay_work_sync() <- cancel workqueue
 lock_sock(sk)
 ...
 release_sock(sk)
 strp_done()

Setting the CLOSING bit prevents the SCHEDULE bit from being
cleared by any workqueue items e.g. if one happens to be
scheduled and run between when we set SCHEDULE bit and cancel
work. Then because SCHEDULE bit is set now no new work will
be scheduled.

Tested with net selftests and bpf selftests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 43f551cd508b..d4276cb6de53 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -162,6 +162,7 @@ struct tls_sw_context_tx {
 	int async_capable;
 
 #define BIT_TX_SCHEDULED	0
+#define BIT_TX_CLOSING		1
 	unsigned long tx_bitmask;
 };
 
@@ -360,6 +361,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
-- 
cgit v1.2.3


From 313ab004805cf52a42673b15852b3842474ccd87 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:17 -0700
Subject: net/tls: remove sock unlock/lock around strp_done()

The tls close() callback currently drops the sock lock to call
strp_done(). Split up the RX cleanup into stopping the strparser
and releasing most resources, syncing strparser and finally
freeing the context.

To avoid the need for a strp_done() call on the cleanup path
of device offload make sure we don't arm the strparser until
we are sure init will be successful.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index d4276cb6de53..235508e35fd4 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -107,9 +107,7 @@ struct tls_device {
 enum {
 	TLS_BASE,
 	TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
 	TLS_HW,
-#endif
 	TLS_HW_RECORD,
 	TLS_NUM_CONFIG,
 };
@@ -357,14 +355,17 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
+void tls_sw_strparser_done(struct tls_context *tls_ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
-void tls_sw_free_resources_tx(struct sock *sk);
+void tls_sw_release_resources_tx(struct sock *sk);
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 bool tls_sw_stream_read(const struct sock *sk);
-- 
cgit v1.2.3


From 32857cf57f920cdc03b5095f08febec94cf9c36b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:18 -0700
Subject: net/tls: fix transition through disconnect with close

It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE
state via tcp_disconnect() without actually calling tcp_close which
would then call the tls close callback. Because of this a user could
disconnect a socket then put it in a LISTEN state which would break
our assumptions about sockets always being ESTABLISHED state.

More directly because close() can call unhash() and unhash is
implemented by sockmap if a sockmap socket has TLS enabled we can
incorrectly destroy the psock from unhash() and then call its close
handler again. But because the psock (sockmap socket representation)
is already destroyed we call close handler in sk->prot. However,
in some cases (TLS BASE/BASE case) this will still point at the
sockmap close handler resulting in a circular call and crash reported
by syzbot.

To fix both above issues implement the unhash() routine for TLS.

v4:
 - add note about tls offload still needing the fix;
 - move sk_proto to the cold cache line;
 - split TX context free into "release" and "free",
   otherwise the GC work itself is in already freed
   memory;
 - more TX before RX for consistency;
 - reuse tls_ctx_free();
 - schedule the GC work after we're done with context
   to avoid UAF;
 - don't set the unhash in all modes, all modes "inherit"
   TLS_BASE's callbacks anyway;
 - disable the unhash hook for TLS_HW.

Fixes: 3c4d7559159bf ("tls: kernel TLS support")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 235508e35fd4..9e425ac2de45 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -271,6 +271,8 @@ struct tls_context {
 	unsigned long flags;
 
 	/* cache cold stuff */
+	struct proto *sk_proto;
+
 	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
@@ -288,6 +290,8 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+
+	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
@@ -359,7 +363,6 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
-void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_release_resources_tx(struct sock *sk);
 void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
-- 
cgit v1.2.3


From 95fa145479fbc0a0c1fd3274ceb42ec03c042a4a Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:22 -0700
Subject: bpf: sockmap/tls, close can race with map free

When a map free is called and in parallel a socket is closed we
have two paths that can potentially reset the socket prot ops, the
bpf close() path and the map free path. This creates a problem
with which prot ops should be used from the socket closed side.

If the map_free side completes first then we want to call the
original lowest level ops. However, if the tls path runs first
we want to call the sockmap ops. Additionally there was no locking
around prot updates in TLS code paths so the prot ops could
be changed multiple times once from TLS path and again from sockmap
side potentially leaving ops pointed at either TLS or sockmap
when psock and/or tls context have already been destroyed.

To fix this race first only update ops inside callback lock
so that TLS, sockmap and lowest level all agree on prot state.
Second and a ULP callback update() so that lower layers can
inform the upper layer when they are being removed allowing the
upper layer to reset prot ops.

This gets us close to allowing sockmap and tls to be stacked
in arbitrary order but will save that patch for *next trees.

v4:
 - make sure we don't free things for device;
 - remove the checks which swap the callbacks back
   only if TLS is at the top.

Reported-by: syzbot+06537213db7ba2745c4a@syzkaller.appspotmail.com
Fixes: 02c558b2d5d6 ("bpf: sockmap, support for msg_peek in sk_msg with redirect ingress")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 8 +++++++-
 include/net/tcp.h     | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 50ced8aba9db..e4b3fb4bb77c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -354,7 +354,13 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 	sk->sk_write_space = psock->saved_write_space;
 
 	if (psock->sk_proto) {
-		sk->sk_prot = psock->sk_proto;
+		struct inet_connection_sock *icsk = inet_csk(sk);
+		bool has_ulp = !!icsk->icsk_ulp_data;
+
+		if (has_ulp)
+			tcp_update_ulp(sk, psock->sk_proto);
+		else
+			sk->sk_prot = psock->sk_proto;
 		psock->sk_proto = NULL;
 	}
 }
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f42d300f0cfa..c82a23470081 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2103,6 +2103,8 @@ struct tcp_ulp_ops {
 
 	/* initialize ulp */
 	int (*init)(struct sock *sk);
+	/* update ulp */
+	void (*update)(struct sock *sk, struct proto *p);
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
 
@@ -2114,6 +2116,7 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p);
 
 #define MODULE_ALIAS_TCP_ULP(name)				\
 	__MODULE_INFO(alias, alias_userspace, name);		\
-- 
cgit v1.2.3


From d9b8aadaffa65809d146cf0f8632a22a946367d7 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 19 Jul 2019 11:18:15 +0200
Subject: bpf: fix narrower loads on s390

The very first check in test_pkt_md_access is failing on s390, which
happens because loading a part of a struct __sk_buff field produces
an incorrect result.

The preprocessed code of the check is:

{
	__u8 tmp = *((volatile __u8 *)&skb->len +
		((sizeof(skb->len) - sizeof(__u8)) / sizeof(__u8)));
	if (tmp != ((*(volatile __u32 *)&skb->len) & 0xFF)) return 2;
};

clang generates the following code for it:

      0:	71 21 00 03 00 00 00 00	r2 = *(u8 *)(r1 + 3)
      1:	61 31 00 00 00 00 00 00	r3 = *(u32 *)(r1 + 0)
      2:	57 30 00 00 00 00 00 ff	r3 &= 255
      3:	5d 23 00 1d 00 00 00 00	if r2 != r3 goto +29 <LBB0_10>

Finally, verifier transforms it to:

  0: (61) r2 = *(u32 *)(r1 +104)
  1: (bc) w2 = w2
  2: (74) w2 >>= 24
  3: (bc) w2 = w2
  4: (54) w2 &= 255
  5: (bc) w2 = w2

The problem is that when verifier emits the code to replace a partial
load of a struct __sk_buff field (*(u8 *)(r1 + 3)) with a full load of
struct sk_buff field (*(u32 *)(r1 + 104)), an optional shift and a
bitwise AND, it assumes that the machine is little endian and
incorrectly decides to use a shift.

Adjust shift count calculation to account for endianness.

Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ff65d22cf336..92c6e31fb008 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -24,6 +24,7 @@
 
 #include <net/sch_generic.h>
 
+#include <asm/byteorder.h>
 #include <uapi/linux/filter.h>
 #include <uapi/linux/bpf.h>
 
@@ -747,6 +748,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 	return size <= size_default && (size & (size - 1)) == 0;
 }
 
+static inline u8
+bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
+{
+	u8 load_off = off & (size_default - 1);
+
+#ifdef __LITTLE_ENDIAN
+	return load_off * 8;
+#else
+	return (size_default - (load_off + size)) * 8;
+#endif
+}
+
 #define bpf_ctx_wide_access_ok(off, size, type, field)			\
 	(size == sizeof(__u64) &&					\
 	off >= offsetof(type, field) &&					\
-- 
cgit v1.2.3


From f8be17b81d44aed1f9ea68c3fc70f501c9616e2d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 Jul 2019 10:22:48 +0300
Subject: lib/dim: Fix -Wunused-const-variable warnings

DIM causes to the following warnings during kernel compilation
which indicates that tx_profile and rx_profile are supposed to
be declared in *.c and not in *.h files.

In file included from ./include/rdma/ib_verbs.h:64,
                 from ./include/linux/mlx5/device.h:37,
                 from ./include/linux/mlx5/driver.h:51,
                 from ./include/linux/mlx5/vport.h:36,
                 from drivers/infiniband/hw/mlx5/ib_virt.c:34:
./include/linux/dim.h:326:1: warning: _tx_profile_ defined but not used [-Wunused-const-variable=]
  326 | tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
      | ^~~~~~~~~~
./include/linux/dim.h:320:1: warning: _rx_profile_ defined but not used [-Wunused-const-variable=]
  320 | rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
      | ^~~~~~~~~~

Fixes: 4f75da3666c0 ("linux/dim: Move implementation to .c files")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dim.h | 56 -----------------------------------------------------
 1 file changed, 56 deletions(-)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index d3a0fbfff2bb..9fa4b3f88c39 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -272,62 +272,6 @@ dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
 
 /* Net DIM */
 
-/*
- * Net DIM profiles:
- *        There are different set of profiles for each CQ period mode.
- *        There are different set of profiles for RX/TX CQs.
- *        Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
- */
-#define NET_DIM_PARAMS_NUM_PROFILES 5
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
-#define NET_DIM_DEF_PROFILE_CQE 1
-#define NET_DIM_DEF_PROFILE_EQE 1
-
-#define NET_DIM_RX_EQE_PROFILES { \
-	{1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-}
-
-#define NET_DIM_RX_CQE_PROFILES { \
-	{2,  256},             \
-	{8,  128},             \
-	{16, 64},              \
-	{32, 64},              \
-	{64, 64}               \
-}
-
-#define NET_DIM_TX_EQE_PROFILES { \
-	{1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
-}
-
-#define NET_DIM_TX_CQE_PROFILES { \
-	{5,  128},  \
-	{8,  64},  \
-	{16, 32},  \
-	{32, 32},  \
-	{64, 32}   \
-}
-
-static const struct dim_cq_moder
-rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
-	NET_DIM_RX_EQE_PROFILES,
-	NET_DIM_RX_CQE_PROFILES,
-};
-
-static const struct dim_cq_moder
-tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
-	NET_DIM_TX_EQE_PROFILES,
-	NET_DIM_TX_CQE_PROFILES,
-};
-
 /**
  *	net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
  *	@cq_period_mode: CQ period mode
-- 
cgit v1.2.3


From 7a32f2962c56d9d8a836b4469855caeee8766bd4 Mon Sep 17 00:00:00 2001
From: Edward Srouji <edwards@mellanox.com>
Date: Tue, 23 Jul 2019 10:12:55 +0300
Subject: net/mlx5: Fix modify_cq_in alignment

Fix modify_cq_in alignment to match the device specification.
After this fix the 'cq_umem_valid' field will be in the right offset.

Cc: <stable@vger.kernel.org> # 4.19
Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Edward Srouji <edwards@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3d5752657d9..ec571fd7fcf8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5975,10 +5975,12 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
 
 	u8         cq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
cgit v1.2.3


From 90bb769291161cf25a818d69cf608c181654473e Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@mellanox.com>
Date: Sat, 6 Jul 2019 18:06:15 +0300
Subject: net/mlx5e: Prevent encap flow counter update async to user query

This patch prevents a race between user invoked cached counters
query and a neighbor last usage updater.

The cached flow counter stats can be queried by calling
"mlx5_fc_query_cached" which provides the number of bytes and
packets that passed via this flow since the last time this counter
was queried.
It does so by reducting the last saved stats from the current, cached
stats and then updating the last saved stats with the cached stats.
It also provide the lastuse value for that flow.

Since "mlx5e_tc_update_neigh_used_value" needs to retrieve the
last usage time of encapsulation flows, it calls the flow counter
query method periodically and async to user queries of the flow counter
using cls_flower.
This call is causing the driver to update the last reported bytes and
packets from the cache and therefore, future user queries of the flow
stats will return lower than expected number for bytes and packets
since the last saved stats in the driver was updated async to the last
saved stats in cls_flower.

This causes wrong stats presentation of encapsulation flows to user.

Since the neighbor usage updater only needs the lastuse stats from the
cached counter, the fix is to use a dedicated lastuse query call that
returns the lastuse value without synching between the cached stats and
the last saved stats.

Fixes: f6dfb4c3f216 ("net/mlx5e: Update neighbour 'used' state using HW flow rules counters")
Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 04a569568eac..f049af3f3cd8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -220,6 +220,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
-- 
cgit v1.2.3


From e6f4051123fd33901e9655a675b22aefcdc5d277 Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Mon, 22 Jul 2019 12:44:50 +0530
Subject: {nl,mac}80211: fix interface combinations on crypto controlled
 devices

Commit 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on
crypto controlled devices") has introduced a change which allows
4addr operation on crypto controlled devices (ex: ath10k). This
change has inadvertently impacted the interface combinations logic
on such devices.

General rule is that software interfaces like AP/VLAN should not be
listed under supported interface combinations and should not be
considered during validation of these combinations; because of the
aforementioned change, AP/VLAN interfaces(if present) will be checked
against interfaces supported by the device and blocks valid interface
combinations.

Consider a case where an AP and AP/VLAN are up and running; when a
second AP device is brought up on the same physical device, this AP
will be checked against the AP/VLAN interface (which will not be
part of supported interface combinations of the device) and blocks
second AP to come up.

Add a new API cfg80211_iftype_allowed() to fix the problem, this
API works for all devices with/without SW crypto control.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Fixes: 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices")
Link: https://lore.kernel.org/r/1563779690-9716-1-git-send-email-mpubbise@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 45850a8391d9..26e2ad2c7027 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7320,6 +7320,21 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
 			    struct cfg80211_pmsr_request *req,
 			    gfp_t gfp);
 
+/**
+ * cfg80211_iftype_allowed - check whether the interface can be allowed
+ * @wiphy: the wiphy
+ * @iftype: interface type
+ * @is_4addr: use_4addr flag, must be '0' when check_swif is '1'
+ * @check_swif: check iftype against software interfaces
+ *
+ * Check whether the interface is allowed to operate; additionally, this API
+ * can be used to check iftype against the software interfaces when
+ * check_swif is '1'.
+ */
+bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
+			     bool is_4addr, u8 check_swif);
+
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
-- 
cgit v1.2.3


From a7cf3d24ee6081930feb4c830a7f6f16ebe31c49 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Thu, 25 Jul 2019 12:07:12 -0600
Subject: net: qualcomm: rmnet: Fix incorrect UL checksum offload logic

The udp_ip4_ind bit is set only for IPv4 UDP non-fragmented packets
so that the hardware can flip the checksum to 0xFFFF if the computed
checksum is 0 per RFC768.

However, this bit had to be set for IPv6 UDP non fragmented packets
as well per hardware requirements. Otherwise, IPv6 UDP packets
with computed checksum as 0 were transmitted by hardware and were
dropped in the network.

In addition to setting this bit for IPv6 UDP, the field is also
appropriately renamed to udp_ind as part of this change.

Fixes: 5eb5f8608ef1 ("net: qualcomm: rmnet: Add support for TX checksum offload")
Cc: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_rmnet.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index b4f5403383fc..9661416a9bb4 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -41,11 +41,11 @@ struct rmnet_map_ul_csum_header {
 	__be16 csum_start_offset;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u16 csum_insert_offset:14;
-	u16 udp_ip4_ind:1;
+	u16 udp_ind:1;
 	u16 csum_enabled:1;
 #elif defined (__BIG_ENDIAN_BITFIELD)
 	u16 csum_enabled:1;
-	u16 udp_ip4_ind:1;
+	u16 udp_ind:1;
 	u16 csum_insert_offset:14;
 #else
 #error	"Please fix <asm/byteorder.h>"
-- 
cgit v1.2.3


From 91826ba13855f73e252fef68369b3b0e1ed25253 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 29 Jul 2019 00:51:38 +0900
Subject: netfilter: add include guard to xt_connlabel.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_connlabel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h
index 2312f0ec07b2..323f0dfc2a4e 100644
--- a/include/uapi/linux/netfilter/xt_connlabel.h
+++ b/include/uapi/linux/netfilter/xt_connlabel.h
@@ -1,4 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_XT_CONNLABEL_H
+#define _UAPI_XT_CONNLABEL_H
+
 #include <linux/types.h>
 
 #define XT_CONNLABEL_MAXBIT 127
@@ -11,3 +15,5 @@ struct xt_connlabel_mtinfo {
 	__u16 bit;
 	__u16 options;
 };
+
+#endif /* _UAPI_XT_CONNLABEL_H */
-- 
cgit v1.2.3


From 055d88242a6046a1ceac3167290f054c72571cd9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 30 Jul 2019 21:25:20 +0200
Subject: compat_ioctl: pppoe: fix PPPOEIOCSFWD handling

Support for handling the PPPOEIOCSFWD ioctl in compat mode was added in
linux-2.5.69 along with hundreds of other commands, but was always broken
sincen only the structure is compatible, but the command number is not,
due to the size being sizeof(size_t), or at first sizeof(sizeof((struct
sockaddr_pppox)), which is different on 64-bit architectures.

Guillaume Nault adds:

  And the implementation was broken until 2016 (see 29e73269aa4d ("pppoe:
  fix reference counting in PPPoE proxy")), and nobody ever noticed. I
  should probably have removed this ioctl entirely instead of fixing it.
  Clearly, it has never been used.

Fix it by adding a compat_ioctl handler for all pppoe variants that
translates the command number and then calls the regular ioctl function.

All other ioctl commands handled by pppoe are compatible between 32-bit
and 64-bit, and require compat_ptr() conversion.

This should apply to all stable kernels.

Acked-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_pppox.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 8b728750a625..69e813bcb947 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -80,6 +80,9 @@ extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp);
 extern void unregister_pppox_proto(int proto_num);
 extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */
 extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+
+#define PPPOEIOCSFWD32    _IOW(0xB1 ,0, compat_size_t)
 
 /* PPPoX socket states */
 enum {
-- 
cgit v1.2.3


From 5e5412c365a32e452daa762eac36121cb8a370bb Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Tue, 30 Jul 2019 11:30:33 -0400
Subject: net/socket: fix GCC8+ Wpacked-not-aligned warnings

There are a lot of those warnings with GCC8+ 64-bit,

In file included from ./include/linux/sctp.h:42,
                 from net/core/skbuff.c:47:
./include/uapi/linux/sctp.h:395:1: warning: alignment 4 of 'struct
sctp_paddr_change' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:728:1: warning: alignment 4 of 'struct
sctp_setpeerprim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:727:26: warning: 'sspp_addr' offset 4 in
'struct sctp_setpeerprim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage sspp_addr;
                          ^~~~~~~~~
./include/uapi/linux/sctp.h:741:1: warning: alignment 4 of 'struct
sctp_prim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:740:26: warning: 'ssp_addr' offset 4 in
'struct sctp_prim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage ssp_addr;
                          ^~~~~~~~
./include/uapi/linux/sctp.h:792:1: warning: alignment 4 of 'struct
sctp_paddrparams' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:784:26: warning: 'spp_address' offset 4 in
'struct sctp_paddrparams' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spp_address;
                          ^~~~~~~~~~~
./include/uapi/linux/sctp.h:905:1: warning: alignment 4 of 'struct
sctp_paddrinfo' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:899:26: warning: 'spinfo_address' offset 4
in 'struct sctp_paddrinfo' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spinfo_address;
                          ^~~~~~~~~~~~~~

This is because the commit 20c9c825b12f ("[SCTP] Fix SCTP socket options
to work with 32-bit apps on 64-bit kernels.") added "packed, aligned(4)"
GCC attributes to some structures but one of the members, i.e, "struct
sockaddr_storage" in those structures has the attribute,
"aligned(__alignof__ (struct sockaddr *)" which is 8-byte on 64-bit
systems, so the commit overwrites the designed alignments for
"sockaddr_storage".

To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
it is only used in those packed sctp structure which is part of UAPI,
and "struct __kernel_sockaddr_storage" is used in some other
places of UAPI that need not to change alignments in order to not
breaking userspace.

Use an implicit alignment for "struct __kernel_sockaddr_storage" so it
can keep the same alignments as a member in both packed and un-packed
structures without breaking UAPI.

Suggested-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/socket.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 8eb96021709c..c3409c8ec0dd 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -6,17 +6,24 @@
  * Desired design of maximum size and alignment (see RFC2553)
  */
 #define _K_SS_MAXSIZE	128	/* Implementation specific max size */
-#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
-				/* Implementation specific desired alignment */
 
 typedef unsigned short __kernel_sa_family_t;
 
+/*
+ * The definition uses anonymous union and struct in order to control the
+ * default alignment.
+ */
 struct __kernel_sockaddr_storage {
-	__kernel_sa_family_t	ss_family;		/* address family */
-	/* Following field(s) are implementation specific */
-	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+	union {
+		struct {
+			__kernel_sa_family_t	ss_family; /* address family */
+			/* Following field(s) are implementation specific */
+			char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
 				/* space to achieve desired size, */
 				/* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+		};
+		void *__align; /* implementation specific desired alignment */
+	};
+};
 
 #endif /* _UAPI_LINUX_SOCKET_H */
-- 
cgit v1.2.3


From 5d92e631b8be8965a90c144320f06e096081a551 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 1 Aug 2019 14:36:01 -0700
Subject: net/tls: partially revert fix transition through disconnect with
 close

Looks like we were slightly overzealous with the shutdown()
cleanup. Even though the sock->sk_state can reach CLOSED again,
socket->state will not got back to SS_UNCONNECTED once
connections is ESTABLISHED. Meaning we will see EISCONN if
we try to reconnect, and EINVAL if we try to listen.

Only listen sockets can be shutdown() and reused, but since
ESTABLISHED sockets can never be re-connected() or used for
listen() we don't need to try to clean up the ULP state early.

Fixes: 32857cf57f92 ("net/tls: fix transition through disconnect with close")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 9e425ac2de45..41b2d41bb1b8 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -290,8 +290,6 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
-
-	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
-- 
cgit v1.2.3


From c4bd48699beb92d6bb99d6139d1e9737cca73480 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Sat, 3 Aug 2019 16:36:18 +0300
Subject: net: sched: police: allow accessing police->params with rtnl

Recently implemented support for police action in flow_offload infra leads
to following rcu usage warning:

[ 1925.881092] =============================
[ 1925.881094] WARNING: suspicious RCU usage
[ 1925.881098] 5.3.0-rc1+ #574 Not tainted
[ 1925.881100] -----------------------------
[ 1925.881104] include/net/tc_act/tc_police.h:57 suspicious rcu_dereference_check() usage!
[ 1925.881106]
               other info that might help us debug this:

[ 1925.881109]
               rcu_scheduler_active = 2, debug_locks = 1
[ 1925.881112] 1 lock held by tc/18591:
[ 1925.881115]  #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970
[ 1925.881124]
               stack backtrace:
[ 1925.881127] CPU: 2 PID: 18591 Comm: tc Not tainted 5.3.0-rc1+ #574
[ 1925.881130] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 1925.881132] Call Trace:
[ 1925.881138]  dump_stack+0x85/0xc0
[ 1925.881145]  tc_setup_flow_action+0x1771/0x2040
[ 1925.881155]  fl_hw_replace_filter+0x11f/0x2e0 [cls_flower]
[ 1925.881175]  fl_change+0xd24/0x1b30 [cls_flower]
[ 1925.881200]  tc_new_tfilter+0x3e0/0x970
[ 1925.881231]  ? tc_del_tfilter+0x720/0x720
[ 1925.881243]  rtnetlink_rcv_msg+0x389/0x4b0
[ 1925.881250]  ? netlink_deliver_tap+0x95/0x400
[ 1925.881257]  ? rtnl_dellink+0x2d0/0x2d0
[ 1925.881264]  netlink_rcv_skb+0x49/0x110
[ 1925.881275]  netlink_unicast+0x171/0x200
[ 1925.881284]  netlink_sendmsg+0x224/0x3f0
[ 1925.881299]  sock_sendmsg+0x5e/0x60
[ 1925.881305]  ___sys_sendmsg+0x2ae/0x330
[ 1925.881309]  ? task_work_add+0x43/0x50
[ 1925.881314]  ? fput_many+0x45/0x80
[ 1925.881329]  ? __lock_acquire+0x248/0x1930
[ 1925.881342]  ? find_held_lock+0x2b/0x80
[ 1925.881347]  ? task_work_run+0x7b/0xd0
[ 1925.881359]  __sys_sendmsg+0x59/0xa0
[ 1925.881375]  do_syscall_64+0x5c/0xb0
[ 1925.881381]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 1925.881384] RIP: 0033:0x7feb245047b8
[ 1925.881388] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 89 54
[ 1925.881391] RSP: 002b:00007ffc2d2a5788 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 1925.881395] RAX: ffffffffffffffda RBX: 000000005d4497ed RCX: 00007feb245047b8
[ 1925.881398] RDX: 0000000000000000 RSI: 00007ffc2d2a57f0 RDI: 0000000000000003
[ 1925.881400] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006
[ 1925.881403] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001
[ 1925.881406] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001

Change tcf_police_rate_bytes_ps() and tcf_police_tcfp_burst() helpers to
allow using them from both rtnl and rcu protected contexts.

Fixes: 8c8cfc6ed274 ("net/sched: add police action to the hardware intermediate representation")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_police.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 8b9ef3664262..cfdc7cb82cad 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -54,7 +54,7 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act)
 	struct tcf_police *police = to_police(act);
 	struct tcf_police_params *params;
 
-	params = rcu_dereference_bh(police->params);
+	params = rcu_dereference_bh_rtnl(police->params);
 	return params->rate.rate_bytes_ps;
 }
 
@@ -63,7 +63,7 @@ static inline s64 tcf_police_tcfp_burst(const struct tc_action *act)
 	struct tcf_police *police = to_police(act);
 	struct tcf_police_params *params;
 
-	params = rcu_dereference_bh(police->params);
+	params = rcu_dereference_bh_rtnl(police->params);
 	return params->tcfp_burst;
 }
 
-- 
cgit v1.2.3


From 67cbf7dedd03a63ca2fbd9df2049eabba7a37edf Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Sat, 3 Aug 2019 16:36:19 +0300
Subject: net: sched: sample: allow accessing psample_group with rtnl

Recently implemented support for sample action in flow_offload infra leads
to following rcu usage warning:

[ 1938.234856] =============================
[ 1938.234858] WARNING: suspicious RCU usage
[ 1938.234863] 5.3.0-rc1+ #574 Not tainted
[ 1938.234866] -----------------------------
[ 1938.234869] include/net/tc_act/tc_sample.h:47 suspicious rcu_dereference_check() usage!
[ 1938.234872]
               other info that might help us debug this:

[ 1938.234875]
               rcu_scheduler_active = 2, debug_locks = 1
[ 1938.234879] 1 lock held by tc/19540:
[ 1938.234881]  #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970
[ 1938.234900]
               stack backtrace:
[ 1938.234905] CPU: 2 PID: 19540 Comm: tc Not tainted 5.3.0-rc1+ #574
[ 1938.234908] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 1938.234911] Call Trace:
[ 1938.234922]  dump_stack+0x85/0xc0
[ 1938.234930]  tc_setup_flow_action+0xed5/0x2040
[ 1938.234944]  fl_hw_replace_filter+0x11f/0x2e0 [cls_flower]
[ 1938.234965]  fl_change+0xd24/0x1b30 [cls_flower]
[ 1938.234990]  tc_new_tfilter+0x3e0/0x970
[ 1938.235021]  ? tc_del_tfilter+0x720/0x720
[ 1938.235028]  rtnetlink_rcv_msg+0x389/0x4b0
[ 1938.235038]  ? netlink_deliver_tap+0x95/0x400
[ 1938.235044]  ? rtnl_dellink+0x2d0/0x2d0
[ 1938.235053]  netlink_rcv_skb+0x49/0x110
[ 1938.235063]  netlink_unicast+0x171/0x200
[ 1938.235073]  netlink_sendmsg+0x224/0x3f0
[ 1938.235091]  sock_sendmsg+0x5e/0x60
[ 1938.235097]  ___sys_sendmsg+0x2ae/0x330
[ 1938.235111]  ? __handle_mm_fault+0x12cd/0x19e0
[ 1938.235125]  ? __handle_mm_fault+0x12cd/0x19e0
[ 1938.235138]  ? find_held_lock+0x2b/0x80
[ 1938.235147]  ? do_user_addr_fault+0x22d/0x490
[ 1938.235160]  __sys_sendmsg+0x59/0xa0
[ 1938.235178]  do_syscall_64+0x5c/0xb0
[ 1938.235187]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 1938.235192] RIP: 0033:0x7ff9a4d597b8
[ 1938.235197] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 89 54
[ 1938.235200] RSP: 002b:00007ffcfe381c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 1938.235205] RAX: ffffffffffffffda RBX: 000000005d4497f9 RCX: 00007ff9a4d597b8
[ 1938.235208] RDX: 0000000000000000 RSI: 00007ffcfe381cb0 RDI: 0000000000000003
[ 1938.235211] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006
[ 1938.235214] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001
[ 1938.235217] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001

Change tcf_sample_psample_group() helper to allow using it from both rtnl
and rcu protected contexts.

Fixes: a7a7be6087b0 ("net/sched: add sample action to the hardware intermediate representation")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_sample.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 0a559d4b6f0f..b4fce0fae645 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -44,7 +44,7 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a)
 static inline struct psample_group *
 tcf_sample_psample_group(const struct tc_action *a)
 {
-	return rcu_dereference(to_sample(a)->psample_group);
+	return rcu_dereference_rtnl(to_sample(a)->psample_group);
 }
 
 #endif /* __NET_TC_SAMPLE_H */
-- 
cgit v1.2.3