From 84882cf72cd774cf16fd338bdbf00f69ac9f9194 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 1 Nov 2021 22:26:08 -0700
Subject: [PATCH] Revert "net: avoid double accounting for pure zerocopy skbs"

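This reverts commit f1a456f8f3fc5828d8abcad941860380ae147b1d.

The reverted change triggers the following warning in skb_try_coalesce(),
reached from the TCP receive path (tcp_queue_rcv -> tcp_try_coalesce):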

  WARNING: CPU: 1 PID: 6819 at net/core/skbuff.c:5429 skb_try_coalesce+0x78b/0x7e0
  CPU: 1 PID: 6819 Comm: xxxxxxx Kdump: loaded Tainted: G S                5.15.0-04194-gd852503f7711 #16
  RIP: 0010:skb_try_coalesce+0x78b/0x7e0
  Code: e8 2a bf 41 ff 44 8b b3 bc 00 00 00 48 8b 7c 24 30 e8 19 c0 41 ff 44 89 f0 48 03 83 c0 00 00 00 48 89 44 24 40 e9 47 fb ff ff <0f> 0b e9 ca fc ff ff 4c 8d 70 ff 48 83 c0 07 48 89 44 24 38 e9 61
  RSP: 0018:ffff88881f449688 EFLAGS: 00010282
  RAX: 00000000fffffe96 RBX: ffff8881566e4460 RCX: ffffffff82079f7e
  RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8881566e47b0
  RBP: ffff8881566e46e0 R08: ffffed102619235d R09: ffffed102619235d
  R10: ffff888130c91ae3 R11: ffffed102619235c R12: ffff88881f4498a0
  R13: 0000000000000056 R14: 0000000000000009 R15: ffff888130c91ac0
  FS:  00007fec2cbb9700(0000) GS:ffff88881f440000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fec1b060d80 CR3: 00000003acf94005 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   <IRQ>
   tcp_try_coalesce+0xeb/0x290
   ? tcp_parse_options+0x610/0x610
   ? mark_held_locks+0x79/0xa0
   tcp_queue_rcv+0x69/0x2f0
   tcp_rcv_established+0xa49/0xd40
   ? tcp_data_queue+0x18a0/0x18a0
   tcp_v6_do_rcv+0x1c9/0x880
   ? rt6_mtu_change_route+0x100/0x100
   tcp_v6_rcv+0x1624/0x1830

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 19 +------------------
 include/net/tcp.h      |  8 ++------
 net/core/datagram.c    |  3 +--
 net/core/skbuff.c      |  3 +--
 net/ipv4/tcp.c         | 22 ++--------------------
 net/ipv4/tcp_output.c  |  7 ++-----
 6 files changed, 9 insertions(+), 53 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 10869906cc574..0bd6520329f6f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -454,15 +454,9 @@ enum {
 	 * all frags to avoid possible bad checksum
 	 */
 	SKBFL_SHARED_FRAG = BIT(1),
-
-	/* segment contains only zerocopy data and should not be
-	 * charged to the kernel memory.
-	 */
-	SKBFL_PURE_ZEROCOPY = BIT(2),
 };
 
 #define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
-#define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
 
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1470,17 +1464,6 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 	return is_zcopy ? skb_uarg(skb) : NULL;
 }
 
-static inline bool skb_zcopy_pure(const struct sk_buff *skb)
-{
-	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
-}
-
-static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
-				       const struct sk_buff *skb2)
-{
-	return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
-}
-
 static inline void net_zcopy_get(struct ubuf_info *uarg)
 {
 	refcount_inc(&uarg->refcnt);
@@ -1545,7 +1528,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
 		if (!skb_zcopy_is_nouarg(skb))
 			uarg->callback(skb, uarg, zerocopy_success);
 
-		skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
+		skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG;
 	}
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index af91f370432ef..70972f3ac8fa3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -293,10 +293,7 @@ static inline bool tcp_out_of_memory(struct sock *sk)
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
-	if (!skb_zcopy_pure(skb))
-		sk_mem_uncharge(sk, skb->truesize);
-	else
-		sk_mem_uncharge(sk, SKB_TRUESIZE(MAX_TCP_HEADER));
+	sk_mem_uncharge(sk, skb->truesize);
 	__kfree_skb(skb);
 }
 
@@ -977,8 +974,7 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 					const struct sk_buff *from)
 {
 	return likely(tcp_skb_can_collapse_to(to) &&
-		      mptcp_skb_can_collapse(to, from) &&
-		      skb_pure_zcopy_same(to, from));
+		      mptcp_skb_can_collapse(to, from));
 }
 
 /* Events passed to congestion control interface */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661d..15ab9ffb27fe9 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,8 +646,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 		skb->truesize += truesize;
 		if (sk && sk->sk_type == SOCK_STREAM) {
 			sk_wmem_queued_add(sk, truesize);
-			if (!skb_zcopy_pure(skb))
-				sk_mem_charge(sk, truesize);
+			sk_mem_charge(sk, truesize);
 		} else {
 			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 29e617d8d7fb2..67a9188d8a49c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3433,9 +3433,8 @@ static inline void skb_split_no_header(struct sk_buff *skb,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
-	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
 
-	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
+	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
 	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2561c14a6e639..bc7f419184aa5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -863,7 +863,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 	if (likely(skb)) {
 		bool mem_scheduled;
 
-		skb->truesize = SKB_TRUESIZE(size + MAX_TCP_HEADER);
 		if (force_schedule) {
 			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
@@ -1320,15 +1319,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-			/* skb changing from pure zc to mixed, must charge zc */
-			if (unlikely(skb_zcopy_pure(skb))) {
-				if (!sk_wmem_schedule(sk, skb->data_len))
-					goto wait_for_space;
-
-				sk_mem_charge(sk, skb->data_len);
-				skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
-			}
-
 			if (!sk_wmem_schedule(sk, copy))
 				goto wait_for_space;
 
@@ -1349,16 +1339,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 			}
 			pfrag->offset += copy;
 		} else {
-			/* First append to a fragless skb builds initial
-			 * pure zerocopy skb
-			 */
-			if (!skb->len)
-				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
-
-			if (!skb_zcopy_pure(skb)) {
-				if (!sk_wmem_schedule(sk, copy))
-					goto wait_for_space;
-			}
+			if (!sk_wmem_schedule(sk, copy))
+				goto wait_for_space;
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 			if (err == -EMSGSIZE || err == -EEXIST) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 287b57aadc374..6fbbf15580337 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1677,8 +1677,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	if (delta_truesize) {
 		skb->truesize	   -= delta_truesize;
 		sk_wmem_queued_add(sk, -delta_truesize);
-		if (!skb_zcopy_pure(skb))
-			sk_mem_uncharge(sk, delta_truesize);
+		sk_mem_uncharge(sk, delta_truesize);
 	}
 
 	/* Any change of skb->len requires recalculation of tso factor. */
@@ -2296,9 +2295,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor) ||
-		    tcp_has_tx_tstamp(skb) ||
-		    !skb_pure_zcopy_same(skb, next))
+		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
 			return false;
 
 		len -= skb->len;
-- 
GitLab