diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index da3ad02d6cebdee7991ed2f55a598f22743fe1c4..e43c5dc04282dcaec963d8aa41d4cc0cce7fb229 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -440,6 +440,20 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
 	return NULL;
 }
 
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward in twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge_dev
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+	skb->offload_fwd_mark = !!(dp->bridge_dev);
+}
+
 /* switch.c */
 int dsa_switch_register_notifier(struct dsa_switch *ds);
 void dsa_switch_unregister_notifier(struct dsa_switch *ds);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 0750af951fc92c88229d97fe534de55ed5de14f2..a27f5096777aab4dc03e890124ebe024fcc513e8 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -167,7 +167,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
 	/* Remove Broadcom tag and update checksum */
 	skb_pull_rcsum(skb, BRCM_TAG_LEN);
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
@@ -271,7 +271,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
 	/* Remove Broadcom tag and update checksum */
 	skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	/* Move the Ethernet DA and SA */
 	memmove(skb->data - ETH_HLEN,
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 0f258218c8cf813ec571896ac946094563332226..3607499d06976986114f794c63d3d9224f6cb5ad 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -198,8 +198,8 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
 static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
 				  u8 extra)
 {
+	bool trap = false, trunk = false;
 	int source_device, source_port;
-	bool trunk = false;
 	enum dsa_code code;
 	enum dsa_cmd cmd;
 	u8 *dsa_header;
@@ -210,8 +210,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
 	cmd = dsa_header[0] >> 6;
 	switch (cmd) {
 	case DSA_CMD_FORWARD:
-		skb->offload_fwd_mark = 1;
-
 		trunk = !!(dsa_header[1] & 7);
 		break;
 
@@ -230,7 +228,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
 			 * device (like a bridge) that forwarding has
 			 * already been done by hardware.
 			 */
-			skb->offload_fwd_mark = 1;
 			break;
 		case DSA_CODE_MGMT_TRAP:
 		case DSA_CODE_IGMP_MLD_TRAP:
@@ -238,6 +235,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
 			/* Traps have, by definition, not been
 			 * forwarded by hardware, so don't mark them.
 			 */
+			trap = true;
 			break;
 		default:
 			/* Reserved code, this could be anything. Drop
@@ -271,6 +269,15 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
 	if (!skb->dev)
 		return NULL;
 
+	/* When using LAG offload, skb->dev is not a DSA slave interface,
+	 * so we cannot call dsa_default_offload_fwd_mark and we need to
+	 * special-case it.
+	 */
+	if (trunk)
+		skb->offload_fwd_mark = true;
+	else if (!trap)
+		dsa_default_offload_fwd_mark(skb);
+
 	/* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q
 	 * tag, and delete the ethertype (extra) if applicable. If the
 	 * 'tagged' bit is cleared; delete the DSA tag, and ethertype
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 424130f85f596d3438ce9ae616c49b4448b54115..c41208cbd9363937d02663081d5bba4aad542fa7 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -44,7 +44,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
 
 	pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
 
-	skb->offload_fwd_mark = true;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index a201ccf2435d8dee44a042351a36337309cdbff1..1c2dfa80f9b0a9b9deb6bcc5c7667b8e01f3ba76 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -24,7 +24,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 
 	pskb_trim_rcsum(skb, skb->len - len);
 
-	skb->offload_fwd_mark = true;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 26207ef39ebcf0f43f95f3fd29ed1f92f86d03cc..cf7cf2fa12402a735a2a61c967de8be33c4374d9 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -115,7 +115,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
 	skb_pull_rcsum(skb, 2 + 2);
 	memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
 		2 * ETH_ALEN);
-	skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
+	if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
+		dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index cc3ba864ad5badf51fa4fd4297605699b2f20284..3fb80e43f3a5b543d6df402165aa8b0b49a0300c 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -92,7 +92,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb->dev)
 		return NULL;
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 190f4bfd3bef646ade3bcd9df88ab2a60f64e584..3252634a29b813edd50887a9f160054bb92a916e 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -104,7 +104,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 		 */
 		return NULL;
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 	skb->priority = qos_class;
 
 	/* Ocelot switches copy frames unmodified to the CPU. However, it is
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index d0781b05861055ee46c46933fb0440e45ad83883..c95de71d13b085efcab614995dc02b092f351a29 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -49,7 +49,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	if (!skb->dev)
 		return NULL;
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 57c46b4ab2b3ff739f029d2b4d15df54baf4697f..f6b63aad655160b29f5cc232654b65f0807c2a17 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -114,7 +114,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
 		skb->data - ETH_HLEN - RTL4_A_HDR_LEN,
 		2 * ETH_ALEN);
 
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c1f993d592efcb7ee9bcb3d2509fbb72bf1565dc..664cb802b71a710903262b9c9c5866749f24c9ef 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -405,8 +405,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 	is_link_local = sja1105_is_link_local(skb);
 	is_meta = sja1105_is_meta_frame(skb);
 
-	skb->offload_fwd_mark = 1;
-
 	if (sja1105_skb_has_tag_8021q(skb)) {
 		/* Normal traffic path. */
 		sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
@@ -437,6 +435,9 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 		return NULL;
 	}
 
+	if (!is_link_local)
+		dsa_default_offload_fwd_mark(skb);
+
 	return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
 					      is_meta);
 }
@@ -480,7 +481,8 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
 
 static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 							    int *source_port,
-							    int *switch_id)
+							    int *switch_id,
+							    bool *host_only)
 {
 	u16 rx_header;
 
@@ -494,6 +496,9 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 	 */
 	rx_header = ntohs(*(__be16 *)skb->data);
 
+	if (rx_header & SJA1110_RX_HEADER_HOST_ONLY)
+		*host_only = true;
+
 	if (rx_header & SJA1110_RX_HEADER_IS_METADATA)
 		return sja1110_rcv_meta(skb, rx_header);
 
@@ -545,13 +550,13 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
 				   struct packet_type *pt)
 {
 	int source_port = -1, switch_id = -1;
+	bool host_only = false;
 	u16 vid;
 
-	skb->offload_fwd_mark = 1;
-
 	if (sja1110_skb_has_inband_control_extension(skb)) {
 		skb = sja1110_rcv_inband_control_extension(skb, &source_port,
-							   &switch_id);
+							   &switch_id,
+							   &host_only);
 		if (!skb)
 			return NULL;
 	}
@@ -569,6 +574,9 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
 		return NULL;
 	}
 
+	if (!host_only)
+		dsa_default_offload_fwd_mark(skb);
+
 	return skb;
 }
 
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index a31ff7fcb45f63bf1ef24cf184ed08a0511e9c90..da231c16ac824e9ba9e0c508b52107a20eac3375 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -46,7 +46,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
 		return NULL;
 
 	/* Frame is forwarded by hardware, don't forward in software. */
-	skb->offload_fwd_mark = 1;
+	dsa_default_offload_fwd_mark(skb);
 
 	return skb;
 }