netfilter: bridge: confirm multicast packets before passing them up the stack

author Florian Westphal <fw@strlen.de>

Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 6 Mar 2024 14:38:46 +0000 (14:38 +0000)
author Florian Westphal <fw@strlen.de>
Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 6 Mar 2024 14:38:46 +0000 (14:38 +0000)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h

index c92bb1580f4195766def737119f671f8061d870b..c69cbd64b5b462900d0508c2d2edd60a61a7546d 100644 (file)
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -461,6 +461,7 @@ struct nf_ct_hook {
                               const struct sk_buff *);
         void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
         void (*set_closing)(struct nf_conntrack *nfct);
+       int (*confirm)(struct sk_buff *skb);
  };
  extern const struct nf_ct_hook __rcu *nf_ct_hook;
  
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

index f14beb9a62edbef62fdb19cbb884f5cc992cca00..8a114a50004669e4df485a4de36d05110c734c72 100644 (file)
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
  #include <linux/sysctl.h>
  #endif
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
  static unsigned int brnf_net_id __read_mostly;
  
  struct brnf_net {
@@ -537,6 +541,90 @@ static unsigned int br_nf_pre_routing(void *priv,
         return NF_STOLEN;
  }
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ *      macvlan0
+ *      br0
+ *  ethX  ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ *    -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ *    interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ *    and schedules a work queue to send them out on the lower devices.
+ *
+ *    The clone skb->_nfct is not a copy, it is the same entry as the
+ *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table).  This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+                                  struct sk_buff *skb,
+                                  const struct nf_hook_state *state)
+{
+       struct nf_conntrack *nfct = skb_nfct(skb);
+       const struct nf_ct_hook *ct_hook;
+       struct nf_conn *ct;
+       int ret;
+
+       if (!nfct || skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       if (likely(nf_ct_is_confirmed(ct)))
+               return NF_ACCEPT;
+
+       WARN_ON_ONCE(skb_shared(skb));
+       WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+       /* We can't call nf_confirm here, it would create a dependency
+        * on nf_conntrack module.
+        */
+       ct_hook = rcu_dereference(nf_ct_hook);
+       if (!ct_hook) {
+               skb->_nfct = 0ul;
+               nf_conntrack_put(nfct);
+               return NF_ACCEPT;
+       }
+
+       nf_bridge_pull_encap_header(skb);
+       ret = ct_hook->confirm(skb);
+       switch (ret & NF_VERDICT_MASK) {
+       case NF_STOLEN:
+               return NF_STOLEN;
+       default:
+               nf_bridge_push_encap_header(skb);
+               break;
+       }
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+       return ret;
+}
+#endif
  
  /* PF_BRIDGE/FORWARD *************************************************/
  static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -935,6 +1023,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
                 .hooknum = NF_BR_PRE_ROUTING,
                 .priority = NF_BR_PRI_BRNF,
         },
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       {
+               .hook = br_nf_local_in,
+               .pf = NFPROTO_BRIDGE,
+               .hooknum = NF_BR_LOCAL_IN,
+               .priority = NF_BR_PRI_LAST,
+       },
+#endif
         {
                 .hook = br_nf_forward_ip,
                 .pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c

index d14b2dbbd1dfbe9ae137950096e17b3ce35e5184..83743e95939b15bff59705e9948fc8bdc45075b6 100644 (file)
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -290,6 +290,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
         return nf_conntrack_in(skb, &bridge_state);
  }
  
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+                                   const struct nf_hook_state *state)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       if (skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       /* nf_conntrack_confirm() cannot handle concurrent clones,
+        * this happens for broad/multicast frames with e.g. macvlan on top
+        * of the bridge device.
+        */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+               return NF_ACCEPT;
+
+       /* let inet prerouting call conntrack again */
+       skb->_nfct = 0;
+       nf_ct_put(ct);
+
+       return NF_ACCEPT;
+}
+
  static void nf_ct_bridge_frag_save(struct sk_buff *skb,
                                    struct nf_bridge_frag_data *data)
  {
@@ -414,6 +438,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
                 .hooknum        = NF_BR_PRE_ROUTING,
                 .priority       = NF_IP_PRI_CONNTRACK,
         },
+       {
+               .hook           = nf_ct_bridge_in,
+               .pf             = NFPROTO_BRIDGE,
+               .hooknum        = NF_BR_LOCAL_IN,
+               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+       },
         {
                 .hook           = nf_ct_bridge_post,
                 .pf             = NFPROTO_BRIDGE,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index e0f4f76439d3d2549f5018cc7b756df0d12c3387..be6031886f942a75151bea4beb6b43b6d03ccd37 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2850,6 +2850,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
         .get_tuple_skb  = nf_conntrack_get_tuple_skb,
         .attach         = nf_conntrack_attach,
         .set_closing    = nf_conntrack_set_closing,
+       .confirm        = __nf_conntrack_confirm,
  };
  
  void nf_conntrack_init_end(void)
author	Florian Westphal <fw@strlen.de>
	Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 6 Mar 2024 14:38:46 +0000 (14:38 +0000)
include/linux/netfilter.h		patch \| blob \| history
net/bridge/br_netfilter_hooks.c		patch \| blob \| history
net/bridge/netfilter/nf_conntrack_bridge.c		patch \| blob \| history
net/netfilter/nf_conntrack_core.c		patch \| blob \| history