From f2908e792d084828b66528bea6afa674354f5f66 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 23 Jun 2025 14:06:41 +0200 Subject: [PATCH 01/15] ixgbevf: remove legacy Rx Similarly to commit 53844673d555 ("iavf: kill "legacy-rx" for good"), drop skb construction logic in favor of only using napi_build_skb() as a superior option that reduces the need to allocate and copy memory. As IXGBEVF_PRIV_FLAGS_LEGACY_RX is the only private flag in ixgbevf, entirely remove private flags support from the driver. Compared to the iavf changes, ixgbevf has a single complication: MAC type 82599 cannot finely limit the DMA write size with RXDCTL.RLPML; only 1024-byte increments through SRRCTL are available, see commit fe68195daf34 ("ixgbevf: Require large buffers for build_skb on 82599VF") and commit 2bafa8fac19a ("ixgbe: don't set RXDCTL.RLPML for 82599"). Therefore, this is a special case requiring legacy Rx unless large buffers are used. For now, solve this by always using large buffers for this MAC type. 
Suggested-by: Alexander Lobakin Reviewed-by: Aleksandr Loktionov Reviewed-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 48 -------- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 13 +-- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 106 ++---------------- 3 files changed, 13 insertions(+), 154 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 537a60d5276f0f..274eef39c58618 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -72,13 +72,6 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) -static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0) - "legacy-rx", -}; - -#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings) - static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -216,8 +209,6 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->driver, ixgbevf_driver_name, sizeof(drvinfo->driver)); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); - - drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN; } static void ixgbevf_get_ringparam(struct net_device *netdev, @@ -409,8 +400,6 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) return IXGBEVF_TEST_LEN; case ETH_SS_STATS: return IXGBEVF_STATS_LEN; - case ETH_SS_PRIV_FLAGS: - return IXGBEVF_PRIV_FLAGS_STR_LEN; default: return -EINVAL; } @@ -538,10 +527,6 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; } break; - case ETH_SS_PRIV_FLAGS: - memcpy(data, ixgbevf_priv_flags_strings, - IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); - break; } } @@ -931,37 +916,6 @@ static int 
ixgbevf_get_rxfh(struct net_device *netdev, return err; } -static u32 ixgbevf_get_priv_flags(struct net_device *netdev) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - u32 priv_flags = 0; - - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX; - - return priv_flags; -} - -static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - unsigned int flags = adapter->flags; - - flags &= ~IXGBEVF_FLAGS_LEGACY_RX; - if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) - flags |= IXGBEVF_FLAGS_LEGACY_RX; - - if (flags != adapter->flags) { - adapter->flags = flags; - - /* reset interface to repopulate queues */ - if (netif_running(netdev)) - ixgbevf_reinit_locked(adapter); - } - - return 0; -} - static const struct ethtool_ops ixgbevf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, @@ -984,8 +938,6 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, - .get_priv_flags = ixgbevf_get_priv_flags, - .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 516a6fdd23d076..ae2763fea2be2e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -73,7 +73,6 @@ struct ixgbevf_rx_queue_stats { enum ixgbevf_ring_state_t { __IXGBEVF_RX_3K_BUFFER, - __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, @@ -176,21 +175,13 @@ struct ixgbevf_ring { #define clear_ring_uses_large_buffer(ring) \ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define ring_uses_build_skb(ring) \ - test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, 
&(ring)->state) -#define set_ring_build_skb_enabled(ring) \ - set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) - static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) { #if (PAGE_SIZE < 8192) if (ring_uses_large_buffer(ring)) return IXGBEVF_RXBUFFER_3072; - if (ring_uses_build_skb(ring)) - return IXGBEVF_MAX_FRAME_BUILD_SKB; + return IXGBEVF_MAX_FRAME_BUILD_SKB; #endif return IXGBEVF_RXBUFFER_2048; } @@ -377,8 +368,6 @@ struct ixgbevf_adapter { u32 flags; bool link_state; -#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) - #ifdef CONFIG_XFRM struct ixgbevf_ipsec *ipsec; #endif /* CONFIG_XFRM */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d5ce20f47def1f..fc48c89c7bb857 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -602,7 +602,7 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) { - return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; + return IXGBEVF_SKB_PAD; } static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, @@ -832,9 +832,7 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : - SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); #endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); @@ -845,74 +843,6 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, #endif } -static -struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - /* Note, we get here by enabling legacy-rx via: - * - * ethtool --set-priv-flags legacy-rx on - * - * In this mode, we currently get 0 extra XDP headroom as - * opposed to having legacy-rx off, where we process XDP - * packets going to stack via ixgbevf_build_skb(). - * - * For ixgbevf_construct_skb() mode it means that the - * xdp->data_meta will always point to xdp->data, since - * the helper cannot expand the head. Should this ever - * changed in future for legacy-rx mode on, then lets also - * add xdp->data_meta handling here. 
- */ - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, - IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, - ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - (xdp->data + headlen) - - page_address(rx_buffer->page), - size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - rx_buffer->pagecnt_bias++; - } - - return skb; -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -1092,10 +1022,8 @@ static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, #if (PAGE_SIZE < 8192) truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ #else - truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); + truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); #endif return truesize; } @@ -1182,12 +1110,9 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_bytes += size; } else if (skb) { ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { + } else { skb = ixgbevf_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); - } else { - skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - &xdp, rx_desc); } /* exit if we failed to retrieve a buffer */ @@ -1958,8 +1883,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, #if (PAGE_SIZE < 8192) /* Limit the maximum frame size so we don't overrun the skb */ - if (ring_uses_build_skb(ring) && - !ring_uses_large_buffer(ring)) + if (!ring_uses_large_buffer(ring)) rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | IXGBE_RXDCTL_RLPML_EN; #endif @@ -1978,22 +1902,16 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, struct net_device *netdev = adapter->netdev; unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - /* set build_skb and buffer size flags */ - clear_ring_build_skb_enabled(rx_ring); + /* set buffer size flags */ clear_ring_uses_large_buffer(rx_ring); - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - return; - if (PAGE_SIZE < 8192) - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + /* 82599 can't rely on RXDCTL.RLPML to restrict + * the size of the frame + */ + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB || + adapter->hw.mac.type == ixgbe_mac_82599_vf) set_ring_uses_large_buffer(rx_ring); - - /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) - return; - - set_ring_build_skb_enabled(rx_ring); } /** From d7bf74b613322bb4682073988e4f98baece9370d Mon 
Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 24 Jun 2025 12:49:22 +0200 Subject: [PATCH 02/15] ixgbevf: do not share pages between packets Again, as in the related iavf commit 920d86f3c552 ("iavf: drop page splitting and recycling"), drop the page sharing and recycling logic as an intermediate step, in preparation for offloading it to page_pool. Instead of the previous sharing and recycling, just allocate a new page every time. Suggested-by: Alexander Lobakin Reviewed-by: Aleksandr Loktionov Reviewed-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 44 +--- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 239 ++---------------- 2 files changed, 28 insertions(+), 255 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ae2763fea2be2e..2d7ca3f86868bc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -45,12 +45,7 @@ struct ixgbevf_tx_buffer { struct ixgbevf_rx_buffer { dma_addr_t dma; struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; }; struct ixgbevf_stats { @@ -72,7 +67,6 @@ struct ixgbevf_rx_queue_stats { }; enum ixgbevf_ring_state_t { - __IXGBEVF_RX_3K_BUFFER, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, @@ -143,8 +137,7 @@ struct ixgbevf_ring { #define IXGBEVF_MIN_RXD 64 /* Supported Rx Buffer Sizes */ -#define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_256 256 #define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 @@ -152,12 +145,6 @@ struct ixgbevf_ring { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) #define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#if (PAGE_SIZE < 8192) -#define IXGBEVF_MAX_FRAME_BUILD_SKB \ - 
(SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) -#else -#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 -#endif #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) @@ -168,35 +155,6 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 -#define ring_uses_large_buffer(ring) \ - test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define set_ring_uses_large_buffer(ring) \ - set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define clear_ring_uses_large_buffer(ring) \ - clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) - -static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return IXGBEVF_RXBUFFER_3072; - - return IXGBEVF_MAX_FRAME_BUILD_SKB; -#endif - return IXGBEVF_RXBUFFER_2048; -} - -static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return 1; -#endif - return 0; -} - -#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) - #define check_for_tx_hang(ring) \ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index fc48c89c7bb857..05baf28823c869 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -112,9 +112,6 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer 
*old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -537,40 +534,20 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); - /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, size, DMA_FROM_DEVICE); - rx_buffer->pagecnt_bias--; - return rx_buffer; } static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb) + struct ixgbevf_rx_buffer *rx_buffer) { - if (ixgbevf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (IS_ERR(skb)) - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of rx_buffer */ - rx_buffer->page = NULL; + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); } /** @@ -600,38 +577,28 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } -static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) -{ - return IXGBEVF_SKB_PAD; -} - static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *bi) { - struct page *page = bi->page; + struct page *page; dma_addr_t dma; - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - /* alloc new page for storage */ - page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); + page = dev_alloc_page(); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma 
= dma_map_page_attrs(rx_ring->dev, page, 0, - ixgbevf_rx_pg_size(rx_ring), + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); + __free_page(page); rx_ring->rx_stats.alloc_rx_page_failed++; return false; @@ -639,8 +606,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = ixgbevf_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + bi->page_offset = IXGBEVF_SKB_PAD; rx_ring->rx_stats.alloc_rx_page++; return true; @@ -673,7 +639,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - ixgbevf_rx_bufsz(rx_ring), + IXGBEVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* Refresh the desc even if pkt_addr didn't change @@ -755,66 +721,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, return false; } -/** - * ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff) -{ - struct ixgbevf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->dma = old_buff->dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IXGBEVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - - if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) - return false; - -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. 
- */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -829,18 +735,10 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); -#endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif } static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, @@ -857,13 +755,9 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc) { unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + SKB_DATA_ALIGN(xdp->data_end - xdp->data_hard_start); -#endif struct sk_buff *skb; /* Prefetch first cache line of first page. 
If xdp->data_meta @@ -884,13 +778,6 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, if (metasize) skb_metadata_set(skb, metasize); - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - return skb; } @@ -1014,38 +901,11 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, return result; } -static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, - unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -#endif - return truesize; -} - -static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size); - -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; @@ -1054,10 +914,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, int xdp_res = 0; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_init_buff(&xdp, IXGBEVF_RXBUFFER_3072, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { struct ixgbevf_rx_buffer *rx_buffer; @@ -1081,31 +938,24 @@ static int ixgbevf_clean_rx_irq(struct 
ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + rx_buffer = + ixgbevf_get_rx_buffer(rx_ring, IXGBEVF_RXBUFFER_3072); /* retrieve a buffer from the ring */ if (!skb) { - unsigned int offset = ixgbevf_rx_offset(rx_ring); + unsigned int offset = rx_buffer->page_offset; unsigned char *hard_start; hard_start = page_address(rx_buffer->page) + rx_buffer->page_offset - offset; xdp_prepare_buff(&xdp, hard_start, offset, size, true); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); -#endif xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); } if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) { + if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; - ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, - size); - } else { - rx_buffer->pagecnt_bias++; - } + total_rx_packets++; total_rx_bytes += size; } else if (skb) { @@ -1118,11 +968,13 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); + if (xdp_res == IXGBEVF_XDP_CONSUMED) + __free_page(rx_buffer->page); + rx_buffer->page = NULL; cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1699,10 +1551,7 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1880,13 +1729,6 @@ static void 
ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); - -#if (PAGE_SIZE < 8192) - /* Limit the maximum frame size so we don't overrun the skb */ - if (!ring_uses_large_buffer(ring)) - rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | - IXGBE_RXDCTL_RLPML_EN; -#endif } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; @@ -1896,24 +1738,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); } -static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring) -{ - struct net_device *netdev = adapter->netdev; - unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - - /* set buffer size flags */ - clear_ring_uses_large_buffer(rx_ring); - - if (PAGE_SIZE < 8192) - /* 82599 can't rely on RXDCTL.RLPML to restrict - * the size of the frame - */ - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB || - adapter->hw.mac.type == ixgbe_mac_82599_vf) - set_ring_uses_large_buffer(rx_ring); -} - /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1944,7 +1768,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; - ixgbevf_set_rx_buffer_len(adapter, rx_ring); ixgbevf_configure_rx_ring(adapter, rx_ring); } } @@ -2323,19 +2146,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - ixgbevf_rx_bufsz(rx_ring), + IXGBEVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, - rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_buffer->page, - 
rx_buffer->pagecnt_bias); - + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); + __free_page(rx_buffer->page); + rx_buffer->page = NULL; i++; if (i == rx_ring->count) i = 0; @@ -4394,9 +4211,7 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) /* verify ixgbevf ring attributes are sufficient for XDP */ for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbevf_ring *ring = adapter->rx_ring[i]; - - if (frame_size > ixgbevf_rx_bufsz(ring)) + if (frame_size > IXGBEVF_RXBUFFER_3072) return -EINVAL; } From 0cb83cd3caefa7e390b259f56d714a9e645b7e6c Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 9 Jul 2025 09:28:02 +0200 Subject: [PATCH 03/15] ixgbevf: use libeth in Rx processing Use page_pool buffers by means of libeth in the Rx queues; this significantly reduces the code complexity of the driver itself. Suggested-by: Alexander Lobakin Reviewed-by: Alexander Lobakin Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ixgbevf/defines.h | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 21 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 352 +++++++----------- 4 files changed, 139 insertions(+), 237 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 288fa8ce53af0a..b513baf3cbb296 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -203,6 +203,7 @@ config IXGBE_IPSEC config IXGBEVF tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support" depends on PCI_MSI + select LIBETH_XDP help This driver supports Intel(R) PCI Express virtual functions for the Intel(R) ixgbe driver. 
For more information on how to identify your diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index e177d1d58696aa..afc927dd14381b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -71,7 +71,7 @@ typedef u32 ixgbe_link_speed; #define IXGBE_PSRTYPE_L2HDR 0x00001000 /* SRRCTL bit definitions */ -#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_BSIZEPKT_STEP 1024 #define IXGBE_SRRCTL_RDMTS_SHIFT 22 #define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 #define IXGBE_SRRCTL_DROP_EN 0x10000000 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 2d7ca3f86868bc..ebf771f0caa4b3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -42,12 +42,6 @@ struct ixgbevf_tx_buffer { u32 tx_flags; }; -struct ixgbevf_rx_buffer { - dma_addr_t dma; - struct page *page; - __u32 page_offset; -}; - struct ixgbevf_stats { u64 packets; u64 bytes; @@ -84,19 +78,22 @@ struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; - struct bpf_prog *xdp_prog; - struct device *dev; + struct bpf_prog __rcu *xdp_prog; + union { + struct page_pool *pp; /* Rx ring */ + struct device *dev; /* Tx ring */ + }; void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. 
address of descriptor ring */ unsigned int size; /* length in bytes */ + u32 truesize; /* Rx buffer full size */ u16 count; /* amount of descriptors */ u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; union { + struct libeth_fqe *rx_fqes; struct ixgbevf_tx_buffer *tx_buffer_info; - struct ixgbevf_rx_buffer *rx_buffer_info; }; unsigned long state; struct ixgbevf_stats stats; @@ -115,6 +112,7 @@ struct ixgbevf_ring { */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ + u32 rx_buf_len; } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -144,7 +142,8 @@ struct ixgbevf_ring { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#define IXGBEVF_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 05baf28823c869..cedbf0a4d0a546 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "ixgbevf.h" @@ -82,6 +83,7 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); +MODULE_IMPORT_NS("LIBETH"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -304,7 +306,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, /* free the skb */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + libeth_xdp_return_va(tx_buffer->data, true); else napi_consume_skb(tx_buffer->skb, napi_budget); @@ -521,33 +523,6 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring 
*rx_ring, if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); - - skb->protocol = eth_type_trans(skb, rx_ring->netdev); -} - -static -struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, - const unsigned int size) -{ - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - return rx_buffer; -} - -static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer) -{ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); } /** @@ -577,41 +552,6 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } -static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *bi) -{ - struct page *page; - dma_addr_t dma; - - /* alloc new page for storage */ - page = dev_alloc_page(); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = IXGBEVF_SKB_PAD; - rx_ring->rx_stats.alloc_rx_page++; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on @@ -621,39 +561,34 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; - 
struct ixgbevf_rx_buffer *bi; - unsigned int i = rx_ring->next_to_use; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; + u16 ntu = rx_ring->next_to_use; /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + if (unlikely(!cleaned_count || !rx_ring->netdev)) return; - rx_desc = IXGBEVF_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; + rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); do { - if (!ixgbevf_alloc_mapped_page(rx_ring, bi)) + dma_addr_t addr; + + addr = libeth_rx_alloc(&fq, ntu); + if (addr == DMA_MAPPING_ERROR) break; - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - IXGBEVF_RXBUFFER_3072, - DMA_FROM_DEVICE); - - /* Refresh the desc even if pkt_addr didn't change - * because each write-back erases this info. - */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; - i++; - if (unlikely(!i)) { + ntu++; + if (unlikely(ntu == fq.count)) { rx_desc = IXGBEVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; + ntu = 0; } /* clear the length for the next_to_use descriptor */ @@ -662,14 +597,9 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, cleaned_count--; } while (cleaned_count); - i += rx_ring->count; - - if (rx_ring->next_to_use != i) { + if (likely(rx_ring->next_to_use != ntu)) { /* record the next descriptor to use */ - rx_ring->next_to_use = i; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; + rx_ring->next_to_use = ntu; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -677,7 +607,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, * such as IA-64). 
*/ wmb(); - ixgbevf_write_tail(rx_ring, i); + ixgbevf_write_tail(rx_ring, ntu); } } @@ -714,10 +644,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, } } - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - return false; } @@ -730,15 +656,15 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, * * This function will add the data contained in rx_buffer->page to the skb. **/ -static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, +static void ixgbevf_add_rx_frag(const struct libeth_fqe *rx_buffer, struct sk_buff *skb, unsigned int size) { - unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); + u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); + skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->netmem, rx_buffer->offset + hr, + size, rx_buffer->truesize); } static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, @@ -749,38 +675,6 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); - struct sk_buff *skb; - - /* Prefetch first cache line of first page. If xdp->data_meta - * is unused, this points to xdp->data, otherwise, we likely - * have a consumer accessing first few bytes of meta data, - * and then actual data. 
- */ - net_prefetch(xdp->data_meta); - - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); - if (unlikely(!skb)) - return NULL; - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - return skb; -} - #define IXGBEVF_XDP_PASS 0 #define IXGBEVF_XDP_CONSUMED 1 #define IXGBEVF_XDP_TX 2 @@ -864,25 +758,25 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring, - struct xdp_buff *xdp) + struct libeth_xdp_buff *xdp) { int result = IXGBEVF_XDP_PASS; struct ixgbevf_ring *xdp_ring; struct bpf_prog *xdp_prog; u32 act; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); + xdp_prog = rcu_dereference(rx_ring->xdp_prog); if (!xdp_prog) goto xdp_out; - act = bpf_prog_run_xdp(xdp_prog, xdp); + act = bpf_prog_run_xdp(xdp_prog, &xdp->base); switch (act) { case XDP_PASS: break; case XDP_TX: xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + result = ixgbevf_xmit_xdp_ring(xdp_ring, &xdp->base); if (result == IXGBEVF_XDP_CONSUMED) goto out_failure; break; @@ -895,6 +789,7 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: result = IXGBEVF_XDP_CONSUMED; + libeth_xdp_return_buff(xdp); break; } xdp_out: @@ -909,16 +804,15 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; + LIBETH_XDP_ONSTACK_BUFF(xdp); bool xdp_xmit = false; - struct xdp_buff xdp; int xdp_res = 0; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ - xdp_init_buff(&xdp, IXGBEVF_RXBUFFER_3072, &rx_ring->xdp_rxq); + xdp->base.rxq = 
&rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { - struct ixgbevf_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; + struct libeth_fqe *rx_buffer; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -929,7 +823,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); - if (!size) + if (unlikely(!size)) break; /* This memory barrier is needed to keep us from reading @@ -938,18 +832,14 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = - ixgbevf_get_rx_buffer(rx_ring, IXGBEVF_RXBUFFER_3072); + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + libeth_rx_sync_for_cpu(rx_buffer, size); /* retrieve a buffer from the ring */ if (!skb) { - unsigned int offset = rx_buffer->page_offset; - unsigned char *hard_start; - - hard_start = page_address(rx_buffer->page) + - rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); + libeth_xdp_prepare_buff(xdp, rx_buffer, size); + prefetch(xdp->data); + xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); } if (xdp_res) { @@ -959,22 +849,18 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_packets++; total_rx_bytes += size; } else if (skb) { - ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); + ixgbevf_add_rx_frag(rx_buffer, skb, size); } else { - skb = ixgbevf_build_skb(rx_ring, rx_buffer, - &xdp, rx_desc); + skb = xdp_build_skb_from_buff(&xdp->base); } /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { + if (unlikely(!xdp_res && !skb)) { + libeth_xdp_return_buff_slow(xdp); rx_ring->rx_stats.alloc_rx_buff_failed++; break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); - if (xdp_res == IXGBEVF_XDP_CONSUMED) - __free_page(rx_buffer->page); - rx_buffer->page = NULL; 
cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -982,21 +868,19 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, continue; /* verify the packet layout is correct */ - if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || + unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - /* Workaround hardware that can't do proper VEPA multicast * source pruning. */ - if ((skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) && - ether_addr_equal(rx_ring->netdev->dev_addr, - eth_hdr(skb)->h_source)) { + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { dev_kfree_skb_irq(skb); continue; } @@ -1004,13 +888,14 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + total_rx_packets++; + ixgbevf_rx_skb(q_vector, skb); /* reset skb pointer */ skb = NULL; - - /* update budget accounting */ - total_rx_packets++; } /* place incomplete frames back on ring for completion */ @@ -1543,7 +1428,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring, int index) + struct ixgbevf_ring *ring, int index, + bool rlpml_valid) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; @@ -1551,7 +1437,11 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if 
(rlpml_valid) + srrctl |= DIV_ROUND_UP(ring->rx_buf_len, + IXGBE_SRRCTL_BSIZEPKT_STEP); + else + srrctl |= ring->rx_buf_len / IXGBE_SRRCTL_BSIZEPKT_STEP; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1682,9 +1572,10 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_rx_desc *rx_desc; + u8 reg_idx = ring->reg_idx; + bool rlpml_valid = false; u64 rdba = ring->dma; u32 rxdctl; - u8 reg_idx = ring->reg_idx; /* disable queue to avoid issues while updating state */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx)); @@ -1710,10 +1601,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx); - /* initialize rx_buffer_info */ - memset(ring->rx_buffer_info, 0, - sizeof(struct ixgbevf_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ rx_desc = IXGBEVF_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -1721,16 +1608,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; - ring->next_to_alloc = 0; - - ixgbevf_configure_srrctl(adapter, ring, reg_idx); /* RXDCTL.RLPML does not work on 82599 */ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); + u32 pkt_len = + READ_ONCE(adapter->netdev->mtu) + LIBETH_RX_LL_LEN; + + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); + if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) { + rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; + rlpml_valid = true; + } } + ixgbevf_configure_srrctl(adapter, ring, reg_idx, rlpml_valid); + rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); @@ -2126,8 +2018,6 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) **/ 
static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { - u16 i = rx_ring->next_to_clean; - /* Free Rx ring sk_buff */ if (rx_ring->skb) { dev_kfree_skb(rx_ring->skb); @@ -2135,30 +2025,14 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) } /* Free all the Rx ring pages */ - while (i != rx_ring->next_to_alloc) { - struct ixgbevf_rx_buffer *rx_buffer; + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; - rx_buffer = &rx_ring->rx_buffer_info[i]; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. - */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IXGBEVF_RXBUFFER_3072, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); - __free_page(rx_buffer->page); - rx_buffer->page = NULL; - i++; - if (i == rx_ring->count) + libeth_rx_recycle_slow(rx_fqe->netmem); + if (unlikely(++i == rx_ring->count)) i = 0; } - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -2177,7 +2051,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) /* Free all the Tx ring sk_buffs */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + libeth_xdp_return_va(tx_buffer->data, false); else dev_kfree_skb_any(tx_buffer->skb); @@ -3259,12 +3133,25 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - int size; + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? 
+ LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + int ret; - size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vmalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; u64_stats_init(&rx_ring->syncp); @@ -3272,25 +3159,31 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->desc) + if (!rx_ring->desc) { + ret = -ENOMEM; goto err; + } /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0) < 0) + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, rx_ring->truesize); + if (ret) goto err; - rx_ring->xdp_prog = adapter->xdp_prog; + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); + + rcu_assign_pointer(rx_ring->xdp_prog, adapter->xdp_prog); return 0; err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; + return ret; } /** @@ -3331,17 +3224,24 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) **/ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + ixgbevf_clean_rx_ring(rx_ring); - rx_ring->xdp_prog = NULL; + rcu_assign_pointer(rx_ring->xdp_prog, NULL); + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); 
xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, rx_ring->dma); - rx_ring->desc = NULL; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; } /** @@ -4233,7 +4133,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) ixgbevf_open(dev); } else { for (i = 0; i < adapter->num_rx_queues; i++) - xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); + rcu_assign_pointer(adapter->rx_ring[i]->xdp_prog, + adapter->xdp_prog); + synchronize_net(); } if (old_prog) From 93e53abfadb2fb0f1466213e5be794d85b04ee4f Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 2 Sep 2025 16:31:51 +0200 Subject: [PATCH 04/15] ixgbevf: support XDP multi-buffer on Rx path Implement XDP support for received fragmented packets; this requires using some helpers from libeth_xdp.
Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 3 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 97 +++++++------------ 2 files changed, 35 insertions(+), 65 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ebf771f0caa4b3..2626af0393614b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "vf.h" @@ -105,7 +106,6 @@ struct ixgbevf_ring { struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; - struct sk_buff *skb; /* holds the special value that gets the hardware register offset * associated with this ring, which is different for DCB and RSS modes @@ -113,6 +113,7 @@ struct ixgbevf_ring { u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; + struct libeth_xdp_buff_stash xdp_stash; } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index cedbf0a4d0a546..641d87f93864c2 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -647,26 +648,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, return false; } -/** - * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: size of buffer to be added - * - * This function will add the data contained in rx_buffer->page to the skb. 
- **/ -static void ixgbevf_add_rx_frag(const struct libeth_fqe *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ - u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - - skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->netmem, rx_buffer->offset + hr, - size, rx_buffer->truesize); -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -803,16 +784,16 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); - struct sk_buff *skb = rx_ring->skb; LIBETH_XDP_ONSTACK_BUFF(xdp); bool xdp_xmit = false; int xdp_res = 0; - xdp->base.rxq = &rx_ring->xdp_rxq; + libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; struct libeth_fqe *rx_buffer; + struct sk_buff *skb; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -833,43 +814,38 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rmb(); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; - libeth_rx_sync_for_cpu(rx_buffer, size); + libeth_xdp_process_buff(xdp, rx_buffer, size); - /* retrieve a buffer from the ring */ - if (!skb) { - libeth_xdp_prepare_buff(xdp, rx_buffer, size); - prefetch(xdp->data); - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); - } + cleaned_count++; + /* fetch next buffer in frame if non-eop */ + if (ixgbevf_is_non_eop(rx_ring, rx_desc)) + continue; + + total_rx_packets++; + total_rx_bytes += xdp_get_buff_len(&xdp->base); + xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); if (xdp_res) { if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; - total_rx_packets++; - total_rx_bytes += size; - } else if (skb) { - ixgbevf_add_rx_frag(rx_buffer, skb, size); - } else { - skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data 
= NULL; + continue; } + skb = xdp_build_skb_from_buff(&xdp->base); + /* exit if we failed to retrieve a buffer */ - if (unlikely(!xdp_res && !skb)) { + if (unlikely(!skb)) { libeth_xdp_return_buff_slow(xdp); rx_ring->rx_stats.alloc_rx_buff_failed++; break; } - cleaned_count++; - - /* fetch next buffer in frame if non-eop */ - if (ixgbevf_is_non_eop(rx_ring, rx_desc)) - continue; + xdp->data = NULL; /* verify the packet layout is correct */ - if (xdp_res || - unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } @@ -888,18 +864,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - total_rx_packets++; - ixgbevf_rx_skb(q_vector, skb); - - /* reset skb pointer */ - skb = NULL; } /* place incomplete frames back on ring for completion */ - rx_ring->skb = skb; + libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); if (xdp_xmit) { struct ixgbevf_ring *xdp_ring = @@ -2019,10 +1988,7 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { /* Free Rx ring sk_buff */ - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + libeth_xdp_return_stash(&rx_ring->xdp_stash); /* Free all the Rx ring pages */ for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { @@ -4103,16 +4069,19 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, return features; } -static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + u32 frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; struct 
ixgbevf_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; + bool requires_mbuf; - /* verify ixgbevf ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) { - if (frame_size > IXGBEVF_RXBUFFER_3072) - return -EINVAL; + requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM); + if (prog && !prog->aux->xdp_has_frags && requires_mbuf) { + NL_SET_ERR_MSG_MOD(extack, + "Configured MTU requires non-linear frames and XDP prog does not support frags"); + return -EOPNOTSUPP; } old_prog = xchg(&adapter->xdp_prog, prog); @@ -4132,7 +4101,7 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (netif_running(dev)) ixgbevf_open(dev); } else { - for (i = 0; i < adapter->num_rx_queues; i++) + for (int i = 0; i < adapter->num_rx_queues; i++) rcu_assign_pointer(adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); synchronize_net(); @@ -4148,7 +4117,7 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return ixgbevf_xdp_setup(dev, xdp->prog); + return ixgbevf_xdp_setup(dev, xdp->prog, xdp->extack); default: return -EINVAL; } @@ -4300,7 +4269,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; From 60a8d27dcfc7b9f88c9e8192c3bfdc2d70a096c0 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 9 Sep 2025 13:46:44 +0200 Subject: [PATCH 05/15] ixgbevf: XDP_TX in multi-buffer through libeth Use libeth to support XDP_TX action for segmented packets. 
Reviewed-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 14 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 217 +++++++++--------- .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 112 +++++++++ 3 files changed, 224 insertions(+), 119 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 2626af0393614b..a27081ee764b40 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -81,20 +81,22 @@ struct ixgbevf_ring { struct net_device *netdev; struct bpf_prog __rcu *xdp_prog; union { - struct page_pool *pp; /* Rx ring */ + struct page_pool *pp; /* Rx and XDP rings */ struct device *dev; /* Tx ring */ }; void *desc; /* descriptor ring memory */ - dma_addr_t dma; /* phys. address of descriptor ring */ - unsigned int size; /* length in bytes */ - u32 truesize; /* Rx buffer full size */ + union { + u32 truesize; /* Rx buffer full size */ + u32 pending; /* Sent-not-completed descriptors */ + }; u16 count; /* amount of descriptors */ - u16 next_to_use; u16 next_to_clean; + u32 next_to_use; union { struct libeth_fqe *rx_fqes; struct ixgbevf_tx_buffer *tx_buffer_info; + struct libeth_sqe *xdp_sqes; }; unsigned long state; struct ixgbevf_stats stats; @@ -114,6 +116,8 @@ struct ixgbevf_ring { int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; struct libeth_xdp_buff_stash xdp_stash; + unsigned int dma_size; /* length in bytes */ + dma_addr_t dma; /* phys. address of descriptor ring */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 641d87f93864c2..7073bbec872a2c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -33,7 +33,7 @@ #include #include -#include "ixgbevf.h" +#include "ixgbevf_xdp_lib.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = @@ -306,10 +306,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_ipsec++; /* free the skb */ - if (ring_is_xdp(tx_ring)) - libeth_xdp_return_va(tx_buffer->data, true); - else - napi_consume_skb(tx_buffer->skb, napi_budget); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -392,9 +389,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -402,9 +398,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -660,94 +653,85 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, #define IXGBEVF_XDP_CONSUMED 1 #define IXGBEVF_XDP_TX 2 -static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, - struct xdp_buff *xdp) +static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { - struct ixgbevf_tx_buffer *tx_buffer; - union ixgbe_adv_tx_desc *tx_desc; - u32 len, cmd_type; - dma_addr_t dma; - u16 i; + 
ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); +} - len = xdp->data_end - xdp->data; +static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; - if (unlikely(!ixgbevf_desc_unused(ring))) - return IXGBEVF_XDP_CONSUMED; + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(ring->dev, dma)) - return IXGBEVF_XDP_CONSUMED; + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd_type |= IXGBE_TXD_CMD_EOP; - /* record the location of the first descriptor for this packet */ - i = ring->next_to_use; - tx_buffer = &ring->tx_buffer_info[i]; - - dma_unmap_len_set(tx_buffer, len, len); - dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; - tx_buffer->bytecount = len; - tx_buffer->gso_segs = 1; - tx_buffer->protocol = 0; - - /* Populate minimal context descriptor that will provide for the - * fact that we are expected to process Ethernet frames. 
- */ - if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { - struct ixgbe_adv_tx_context_desc *context_desc; + if (desc.flags & LIBETH_XDP_TX_FIRST) { + struct skb_shared_info *sinfo = sq->sqes[i].sinfo; + u16 full_len = desc.len; - set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + if (desc.flags & LIBETH_XDP_TX_MULTI) + full_len += sinfo->xdp_frags_size; - context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); - context_desc->vlan_macip_lens = - cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); - context_desc->fceof_saidx = 0; - context_desc->type_tucmd_mlhl = - cpu_to_le32(IXGBE_TXD_CMD_DEXT | - IXGBE_ADVTXD_DTYP_CTXT); - context_desc->mss_l4len_idx = 0; - - i = 1; + tx_desc->read.olinfo_status = + cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); } - /* put descriptor type bits */ - cmd_type = IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} - tx_desc = IXGBEVF_TX_DESC(ring, i); - tx_desc->read.buffer_addr = cpu_to_le64(dma); +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_END(); - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - tx_desc->read.olinfo_status = - cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | - IXGBE_ADVTXD_CC); +static void ixgbevf_xdp_set_rs(struct ixgbevf_ring *xdp_ring, u32 cached_ntu) +{ + u32 ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; + union ixgbe_adv_tx_desc *desc; - /* Avoid any potential race with cleanup */ - smp_wmb(); + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); +} - /* set next_to_watch value indicating a packet is present */ - i++; - if (i == ring->count) - i = 0; +static void ixgbevf_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, + bool xdp_xmit, u32 cached_ntu) +{ + struct ixgbevf_ring *xdp_ring = tx_bulk->xdpsq; + + if (!xdp_xmit) + goto unlock; + + if (tx_bulk->count) + ixgbevf_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); - tx_buffer->next_to_watch = tx_desc; - ring->next_to_use = i; + ixgbevf_xdp_set_rs(xdp_ring, cached_ntu); - return IXGBEVF_XDP_TX; + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); +unlock: + rcu_read_unlock(); } -static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring, +static int ixgbevf_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, struct libeth_xdp_buff *xdp) { int result = IXGBEVF_XDP_PASS; - struct ixgbevf_ring *xdp_ring; - struct bpf_prog *xdp_prog; + const struct bpf_prog *xdp_prog; u32 act; - xdp_prog = rcu_dereference(rx_ring->xdp_prog); - + xdp_prog = tx_bulk->prog; if (!xdp_prog) goto xdp_out; @@ -756,17 +740,16 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, case XDP_PASS: break; case XDP_TX: - xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, &xdp->base); - if (result == IXGBEVF_XDP_CONSUMED) - goto out_failure; + result = IXGBEVF_XDP_TX; + if (!libeth_xdp_tx_queue_bulk(tx_bulk, xdp, + ixgbevf_xdp_flush_tx)) + result = IXGBEVF_XDP_CONSUMED; break; default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); + bpf_warn_invalid_xdp_action(tx_bulk->dev, xdp_prog, act); fallthrough; case XDP_ABORTED: -out_failure: - trace_xdp_exception(rx_ring->netdev, 
xdp_prog, act); + trace_xdp_exception(tx_bulk->dev, xdp_prog, act); fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: result = IXGBEVF_XDP_CONSUMED; @@ -784,11 +767,19 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); + u32 cached_ntu; bool xdp_xmit = false; int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + if (xdp_tx_bulk.prog) + cached_ntu = + ((struct ixgbevf_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -824,7 +815,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_packets++; total_rx_bytes += xdp_get_buff_len(&xdp->base); - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); + xdp_res = ixgbevf_run_xdp(&xdp_tx_bulk, xdp); if (xdp_res) { if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; @@ -870,16 +861,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - if (xdp_xmit) { - struct ixgbevf_ring *xdp_ring = - adapter->xdp_ring[rx_ring->queue_index]; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. 
- */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); - } + ixgbevf_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit, cached_ntu); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -909,6 +891,8 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + continue; if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) clean_complete = false; } @@ -1348,6 +1332,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1360,8 +1345,12 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbevf_tx_buffer) * ring->count); + if (!ring_is_xdp(ring)) + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbevf_tx_buffer) * ring->count); + else + memset(ring->xdp_sqes, 0, + sizeof(struct libeth_sqe) * ring->count); clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -2016,10 +2005,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - if (ring_is_xdp(tx_ring)) - libeth_xdp_return_va(tx_buffer->data, false); - else - dev_kfree_skb_any(tx_buffer->skb); + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2088,7 +2074,7 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); for (i = 0; i < adapter->num_xdp_queues; i++) - 
ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); + ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); } void ixgbevf_down(struct ixgbevf_adapter *adapter) @@ -2834,8 +2820,6 @@ static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter) if (netif_carrier_ok(adapter->netdev)) { for (i = 0; i < adapter->num_tx_queues; i++) set_check_for_tx_hang(adapter->tx_ring[i]); - for (i = 0; i < adapter->num_xdp_queues; i++) - set_check_for_tx_hang(adapter->xdp_ring[i]); } /* get one bit for every active Tx/Rx interrupt vector */ @@ -2979,7 +2963,10 @@ static void ixgbevf_service_task(struct work_struct *work) **/ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) { - ixgbevf_clean_tx_ring(tx_ring); + if (!ring_is_xdp(tx_ring)) + ixgbevf_clean_tx_ring(tx_ring); + else + ixgbevf_clean_xdp_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -2988,7 +2975,7 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) if (!tx_ring->desc) return; - dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, + dma_free_coherent(tx_ring->dev, tx_ring->dma_size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; @@ -3023,7 +3010,9 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; - size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; + size = (!ring_is_xdp(tx_ring) ? 
sizeof(struct ixgbevf_tx_buffer) : + sizeof(struct libeth_sqe)) * tx_ring->count; + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -3031,10 +3020,10 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->dma_size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->dma_size = ALIGN(tx_ring->dma_size, 4096); - tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->dma_size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -3122,10 +3111,10 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { @@ -3201,7 +3190,7 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(fq.pp->p.dev, rx_ring->dma_size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h new file mode 100644 index 00000000000000..629a5943eb3e27 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 
2010-2026 Intel Corporation */ + +#ifndef _IXGBEVF_XDP_LIB_H_ +#define _IXGBEVF_XDP_LIB_H_ + +#include + +#include "ixgbevf.h" + +static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; + + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; + + if (!idx--) + break; + + rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); + + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 0 : idx + 1; + } + + return to_clean; +} + +static inline void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, + bool in_napi, u16 to_clean) +{ + struct libeth_xdpsq_napi_stats stats = { }; + u32 ntc = xdp_ring->next_to_clean; + struct xdp_frame_bulk cbulk; + struct libeth_cq_pp cp = { + .bq = &cbulk, + .dev = xdp_ring->dev, + .xss = &stats, + .napi = in_napi, + }; + + xdp_frame_bulk_init(&cbulk); + xdp_ring->pending -= to_clean; + + while (likely(to_clean--)) { + libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); + ntc++; + ntc = unlikely(ntc == xdp_ring->count) ? 
0 : ntc; + } + + xdp_ring->next_to_clean = ntc; + xdp_flush_frame_bulk(&cbulk); +} + +static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + + if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); + + if (likely(to_clean)) + ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, + &xdp_ring->state))) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, 1); + } + + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = NULL, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = NULL, + .sqes = xdp_ring->xdp_sqes, + }; + + return ixgbevf_desc_unused(xdp_ring); +} + +#endif /* _IXGBEVF_XDP_LIB_H_ */ From 57735ac5314255e03e611e6c9d6043b5e5da4385 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 22 Sep 2025 07:14:24 +0200 Subject: [PATCH 06/15] ixgbevf: support XDP_REDIRECT and .ndo_xdp_xmit To fully support XDP_REDIRECT, utilize more libeth helpers in XDP Rx path, hence save cached_ntu in the ring structure instead of stack. ixgbevf-supported VFs usually have few queues, so use libeth_xdpsq_lock functionality for XDP queue sharing. Adjust filling-in of XDP Tx descriptors to use data from xdp frame. Otherwise, simply use libeth helpers to implement .ndo_xdp_xmit(). While at it, fix a typo in libeth docs. 
Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 135 ++++++------------ .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 30 +++- include/net/libeth/xdp.h | 2 +- 4 files changed, 78 insertions(+), 91 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index a27081ee764b40..ea86679e4f81d0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -98,6 +98,8 @@ struct ixgbevf_ring { struct ixgbevf_tx_buffer *tx_buffer_info; struct libeth_sqe *xdp_sqes; }; + struct libeth_xdpsq_lock xdpq_lock; + u32 cached_ntu; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 7073bbec872a2c..f26528cadb981f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -649,13 +649,10 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -#define IXGBEVF_XDP_PASS 0 -#define IXGBEVF_XDP_CONSUMED 1 -#define IXGBEVF_XDP_TX 2 - static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); + libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); } static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, @@ -674,11 +671,16 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, cmd_type |= IXGBE_TXD_CMD_EOP; if (desc.flags & LIBETH_XDP_TX_FIRST) { - struct skb_shared_info *sinfo = sq->sqes[i].sinfo; + struct libeth_sqe *sqe = &sq->sqes[i]; + struct skb_shared_info *sinfo; u16 full_len = desc.len; - if (desc.flags & LIBETH_XDP_TX_MULTI) + if (desc.flags & LIBETH_XDP_TX_MULTI) { + sinfo 
= sqe->type == LIBETH_SQE_XDP_TX ? + sqe->sinfo : + xdp_get_shared_info_from_frame(sqe->xdpf); full_len += sinfo->xdp_frags_size; + } tx_desc->read.olinfo_status = cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | @@ -692,74 +694,13 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, LIBETH_XDP_DEFINE_START(); LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_tx); +LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, + ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); -static void ixgbevf_xdp_set_rs(struct ixgbevf_ring *xdp_ring, u32 cached_ntu) -{ - u32 ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; - union ixgbe_adv_tx_desc *desc; - - desc = IXGBEVF_TX_DESC(xdp_ring, ltu); - xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; - desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); -} - -static void ixgbevf_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - bool xdp_xmit, u32 cached_ntu) -{ - struct ixgbevf_ring *xdp_ring = tx_bulk->xdpsq; - - if (!xdp_xmit) - goto unlock; - - if (tx_bulk->count) - ixgbevf_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); - - ixgbevf_xdp_set_rs(xdp_ring, cached_ntu); - - /* Finish descriptor writes before bumping tail */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); -unlock: - rcu_read_unlock(); -} - -static int ixgbevf_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - struct libeth_xdp_buff *xdp) -{ - int result = IXGBEVF_XDP_PASS; - const struct bpf_prog *xdp_prog; - u32 act; - - xdp_prog = tx_bulk->prog; - if (!xdp_prog) - goto xdp_out; - - act = bpf_prog_run_xdp(xdp_prog, &xdp->base); - switch (act) { - case XDP_PASS: - break; - case XDP_TX: - result = IXGBEVF_XDP_TX; - if (!libeth_xdp_tx_queue_bulk(tx_bulk, xdp, - 
ixgbevf_xdp_flush_tx)) - result = IXGBEVF_XDP_CONSUMED; - break; - default: - bpf_warn_invalid_xdp_action(tx_bulk->dev, xdp_prog, act); - fallthrough; - case XDP_ABORTED: - trace_xdp_exception(tx_bulk->dev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = IXGBEVF_XDP_CONSUMED; - libeth_xdp_return_buff(xdp); - break; - } -xdp_out: - return result; -} - static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) @@ -769,17 +710,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, u16 cleaned_count = ixgbevf_desc_unused(rx_ring); LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); - u32 cached_ntu; - bool xdp_xmit = false; - int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, adapter->netdev, adapter->xdp_ring, adapter->num_xdp_queues); - if (xdp_tx_bulk.prog) - cached_ntu = - ((struct ixgbevf_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -815,14 +750,9 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_packets++; total_rx_bytes += xdp_get_buff_len(&xdp->base); - xdp_res = ixgbevf_run_xdp(&xdp_tx_bulk, xdp); - if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) - xdp_xmit = true; - - xdp->data = NULL; + if (xdp_tx_bulk.prog && + !ixgbevf_xdp_run_prog(xdp, &xdp_tx_bulk)) continue; - } skb = xdp_build_skb_from_buff(&xdp->base); @@ -861,7 +791,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - ixgbevf_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit, cached_ntu); + ixgbevf_xdp_finalize_xdp_napi(&xdp_tx_bulk); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -873,6 +803,23 @@ static int 
ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, return total_rx_packets; } +static int ixgbevf_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(!adapter->num_xdp_queues)) + return -ENXIO; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbevf_xdp_flush_xmit, + ixgbevf_xdp_rs_and_bump); +} + /** * ixgbevf_poll - NAPI polling calback * @napi: napi struct with our devices info in it @@ -1333,6 +1280,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, ring->next_to_clean = 0; ring->next_to_use = 0; ring->pending = 0; + ring->cached_ntu = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1345,12 +1293,15 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - if (!ring_is_xdp(ring)) + if (!ring_is_xdp(ring)) { memset(ring->tx_buffer_info, 0, sizeof(struct ixgbevf_tx_buffer) * ring->count); - else + } else { memset(ring->xdp_sqes, 0, sizeof(struct libeth_sqe) * ring->count); + libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, + num_possible_cpus() > adapter->num_xdp_queues); + } clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -4077,6 +4028,8 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, /* If transitioning XDP modes reconfigure rings */ if (!!prog != !!old_prog) { + xdp_features_clear_redirect_target(dev); + /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the * hardware is not flexible enough to do this dynamically. 
@@ -4096,6 +4049,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, synchronize_net(); } + if (prog) + xdp_features_set_redirect_target(dev, true); + if (old_prog) bpf_prog_put(old_prog); @@ -4126,6 +4082,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, + .ndo_xdp_xmit = ixgbevf_xdp_xmit, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4258,7 +4215,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG; + libeth_xdp_set_features_noredir(netdev, NULL, 0, NULL); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index 629a5943eb3e27..da4f397944ed88 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -66,6 +66,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) { struct ixgbevf_ring *xdp_ring = xdpsq; + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); @@ -99,7 +100,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) *sq = (struct libeth_xdpsq) { .count = xdp_ring->count, .descs = xdp_ring->desc, - .lock = NULL, + .lock = &xdp_ring->xdpq_lock, .ntu = &xdp_ring->next_to_use, .pending = &xdp_ring->pending, .pool = NULL, @@ -109,4 +110,31 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) return ixgbevf_desc_unused(xdp_ring); } +static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) +{ + struct ixgbevf_ring 
*xdp_ring = xdpsq; + union ixgbe_adv_tx_desc *desc; + u32 ltu; + + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + goto unlock; + + ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; + xdp_ring->cached_ntu = xdp_ring->next_to_use; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); + +unlock: + libeth_xdpsq_unlock(&xdp_ring->xdpq_lock); +} + #endif /* _IXGBEVF_XDP_LIB_H_ */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index 898723ab62e812..2e2154ccecae61 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1094,7 +1094,7 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * @xqs: array of XDPSQs driver structs * @nqs: number of active XDPSQs, the above array length * @fl: driver callback to flush an XDP xmit bulk - * @fin: driver cabback to finalize the queue + * @fin: driver callback to finalize the queue * * If the driver has active XDPSQs, perform common checks and send the frames. * Finalize the queue, if requested. From d6ec774def48d3557103786a73c1cd306e0a277a Mon Sep 17 00:00:00 2001 From: Natalia Wochtman Date: Fri, 19 Sep 2025 15:30:15 +0200 Subject: [PATCH 07/15] ixgbevf: add pseudo header split Introduce pseudo header split support in the ixgbevf driver, specifically targeting ixgbe_mac_82599_vf. On older hardware (e.g. ixgbe_mac_82599_vf), RX DMA write size can only be limited in 1K increments. This causes issues when attempting to fit multiple packets per page, as a DMA write may overwrite the headroom of the next packet. To address this, introduce pseudo header split support, where the driver copies the full L2 header into a dedicated header buffer.
This avoids the need for HR/TR alignment and allows safe skb construction from the header buffer without risking overwrites. Given that once a packet is too big to fit into a single page, the behaviour is the same for all supported HW, use pseudo header split only for smaller packets. Signed-off-by: Natalia Wochtman Reviewed-by: Aleksandr Loktionov Co-developed-by: Larysa Zaremba Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 181 +++++++++++++++--- 2 files changed, 164 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ea86679e4f81d0..17958cfb4ee65b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -89,6 +89,7 @@ struct ixgbevf_ring { u32 truesize; /* Rx buffer full size */ u32 pending; /* Sent-not-completed descriptors */ }; + u32 hdr_truesize; /* Rx header buffer full size */ u16 count; /* amount of descriptors */ u16 next_to_clean; u32 next_to_use; @@ -107,6 +108,8 @@ struct ixgbevf_ring { struct ixgbevf_tx_queue_stats tx_stats; struct ixgbevf_rx_queue_stats rx_stats; }; + struct libeth_fqe *hdr_fqes; + struct page_pool *hdr_pp; struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; @@ -151,6 +154,8 @@ struct ixgbevf_ring { #define IXGBEVF_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ IXGBE_SRRCTL_BSIZEPKT_STEP)) +#define IXGBEVF_RX_SRRCTL_BUF_SIZE(mtu) (ALIGN((mtu) + LIBETH_RX_LL_LEN, \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) @@ -349,6 +354,8 @@ enum ixbgevf_state_t { __IXGBEVF_QUEUE_RESET_REQUESTED, }; +#define IXGBEVF_FLAG_HSPLIT BIT(0) + enum ixgbevf_boards { board_82599_vf, board_82599_vf_hv, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index f26528cadb981f..7080ea5512c751
100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -561,6 +561,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, .truesize = rx_ring->truesize, .count = rx_ring->count, }; + const struct libeth_fq_fp hdr_fq = { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + .truesize = rx_ring->hdr_truesize, + .count = rx_ring->count, + }; u16 ntu = rx_ring->next_to_use; /* nothing to do or no valid netdev defined */ @@ -578,6 +584,14 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, rx_desc->read.pkt_addr = cpu_to_le64(addr); + if (hdr_fq.pp) { + addr = libeth_rx_alloc(&hdr_fq, ntu); + if (addr == DMA_MAPPING_ERROR) { + libeth_rx_recycle_slow(fq.fqes[ntu].netmem); + break; + } + } + rx_desc++; ntu++; if (unlikely(ntu == fq.count)) { @@ -701,6 +715,32 @@ LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); +static u32 ixgbevf_rx_hsplit_wa(const struct libeth_fqe *hdr, + struct libeth_fqe *buf, u32 data_len) +{ + u32 copy = data_len <= L1_CACHE_BYTES ? 
data_len : ETH_HLEN; + struct page *hdr_page, *buf_page; + const void *src; + void *dst; + + if (unlikely(netmem_is_net_iov(buf->netmem)) || + !libeth_rx_sync_for_cpu(buf, copy)) + return 0; + + hdr_page = __netmem_to_page(hdr->netmem); + buf_page = __netmem_to_page(buf->netmem); + + dst = page_address(hdr_page) + hdr->offset + + pp_page_to_nmdesc(hdr_page)->pp->p.offset; + src = page_address(buf_page) + buf->offset + + pp_page_to_nmdesc(buf_page)->pp->p.offset; + + memcpy(dst, src, LARGEST_ALIGN(copy)); + buf->offset += copy; + + return copy; +} + static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) @@ -740,6 +780,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rmb(); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + + if (unlikely(rx_ring->hdr_pp)) { + struct libeth_fqe *hdr_buff; + unsigned int hdr_size = 0; + + hdr_buff = &rx_ring->hdr_fqes[rx_ring->next_to_clean]; + + if (!xdp->data) { + hdr_size = ixgbevf_rx_hsplit_wa(hdr_buff, + rx_buffer, + size); + size -= hdr_size ? 
: size; + } + + libeth_xdp_process_buff(xdp, hdr_buff, hdr_size); + } + libeth_xdp_process_buff(xdp, rx_buffer, size); cleaned_count++; @@ -1476,6 +1533,87 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc); } +static void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) +{ + struct libeth_fq fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + + if (!rx_ring->hdr_pp) + return; + + fq = (struct libeth_fq) { + .pp = rx_ring->hdr_pp, + .fqes = rx_ring->hdr_fqes, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->hdr_fqes = NULL; + rx_ring->hdr_pp = NULL; +} + +static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring) +{ + u32 adapter_flags = rx_ring->q_vector->adapter->flags; + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? 
+ LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + u32 frame_size; + int ret; + + /* Some HW requires DMA write sizes to be aligned to 1K, + * which warrants fake header split usage, but this is + * not an issue if the frame size is at its maximum of 3K + */ + frame_size = + IXGBEVF_RX_SRRCTL_BUF_SIZE(READ_ONCE(rx_ring->netdev->mtu)); + fq.hsplit = (adapter_flags & IXGBEVF_FLAG_HSPLIT) && + frame_size < fq.buf_len; + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; + + if (!fq.hsplit) + return 0; + + fq = (struct libeth_fq) { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_HDR, + .xdp = !!rx_ring->xdp_prog, + }; + + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + goto err; + + rx_ring->hdr_pp = fq.pp; + rx_ring->hdr_fqes = fq.fqes; + rx_ring->hdr_truesize = fq.truesize; + + return 0; + +err: + ixgbevf_rx_destroy_pp(rx_ring); + return ret; +} + static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *ring) { @@ -1933,8 +2071,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) /* Free all the Rx ring pages */ for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; + const struct libeth_fqe *hdr_fqe = rx_ring->hdr_fqes ? 
+ &rx_ring->hdr_fqes[i] : + NULL; libeth_rx_recycle_slow(rx_fqe->netmem); + if (hdr_fqe) + libeth_rx_recycle_slow(hdr_fqe->netmem); if (unlikely(++i == rx_ring->count)) i = 0; } @@ -2596,6 +2739,9 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) goto out; } + if (adapter->hw.mac.type == ixgbe_mac_82599_vf) + adapter->flags |= IXGBEVF_FLAG_HSPLIT; + /* assume legacy case in which PF would only give VF 2 queues */ hw->mac.max_tx_queues = 2; hw->mac.max_rx_queues = 2; @@ -3030,42 +3176,29 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) } /** - * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * ixgbevf_setup_rx_resources - allocate Rx resources * @adapter: board private structure * @rx_ring: Rx descriptor ring (for a specific queue) to setup * - * Returns 0 on success, negative on failure + * Returns: 0 on success, negative on failure. **/ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - struct libeth_fq fq = { - .count = rx_ring->count, - .nid = NUMA_NO_NODE, - .type = LIBETH_FQE_MTU, - .xdp = !!rx_ring->xdp_prog, - .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? 
- LIBETH_XDP_HEADROOM : - LIBETH_SKB_HEADROOM), - }; int ret; - ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + ret = ixgbevf_rx_create_pp(rx_ring); if (ret) return ret; - rx_ring->pp = fq.pp; - rx_ring->rx_fqes = fq.fqes; - rx_ring->truesize = fq.truesize; - rx_ring->rx_buf_len = fq.buf_len; - u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->dma_size, + rx_ring->desc = dma_alloc_coherent(rx_ring->pp->p.dev, + rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { @@ -3079,16 +3212,15 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, if (ret) goto err; - xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp); rcu_assign_pointer(rx_ring->xdp_prog, adapter->xdp_prog); return 0; err: - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ixgbevf_rx_destroy_pp(rx_ring); dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); + return ret; } @@ -4017,10 +4149,11 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct bpf_prog *old_prog; bool requires_mbuf; - requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM); + requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || + adapter->flags & IXGBEVF_FLAG_HSPLIT; if (prog && !prog->aux->xdp_has_frags && requires_mbuf) { NL_SET_ERR_MSG_MOD(extack, - "Configured MTU requires non-linear frames and XDP prog does not support frags"); + "Configured MTU or HW limitations require non-linear frames and XDP prog does not support frags"); return -EOPNOTSUPP; } From 57d29b337f36d5e91f6e4139fdc6674967cc9154 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 25 Nov 2025 16:31:14 +0100 Subject: [PATCH 08/15] ixgbevf: 
reconfigure page pool when reallocating buffers Currently, when MTU is changed, page pool is not reconfigured, which leads to usage of suboptimal buffer sizes. Always destroy page pool when cleaning the ring up and create it anew when we first allocate Rx buffers. Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 80 ++++++++++--------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 7080ea5512c751..dcb3ebdedc6b2c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -569,8 +569,8 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, }; u16 ntu = rx_ring->next_to_use; - /* nothing to do or no valid netdev defined */ - if (unlikely(!cleaned_count || !rx_ring->netdev)) + /* nothing to do or page pool is not present */ + if (unlikely(!cleaned_count || !fq.pp)) return; rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); @@ -1540,6 +1540,14 @@ static void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) .fqes = rx_ring->rx_fqes, }; + if (!fq.pp) + return; + + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + libeth_rx_fq_destroy(&fq); rx_ring->rx_fqes = NULL; rx_ring->pp = NULL; @@ -1589,6 +1597,14 @@ static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring) rx_ring->truesize = fq.truesize; rx_ring->rx_buf_len = fq.buf_len; + /* XDP RX-queue info */ + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0, rx_ring->truesize); + if (ret) + goto err; + + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp); + if (!fq.hsplit) return 0; @@ -1623,6 +1639,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, bool rlpml_valid = false; u64 rdba = ring->dma; u32 
rxdctl; + int err; /* disable queue to avoid issues while updating state */ rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx)); @@ -1656,6 +1673,14 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ring->next_to_clean = 0; ring->next_to_use = 0; + err = ixgbevf_rx_create_pp(ring); + if (err) { + netdev_err(ring->netdev, + "Failed to create Page Pool for buffer allocation: (%pe), RxQ %d is disabled, driver reload may be needed\n", + ERR_PTR(err), ring->queue_index); + return; + } + /* RXDCTL.RLPML does not work on 82599 */ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { u32 pkt_len = @@ -2153,8 +2178,10 @@ static void ixgbevf_clean_all_rx_rings(struct ixgbevf_adapter *adapter) { int i; - for (i = 0; i < adapter->num_rx_queues; i++) + for (i = 0; i < adapter->num_rx_queues; i++) { ixgbevf_clean_rx_ring(adapter->rx_ring[i]); + ixgbevf_rx_destroy_pp(adapter->rx_ring[i]); + } } /** @@ -3175,6 +3202,11 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) return err; } +static struct device *ixgbevf_dma_dev_from_ring(struct ixgbevf_ring *ring) +{ + return &ring->q_vector->adapter->pdev->dev; +} + /** * ixgbevf_setup_rx_resources - allocate Rx resources * @adapter: board private structure @@ -3185,43 +3217,25 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - int ret; - - ret = ixgbevf_rx_create_pp(rx_ring); - if (ret) - return ret; - u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(rx_ring->pp->p.dev, + rx_ring->desc = dma_alloc_coherent(ixgbevf_dma_dev_from_ring(rx_ring), rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { - ret = -ENOMEM; - goto err; + dev_err(rx_ring->dev, + "Unable to allocate memory for the 
Rx descriptor ring\n"); + return -ENOMEM; } - /* XDP RX-queue info */ - ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0, rx_ring->truesize); - if (ret) - goto err; - - xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp); - rcu_assign_pointer(rx_ring->xdp_prog, adapter->xdp_prog); return 0; -err: - ixgbevf_rx_destroy_pp(rx_ring); - dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); - - return ret; } /** @@ -3262,24 +3276,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) **/ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { - struct libeth_fq fq = { - .fqes = rx_ring->rx_fqes, - .pp = rx_ring->pp, - }; - ixgbevf_clean_rx_ring(rx_ring); - + ixgbevf_rx_destroy_pp(rx_ring); rcu_assign_pointer(rx_ring->xdp_prog, NULL); - xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - dma_free_coherent(fq.pp->p.dev, rx_ring->dma_size, rx_ring->desc, + dma_free_coherent(ixgbevf_dma_dev_from_ring(rx_ring), + rx_ring->dma_size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; - - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; } /** From e3676d79b20b2a806c688672a6a49ac90c7b1ba8 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Thu, 8 Jan 2026 13:44:03 +0100 Subject: [PATCH 09/15] ixgbevf: allow changing MTU when XDP program is attached xskxceiver attempts to change the MTU after attaching an XDP program; ixgbevf rejects the request, which causes the test to fail. Support the MTU change operation even when an XDP program is already attached, performing the same frame size check as when attaching a program.
Reviewed-by: Aleksandr Loktionov Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index dcb3ebdedc6b2c..c1016fde105fea 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3969,6 +3969,18 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) return 0; } +static bool ixgbevf_xdp_mtu_ok(const struct ixgbevf_adapter *adapter, + const struct bpf_prog *prog, unsigned int mtu) +{ + u32 frame_size = mtu + LIBETH_RX_LL_LEN; + bool requires_mbuf; + + requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || + adapter->flags & IXGBEVF_FLAG_HSPLIT; + + return prog->aux->xdp_has_frags || !requires_mbuf; +} + /** * ixgbevf_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure @@ -3984,8 +3996,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int ret; /* prevent MTU being changed to a size unsupported by XDP */ - if (adapter->xdp_prog) { - dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n"); + if (adapter->xdp_prog && + !ixgbevf_xdp_mtu_ok(adapter, adapter->xdp_prog, new_mtu)) { + netdev_warn(netdev, + "MTU value provided cannot be set while current XDP program is attached\n"); return -EPERM; } @@ -4148,14 +4162,10 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - u32 frame_size = READ_ONCE(dev->mtu) + LIBETH_RX_LL_LEN; struct ixgbevf_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; - bool requires_mbuf; - requires_mbuf = frame_size > IXGBEVF_RX_PAGE_LEN(LIBETH_XDP_HEADROOM) || - adapter->flags & IXGBEVF_FLAG_HSPLIT; - if (prog 
&& !prog->aux->xdp_has_frags && requires_mbuf) { + if (prog && !ixgbevf_xdp_mtu_ok(adapter, prog, READ_ONCE(dev->mtu))) { NL_SET_ERR_MSG_MOD(extack, "Configured MTU or HW limitations require non-linear frames and XDP prog does not support frags"); return -EOPNOTSUPP; From 1160e99f45068428047019d630d17790e8ddf658 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 24 Oct 2025 12:19:51 +0200 Subject: [PATCH 10/15] ixgbevf: move skb-filling code to a header AF_XDP ZC Rx path is also required to implement skb creation. Move all common functions to a header file as inlines. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 167 +----------------- .../ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 162 +++++++++++++++++ 2 files changed, 164 insertions(+), 165 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c1016fde105fea..f301ded0067bc4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -33,6 +33,7 @@ #include #include +#include "ixgbevf_txrx_lib.h" #include "ixgbevf_xdp_lib.h" const char ixgbevf_driver_name[] = "ixgbevf"; @@ -418,134 +419,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return !!budget; } -/** - * ixgbevf_rx_skb - Helper function to determine proper Rx method - * @q_vector: structure containing interrupt and ring information - * @skb: packet to send up - **/ -static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector, - struct sk_buff *skb) -{ - napi_gro_receive(&q_vector->napi, skb); -} - -#define IXGBE_RSS_L4_TYPES_MASK \ - ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) - -static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, - union 
ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - u16 rss_type; - - if (!(ring->netdev->features & NETIF_F_RXHASH)) - return; - - rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & - IXGBE_RXDADV_RSSTYPE_MASK; - - if (!rss_type) - return; - - skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? - PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); -} - -/** - * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum - * @ring: structure containig ring specific data - * @rx_desc: current Rx descriptor being processed - * @skb: skb currently being received and modified - **/ -static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - /* Rx csum disabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - - /* if IP and error */ - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && - ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { - ring->rx_stats.csum_err++; - return; - } - - if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) - return; - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - ring->rx_stats.csum_err++; - return; - } - - /* It must be a TCP or UDP packet with a valid checksum */ - skb->ip_summed = CHECKSUM_UNNECESSARY; -} - -/** - * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * - * This function checks the ring, descriptor, and packet information in - * order to populate the checksum, VLAN, protocol, and other fields within - * the skb. 
- **/ -static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - ixgbevf_rx_hash(rx_ring, rx_desc, skb); - ixgbevf_rx_checksum(rx_ring, rx_desc, skb); - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { - u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - unsigned long *active_vlans = netdev_priv(rx_ring->netdev); - - if (test_bit(vid & VLAN_VID_MASK, active_vlans)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); - } - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) - ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); -} - -/** - * ixgbevf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? 
ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); - - if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on @@ -619,42 +492,6 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, } } -/** - * ixgbevf_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed - * - * Check for corrupted packet headers caused by senders on the local L2 - * embedded NIC switch not setting up their Tx Descriptors right. These - * should be very rare. - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. 
- **/ -static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - /* verify that the packet does not have any known errors */ - if (unlikely(ixgbevf_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { - struct net_device *netdev = rx_ring->netdev; - - if (!(netdev->features & NETIF_F_RXALL)) { - dev_kfree_skb_any(skb); - return true; - } - } - - return false; -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -842,7 +679,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); - ixgbevf_rx_skb(q_vector, skb); + napi_gro_receive(&q_vector->napi, skb); } /* place incomplete frames back on ring for completion */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h new file mode 100644 index 00000000000000..851d1c054d9810 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2010-2026 Intel Corporation */ + +#ifndef _IXGBEVF_TXRX_LIB_H_ +#define _IXGBEVF_TXRX_LIB_H_ + +#include "ixgbevf.h" + +/** + * ixgbevf_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static inline bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? 
ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * ixgbevf_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static inline bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbevf_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + return false; +} + +#define IXGBE_RSS_L4_TYPES_MASK \ + ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) + +static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + IXGBE_RXDADV_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + 
(IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? + PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IP and error */ + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && + ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { + ring->rx_stats.csum_err++; + return; + } + + if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) + return; + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +/** + * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the checksum, VLAN, protocol, and other fields within + * the skb. 
+ **/ +static inline void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + ixgbevf_rx_hash(rx_ring, rx_desc, skb); + ixgbevf_rx_checksum(rx_ring, rx_desc, skb); + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); + unsigned long *active_vlans = netdev_priv(rx_ring->netdev); + + if (test_bit(vid & VLAN_VID_MASK, active_vlans)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); +} + +#endif /* _IXGBEVF_TXRX_LIB_H_ */ From ad388e475a0ac7f16120c48c68606d4a285e4f89 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 13:32:49 +0100 Subject: [PATCH 11/15] ixgbevf: implement AF_XDP ZC initialization Implement xsk_buff_pool configuration and supporting functionality, such as a single queue pair reconfiguration. Also, properly initialize Rx buffers. 
Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/Makefile | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 32 +++- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 155 ++++++++++++++---- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 131 +++++++++++++++ .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 12 ++ 5 files changed, 297 insertions(+), 35 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile index 01d3e892f3fa7b..cdae62f25fd926 100644 --- a/drivers/net/ethernet/intel/ixgbevf/Makefile +++ b/drivers/net/ethernet/intel/ixgbevf/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IXGBEVF) += ixgbevf.o -ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o +ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o ixgbevf_xsk.o ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 17958cfb4ee65b..d8f841515ca62a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -66,6 +66,7 @@ enum ixgbevf_ring_state_t { __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, __IXGBEVF_TX_XDP_RING_PRIMED, + __IXGBEVF_RXTX_XSK_RING, }; #define ring_is_xdp(ring) \ @@ -75,6 +76,13 @@ enum ixgbevf_ring_state_t { #define clear_ring_xdp(ring) \ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define ring_is_xsk(ring) \ + test_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define set_ring_xsk(ring) \ + set_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define clear_ring_xsk(ring) \ + clear_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ @@ -85,22 +93,22 @@ struct ixgbevf_ring { struct device *dev; /* Tx ring */ }; void *desc; /* 
descriptor ring memory */ - union { - u32 truesize; /* Rx buffer full size */ - u32 pending; /* Sent-not-completed descriptors */ - }; + u32 truesize; /* Rx buffer full size */ u32 hdr_truesize; /* Rx header buffer full size */ u16 count; /* amount of descriptors */ u16 next_to_clean; u32 next_to_use; + u32 pending; /* Sent-not-completed descriptors */ union { struct libeth_fqe *rx_fqes; + struct libeth_xdp_buff **xsk_fqes; struct ixgbevf_tx_buffer *tx_buffer_info; struct libeth_sqe *xdp_sqes; }; struct libeth_xdpsq_lock xdpq_lock; u32 cached_ntu; + u32 thresh; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; @@ -121,8 +129,10 @@ struct ixgbevf_ring { int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; struct libeth_xdp_buff_stash xdp_stash; + struct libeth_xdp_buff *xsk_xdp_head; unsigned int dma_size; /* length in bytes */ dma_addr_t dma; /* phys. address of descriptor ring */ + struct xsk_buff_pool *xsk_pool; /* AF_XDP ZC rings */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ @@ -399,14 +409,28 @@ int ixgbevf_open(struct net_device *netdev); int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring); +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring); +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter); +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring); +void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring); +void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring); void ixgbevf_update_stats(struct ixgbevf_adapter *adapter); int ethtool_ioctl(struct ifreq *ifr); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index f301ded0067bc4..d5dc52d04d30e3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -30,11 +30,12 @@ #include #include #include -#include +#include #include #include "ixgbevf_txrx_lib.h" #include "ixgbevf_xdp_lib.h" +#include "ixgbevf_xsk.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char 
ixgbevf_driver_string[] = @@ -500,7 +501,7 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) +void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); @@ -1122,7 +1123,7 @@ static inline void ixgbevf_irq_disable(struct ixgbevf_adapter *adapter) * ixgbevf_irq_enable - Enable default interrupt generation settings * @adapter: board private structure **/ -static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1131,6 +1132,24 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_enable_mask); } +/** + * ixgbevf_xsk_pool_from_q - get ZC XSK buffer pool bound to a queue ID + * @ring: Rx or Tx ring + * + * Return: A pointer to xsk_buff_pool structure if there is a buffer pool + * attached, configured as zero-copy, and usable by this queue, NULL otherwise. + */ +static struct xsk_buff_pool *ixgbevf_xsk_pool_from_q(struct ixgbevf_ring *ring) +{ + struct xsk_buff_pool *pool = + xsk_get_pool_from_qid(ring->netdev, ring->queue_index); + + if (!READ_ONCE(ring->xdp_prog) && !ring_is_xdp(ring)) + return NULL; + + return (pool && pool->dev) ? pool : NULL; +} + /** * ixgbevf_configure_tx_ring - Configure 82599 VF Tx ring after Reset * @adapter: board private structure @@ -1138,8 +1157,8 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) * * Configure the Tx descriptor ring after a reset. 
**/ -static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; u64 tdba = ring->dma; @@ -1197,6 +1216,12 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, num_possible_cpus() > adapter->num_xdp_queues); } + ring->xsk_pool = ixgbevf_xsk_pool_from_q(ring); + if (ring_is_xdp(ring) && ring->xsk_pool) + set_ring_xsk(ring); + else + clear_ring_xsk(ring); + clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -1266,8 +1291,8 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter) } #define IXGBEVF_MAX_RX_DESC_POLL 10 -static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1291,10 +1316,15 @@ static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, if (!wait_loop) pr_err("RXDCTL.ENABLE queue %d not cleared while polling\n", reg_idx); + + /* Specification calls for 100 usec of delay after + * RXDCTL.ENABLE is cleared + */ + usleep_range(100, 200); } -static void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1370,14 +1400,14 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc); } -static void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) +void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) { struct libeth_fq fq = { .pp = rx_ring->pp, .fqes = rx_ring->rx_fqes, }; - if (!fq.pp) + if (!fq.pp && 
!rx_ring->xsk_fqes) return; if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) { @@ -1385,6 +1415,21 @@ static void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); } + if (test_and_clear_bit(__IXGBEVF_RXTX_XSK_RING, &rx_ring->state)) { + struct libeth_xskfq xskfq = { + .fqes = rx_ring->xsk_fqes, + }; + + libeth_xskfq_destroy(&xskfq); + rx_ring->xsk_fqes = NULL; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + rx_ring->xsk_pool = NULL; + + return; + } + libeth_rx_fq_destroy(&fq); rx_ring->rx_fqes = NULL; rx_ring->pp = NULL; @@ -1414,9 +1459,44 @@ static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring) LIBETH_XDP_HEADROOM : LIBETH_SKB_HEADROOM), }; + struct xsk_buff_pool *pool; u32 frame_size; int ret; + pool = ixgbevf_xsk_pool_from_q(rx_ring); + if (pool) { + u32 frag_sz = xsk_pool_get_rx_frag_step(pool); + struct libeth_xskfq xskfq = { + .nid = numa_node_id(), + .count = rx_ring->count, + .pool = pool, + }; + + ret = libeth_xskfq_create(&xskfq); + if (ret) + return ret; + + rx_ring->xsk_pool = xskfq.pool; + rx_ring->xsk_fqes = xskfq.fqes; + rx_ring->pending = xskfq.count - 1; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + set_ring_xsk(rx_ring); + + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0, frag_sz); + if (ret) + goto err; + + ret = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rx_ring->xsk_pool); + if (ret) + goto err; + + return 0; + } + /* Some HW requires DMA write sizes to be aligned to 1K, * which warrants fake header split usage, but this is * not an issue if the frame size is at its maximum of 3K @@ -1467,8 +1547,8 @@ static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring) return ret; } -static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, 
+ struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_rx_desc *rx_desc; @@ -1509,6 +1589,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = ixgbevf_desc_unused(ring); err = ixgbevf_rx_create_pp(ring); if (err) { @@ -1526,7 +1607,8 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) { rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; - rlpml_valid = true; + if (pkt_len <= ring->rx_buf_len) + rlpml_valid = true; } } @@ -1536,7 +1618,11 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); ixgbevf_rx_desc_queue_enable(adapter, ring); - ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); + + if (ring_is_xsk(ring)) + ixgbevf_xsk_alloc_rx_bufs(ring, ring->pending); + else + ixgbevf_alloc_rx_buffers(ring, ring->pending); } /** @@ -1925,8 +2011,13 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) * ixgbevf_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ -static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { + if (ring_is_xsk(rx_ring)) { + ixgbevf_rx_xsk_ring_free_buffs(rx_ring); + goto reset; + } + /* Free Rx ring sk_buff */ libeth_xdp_return_stash(&rx_ring->xdp_stash); @@ -1944,15 +2035,17 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) i = 0; } +reset: rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->pending = 0; } /** * ixgbevf_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ -static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { u16 i = tx_ring->next_to_clean; struct 
ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; @@ -2035,10 +2128,17 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); } +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) +{ + u8 reg_idx = ring->reg_idx; + + IXGBE_WRITE_REG(&ring->q_vector->adapter->hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); +} + void ixgbevf_down(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; int i; /* signal that we are down to the interrupt handler */ @@ -2064,19 +2164,11 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ - for (i = 0; i < adapter->num_tx_queues; i++) { - u8 reg_idx = adapter->tx_ring[i]->reg_idx; - - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } - - for (i = 0; i < adapter->num_xdp_queues; i++) { - u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbevf_flush_tx_queue(adapter->tx_ring[i]); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_flush_tx_queue(adapter->xdp_ring[i]); if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -4047,6 +4139,9 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ixgbevf_xdp_setup(dev, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + return ixgbevf_setup_xsk_pool(netdev_priv(dev), xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c new file mode 100644 index 00000000000000..134e7670018700 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -0,0 +1,131 @@ +// 
SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2026 Intel Corporation */ + +#include + +#include "ixgbevf.h" +#include "ixgbevf_xsk.h" + +/** + * ixgbevf_single_irq_disable - Mask off interrupt generation on a single vector + * @adapter: board private structure + * @vidx: vector id + **/ +static void ixgbevf_single_irq_disable(struct ixgbevf_adapter *adapter, + u16 vidx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, + adapter->eims_enable_mask & ~BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, + adapter->eims_enable_mask & ~BIT(vidx)); + + IXGBE_WRITE_FLUSH(hw); + + synchronize_irq(adapter->msix_entries[vidx].vector); +} + +static void ixgbevf_qp_dis(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *tx_ring, *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + + netif_stop_subqueue(adapter->netdev, qid); + ixgbevf_single_irq_disable(adapter, q_vector->v_idx); + napi_disable(&q_vector->napi); + + ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[qid]); + ixgbevf_clean_rx_ring(rx_ring); + ixgbevf_rx_destroy_pp(rx_ring); + + /* Clean both XDP and normal Tx queue */ + ixgbevf_for_each_ring(tx_ring, q_vector->tx) { + ixgbevf_flush_tx_queue(tx_ring); + if (ring_is_xdp(tx_ring)) + ixgbevf_clean_xdp_ring(tx_ring); + else + ixgbevf_clean_tx_ring(tx_ring); + } +} + +static void ixgbevf_qp_ena(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *tx_ring, *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + + ixgbevf_configure_rx_ring(adapter, rx_ring); + ixgbevf_for_each_ring(tx_ring, q_vector->tx) + ixgbevf_configure_tx_ring(adapter, tx_ring); + + napi_enable(&q_vector->napi); + ixgbevf_irq_enable(adapter); + netif_start_subqueue(adapter->netdev, qid); +} + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) +{ + bool running = 
!test_bit(__IXGBEVF_DOWN, &adapter->state) && + adapter->xdp_prog; + int err; + + if (running) + ixgbevf_qp_dis(adapter, qid); + + err = libeth_xsk_setup_pool(adapter->netdev, qid, !!pool); + + if (running) + ixgbevf_qp_ena(adapter, qid); + + return err; +} + +static void ixgbevf_fill_rx_xsk_desc(const struct libeth_xskfq_fp *fq, u32 i) +{ + union ixgbe_adv_rx_desc *rx_desc = + &((union ixgbe_adv_rx_desc *)fq->descs)[i]; + + rx_desc->read.pkt_addr = + cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); + rx_desc->wb.upper.length = 0; +} + +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) +{ + struct libeth_xskfq_fp fq = { + .count = rx_ring->count, + .descs = rx_ring->desc, + .fqes = rx_ring->xsk_fqes, + .ntu = rx_ring->next_to_use, + .pool = rx_ring->xsk_pool, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, num, ixgbevf_fill_rx_xsk_desc); + if (likely(done)) { + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(rx_ring, fq.ntu); + } + + rx_ring->next_to_use = fq.ntu; + rx_ring->pending -= done; +} + +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean; + + if (rx_ring->xsk_xdp_head) + xsk_buff_free(&rx_ring->xsk_xdp_head->base); + + rx_ring->xsk_xdp_head = NULL; + + while (ntc != rx_ring->next_to_use) { + xsk_buff_free(&rx_ring->xsk_fqes[ntc]->base); + ntc++; + ntc = ntc == rx_ring->count ? 
0 : ntc; + } +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h new file mode 100644 index 00000000000000..1cbcea803509b6 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2026 Intel Corporation */ + +#ifndef _IXGBEVF_XSK_H_ +#define _IXGBEVF_XSK_H_ + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid); +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); + +#endif /* _IXGBEVF_XSK_H_ */ From a0ee7f6cbdaad3e1f44354bda77a5f0f9499b2cd Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 14:21:11 +0100 Subject: [PATCH 12/15] ixgbevf: implement AF_XDP zero-copy Tx Add code that handles Tx ZC queues inside of napi_poll(), utilize libeth. As NIC's multiple buffer conventions do not play nicely with AF_XDP's, leave handling of segments for later. 
Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +++++--- .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 20 ++++++++--- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 34 ++++++++++++++++++- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 5 +++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d5dc52d04d30e3..ab7223aa70cd9a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -733,10 +733,13 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { - if (ring_is_xdp(ring)) - continue; - if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) - clean_complete = false; + if (ring_is_xsk(ring)) + clean_complete &= + ixgbevf_clean_xsk_tx_irq(q_vector, ring, + budget); + else if (!ring_is_xdp(ring)) + clean_complete &= + ixgbevf_clean_tx_irq(q_vector, ring, budget); } if (budget <= 0) @@ -1222,6 +1225,9 @@ void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, else clear_ring_xsk(ring); + ring->thresh = ring_is_xsk(ring) ? 
IXGBEVF_XSK_TX_CLEAN_THRESH(ring) : + XDP_BULK_QUEUE_SIZE; + clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index da4f397944ed88..08b0c427d1a3b5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -4,7 +4,7 @@ #ifndef _IXGBEVF_XDP_LIB_H_ #define _IXGBEVF_XDP_LIB_H_ -#include +#include #include "ixgbevf.h" @@ -40,6 +40,7 @@ static inline void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, u16 to_clean) { struct libeth_xdpsq_napi_stats stats = { }; + bool xsk_ring = ring_is_xsk(xdp_ring); u32 ntc = xdp_ring->next_to_clean; struct xdp_frame_bulk cbulk; struct libeth_cq_pp cp = { @@ -48,11 +49,14 @@ static inline void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, .xss = &stats, .napi = in_napi, }; + u32 xsk_frames = 0; xdp_frame_bulk_init(&cbulk); xdp_ring->pending -= to_clean; while (likely(to_clean--)) { + xsk_frames += xsk_ring && + likely(!xdp_ring->xdp_sqes[ntc].type) ? 1 : 0; libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); ntc++; ntc = unlikely(ntc == xdp_ring->count) ? 
0 : ntc; @@ -60,6 +64,8 @@ static inline void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, xdp_ring->next_to_clean = ntc; xdp_flush_frame_bulk(&cbulk); + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) @@ -67,8 +73,8 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) struct ixgbevf_ring *xdp_ring = xdpsq; libeth_xdpsq_lock(&xdp_ring->xdpq_lock); - if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { - u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); + if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdpsq); if (likely(to_clean)) ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); @@ -91,6 +97,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) xdp_ring->next_to_use = 1; xdp_ring->pending = 1; + xdp_ring->xdp_sqes[0].type = LIBETH_SQE_CTX; /* Finish descriptor writes before bumping tail */ wmb(); @@ -103,7 +110,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) .lock = &xdp_ring->xdpq_lock, .ntu = &xdp_ring->next_to_use, .pending = &xdp_ring->pending, - .pool = NULL, + .pool = xdp_ring->xsk_pool, .sqes = xdp_ring->xdp_sqes, }; @@ -123,6 +130,11 @@ static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) goto unlock; ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; + + /* We will not get DD on a context descriptor */ + if (unlikely(xdp_ring->xdp_sqes[ltu].type == LIBETH_SQE_CTX)) + goto unlock; + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 134e7670018700..e1873ebfe9a0a5 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -3,7 +3,8 @@ #include -#include "ixgbevf.h" +#include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xdp_lib.h" #include "ixgbevf_xsk.h" /** @@ -129,3 +130,34 @@ void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) ntc = ntc == rx_ring->count ? 0 : ntc; } } + +static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; + + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + IXGBE_TXD_CMD_EOP | + desc.len; + + tx_desc->read.olinfo_status = + cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} + +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget) +{ + u32 budget = min_t(u32, napi_budget, tx_ring->thresh); + + return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, budget, + NULL, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc, + ixgbevf_xdp_rs_and_bump); +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 1cbcea803509b6..eda3e9b9554763 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -4,9 +4,14 @@ #ifndef _IXGBEVF_XSK_H_ #define 
_IXGBEVF_XSK_H_ +/* Process completions as soon as possible */ +#define IXGBEVF_XSK_TX_CLEAN_THRESH(r) ((r)->count - 1) + int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, struct xsk_buff_pool *pool, u16 qid); void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget); #endif /* _IXGBEVF_XSK_H_ */ From bcdbbddf29b27e9d6684f0f4a6921803585c8db4 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 14:24:57 +0100 Subject: [PATCH 13/15] ixgbevf: implement AF_XDP zero-copy Rx Add code that handles AF_XDP ZC Rx queues inside of napi_poll(), utilize libeth helpers. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 5 +- .../ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 1 + .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 118 +++++++++++++++++- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 4 +- 4 files changed, 125 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index ab7223aa70cd9a..6d074825217cba 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -754,7 +754,10 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) per_ring_budget = budget; ixgbevf_for_each_ring(ring, q_vector->rx) { - int cleaned = ixgbevf_clean_rx_irq(q_vector, ring, + int cleaned = ring_is_xsk(ring) ? 
+ ixgbevf_clean_xsk_rx_irq(q_vector, ring, + per_ring_budget) : + ixgbevf_clean_rx_irq(q_vector, ring, per_ring_budget); work_done += cleaned; if (cleaned >= per_ring_budget) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h index 851d1c054d9810..ac36fac9f79717 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -24,6 +24,7 @@ static inline bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, /* fetch, update, and store next to clean */ ntc = (ntc < rx_ring->count) ? ntc : 0; rx_ring->next_to_clean = ntc; + rx_ring->pending++; prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index e1873ebfe9a0a5..992fbb955fde35 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -63,6 +63,7 @@ static void ixgbevf_qp_ena(struct ixgbevf_adapter *adapter, u16 qid) napi_enable(&q_vector->napi); ixgbevf_irq_enable(adapter); netif_start_subqueue(adapter->netdev, qid); + napi_schedule(&q_vector->napi); } int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, @@ -93,7 +94,7 @@ static void ixgbevf_fill_rx_xsk_desc(const struct libeth_xskfq_fp *fq, u32 i) rx_desc->wb.upper.length = 0; } -void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) +bool ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) { struct libeth_xskfq_fp fq = { .count = rx_ring->count, @@ -113,6 +114,8 @@ void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) rx_ring->next_to_use = fq.ntu; rx_ring->pending -= done; + + return done == num; } void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) @@ -151,6 +154,119 @@ static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); } 
+LIBETH_XDP_DEFINE_START(); +LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc); +LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, ixgbevf_xsk_flush_tx); +LIBETH_XSK_DEFINE_FINALIZE(static ixgbevf_xsk_finalize_xdp_napi, + ixgbevf_xsk_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); + +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + u32 total_rx_bytes = 0, total_rx_packets = 0; + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + struct libeth_xdp_buff *head_xdp; + bool failure = false, wake; + struct sk_buff *skb; + + wake = xsk_uses_need_wakeup(rx_ring->xsk_pool); + if (wake) + xsk_clear_rx_need_wakeup(rx_ring->xsk_pool); + + head_xdp = rx_ring->xsk_xdp_head; + libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + + while (likely(total_rx_packets < budget)) { + union ixgbe_adv_rx_desc *rx_desc; + struct libeth_xdp_buff *rx_buffer; + unsigned int size; + u32 xdp_result; + + rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (unlikely(!size)) + break; + + /* Avoid reading other descriptor fields before checking size */ + rmb(); + + rx_buffer = rx_ring->xsk_fqes[rx_ring->next_to_clean]; + head_xdp = libeth_xsk_process_buff(head_xdp, rx_buffer, size); + if (unlikely(!head_xdp) || ixgbevf_is_non_eop(rx_ring, rx_desc)) + continue; + + total_rx_packets++; + total_rx_bytes += xdp_get_buff_len(&head_xdp->base); + + xdp_result = ixgbevf_xsk_run_prog(head_xdp, &xdp_tx_bulk); + if (xdp_result) { + head_xdp = NULL; + if (likely(xdp_result != LIBETH_XDP_ABORTED)) + continue; + failure = true; + break; + } + + skb = xdp_build_skb_from_zc(&head_xdp->base); + + if (unlikely(!skb)) { + libeth_xdp_return_buff_slow(head_xdp); + head_xdp = NULL; + 
rx_ring->rx_stats.alloc_rx_buff_failed++; + break; + } + + head_xdp = NULL; + + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { + skb = NULL; + continue; + } + + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { + dev_kfree_skb_irq(skb); + continue; + } + + /* populate checksum, VLAN, and protocol */ + ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); + + napi_gro_receive(&q_vector->napi, skb); + } + + if (rx_ring->pending >= rx_ring->thresh) + failure |= !ixgbevf_xsk_alloc_rx_bufs(rx_ring, + rx_ring->pending); + + /* place incomplete frames back on ring for completion */ + rx_ring->xsk_xdp_head = head_xdp; + + ixgbevf_xsk_finalize_xdp_napi(&xdp_tx_bulk); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; + + if (likely(!failure)) + return total_rx_packets; + + if (wake) + xsk_set_rx_need_wakeup(rx_ring->xsk_pool); + + return budget; +} + bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *tx_ring, int napi_budget) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index eda3e9b9554763..042a90cfa9131c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -9,8 +9,10 @@ int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, struct xsk_buff_pool *pool, u16 qid); -void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); +bool ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring 
*rx_ring, int budget); bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *tx_ring, int napi_budget); From e828d3a83d1060e21e9b8a03e054879eac88d1e5 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 14:26:46 +0100 Subject: [PATCH 14/15] ixgbevf: implement .ndo_xsk_wakeup() and set features To finalize basic AF_XDP implementation, set features and add .ndo_xsk_wakeup() handler. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 24 +++++++++++++++++++ .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 1 + 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 6d074825217cba..4e99c36c6b574d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4171,6 +4171,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, .ndo_xdp_xmit = ixgbevf_xdp_xmit, + .ndo_xsk_wakeup = ixgbevf_xsk_wakeup, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4303,7 +4304,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - libeth_xdp_set_features_noredir(netdev, NULL, 0, NULL); + libeth_xdp_set_features_noredir(netdev, NULL, 1, NULL); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 992fbb955fde35..c32f4b93d03c0e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -277,3 +277,27 @@ bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, ixgbevf_xsk_xmit_desc, ixgbevf_xdp_rs_and_bump); 
} + +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct ixgbevf_q_vector *q_vector; + struct ixgbevf_ring *rx_ring; + + if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(queue_id >= adapter->num_xdp_queues)) + return -EINVAL; + + rx_ring = adapter->rx_ring[queue_id]; + if (unlikely(!ring_is_xsk(rx_ring))) + return -EINVAL; + + q_vector = rx_ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, + BIT(q_vector->v_idx)); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 042a90cfa9131c..7af14c78ead978 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -15,5 +15,6 @@ u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget); bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *tx_ring, int napi_budget); +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); #endif /* _IXGBEVF_XSK_H_ */ From 3176c418fc5a96c6ec980ae239b33780e2038a18 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 19 Dec 2025 14:48:57 +0100 Subject: [PATCH 15/15] ixgbevf: multi-buffer AF_XDP Tx Transmitting multi-buffer AF_XDP packets is not very straightforward given HW limitations in ixgbevf, namely that the first data descriptor must contain the length of the whole packet. Use private data of an sqe to store the length of an unfinished packet so far and the first descriptor index. Once EoP zero-copy descriptor is processed, write the accumulated length into the saved first descriptor. 
Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- .../ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h | 3 + .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 65 ++++++++++++++++--- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 1 + 4 files changed, 62 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4e99c36c6b574d..3006d6dae7147d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4304,7 +4304,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - libeth_xdp_set_features_noredir(netdev, NULL, 1, NULL); + libeth_xdp_set_features_noredir(netdev, NULL, IXGBEVF_XSK_MAX_ZC_FRAGS); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h index 08b0c427d1a3b5..ad1e09ae6aa1c3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xdp_lib.h @@ -141,6 +141,9 @@ static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; xdp_ring->cached_ntu = xdp_ring->next_to_use; + /* In case the packet was interrupted, discard it */ + xdp_ring->xdp_sqes[ltu].priv = 0; + /* Finish descriptor writes before bumping tail */ wmb(); ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index c32f4b93d03c0e..a16e88048f8313 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -134,24 +134,73 @@ void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) } } 
+struct ixgbevf_zc_sqe_priv { + u16 first_desc; + u16 len; +}; + +static_assert(sizeof(struct ixgbevf_zc_sqe_priv) <= + sizeof_field(struct libeth_sqe, priv)); + static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, const struct libeth_xdpsq *sq, u64 priv) { - union ixgbe_adv_tx_desc *tx_desc = - &((union ixgbe_adv_tx_desc *)sq->descs)[i]; + union ixgbe_adv_tx_desc *descs = sq->descs, *tx_desc = &descs[i]; + u32 ltu = (i ? : sq->count) - 1; u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS | - IXGBE_TXD_CMD_EOP | desc.len; - tx_desc->read.olinfo_status = - cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | - IXGBE_ADVTXD_CC); - tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + if (likely((desc.flags & LIBETH_XDP_TX_LAST) && !sq->sqes[ltu].priv)) { + tx_desc->read.olinfo_status = + cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type | IXGBE_TXD_CMD_EOP); + return; + } + + /* No previous packet */ + if (!sq->sqes[ltu].priv) { + struct ixgbevf_zc_sqe_priv *sqe_priv = + (void *)&sq->sqes[i].priv; + + sqe_priv->first_desc = i; + sqe_priv->len = desc.len; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + return; + } + + if (sq->sqes[ltu].priv) { + struct ixgbevf_zc_sqe_priv *sqe_priv = + (void *)&sq->sqes[i].priv; + + sq->sqes[i].priv = sq->sqes[ltu].priv; + sq->sqes[ltu].priv = 0; + sqe_priv->len += desc.len; + + if (desc.flags & LIBETH_XDP_TX_LAST) { + union ixgbe_adv_tx_desc *first_desc = + &descs[sqe_priv->first_desc]; + + first_desc->read.olinfo_status = + cpu_to_le32((sqe_priv->len << + IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type | IXGBE_TXD_CMD_EOP); + cmd_type |= IXGBE_TXD_CMD_EOP; + sq->sqes[i].priv = 0; + } + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + } } LIBETH_XDP_DEFINE_START(); diff 
--git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 7af14c78ead978..2bb39735b10efd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -6,6 +6,7 @@ /* Process completions as soon as possible */ #define IXGBEVF_XSK_TX_CLEAN_THRESH(r) ((r)->count - 1) +#define IXGBEVF_XSK_MAX_ZC_FRAGS min(18, MAX_SKB_FRAGS) int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, struct xsk_buff_pool *pool, u16 qid);