| /* |
| * Copyright (c) 2018, Mellanox Technologies. All rights reserved. |
| * |
| * This software is available to you under a choice of one of two |
| * licenses. You may choose to be licensed under the terms of the GNU |
| * General Public License (GPL) Version 2, available from the file |
| * COPYING in the main directory of this source tree, or the |
| * OpenIB.org BSD license below: |
| * |
| * Redistribution and use in source and binary forms, with or |
| * without modification, are permitted provided that the following |
| * conditions are met: |
| * |
| * - Redistributions of source code must retain the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer. |
| * |
| * - Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following |
| * disclaimer in the documentation and/or other materials |
| * provided with the distribution. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <linux/bpf_trace.h> |
| #include "en/xdp.h" |
| |
| int mlx5e_xdp_max_mtu(struct mlx5e_params *params) |
| { |
| int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; |
| |
| /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). |
| * The condition checked in mlx5e_rx_is_linear_skb is: |
| * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) |
| * (Note that hw_mtu == sw_mtu + hard_mtu.) |
| * What is returned from this function is: |
| * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) |
| * After assigning sw_mtu := max_mtu, the left side of (1) turns to |
| * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, |
| * because both PAGE_SIZE and S are already aligned. Any number greater |
| * than max_mtu would make the left side of (1) greater than PAGE_SIZE, |
| * so max_mtu is the maximum MTU allowed. |
| */ |
| |
| return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); |
| } |
| |
| static inline bool |
| mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, |
| struct xdp_buff *xdp) |
| { |
| struct mlx5e_xdp_info xdpi; |
| |
| xdpi.xdpf = convert_to_xdp_frame(xdp); |
| if (unlikely(!xdpi.xdpf)) |
| return false; |
| xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); |
| dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, |
| xdpi.xdpf->len, PCI_DMA_TODEVICE); |
| xdpi.di = *di; |
| |
| return sq->xmit_xdp_frame(sq, &xdpi); |
| } |
| |
| /* returns true if packet was consumed by xdp */ |
| bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, |
| void *va, u16 *rx_headroom, u32 *len) |
| { |
| struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); |
| struct xdp_buff xdp; |
| u32 act; |
| int err; |
| |
| if (!prog) |
| return false; |
| |
| xdp.data = va + *rx_headroom; |
| xdp_set_data_meta_invalid(&xdp); |
| xdp.data_end = xdp.data + *len; |
| xdp.data_hard_start = va; |
| xdp.rxq = &rq->xdp_rxq; |
| |
| act = bpf_prog_run_xdp(prog, &xdp); |
| switch (act) { |
| case XDP_PASS: |
| *rx_headroom = xdp.data - xdp.data_hard_start; |
| *len = xdp.data_end - xdp.data; |
| return false; |
| case XDP_TX: |
| if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) |
| goto xdp_abort; |
| __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ |
| return true; |
| case XDP_REDIRECT: |
| /* When XDP enabled then page-refcnt==1 here */ |
| err = xdp_do_redirect(rq->netdev, &xdp, prog); |
| if (unlikely(err)) |
| goto xdp_abort; |
| __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); |
| rq->xdpsq.redirect_flush = true; |
| mlx5e_page_dma_unmap(rq, di); |
| rq->stats->xdp_redirect++; |
| return true; |
| default: |
| bpf_warn_invalid_xdp_action(act); |
| /* fall through */ |
| case XDP_ABORTED: |
| xdp_abort: |
| trace_xdp_exception(rq->netdev, prog, act); |
| /* fall through */ |
| case XDP_DROP: |
| rq->stats->xdp_drop++; |
| return true; |
| } |
| } |
| |
| static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) |
| { |
| struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; |
| struct mlx5_wq_cyc *wq = &sq->wq; |
| u8 wqebbs; |
| u16 pi; |
| |
| mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); |
| |
| prefetchw(session->wqe->data); |
| session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; |
| |
| pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); |
| |
| /* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS |
| * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. |
| * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a |
| * full-session WQE be cache-aligned. |
| */ |
| #if L1_CACHE_BYTES < 128 |
| #define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) |
| #else |
| #define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) |
| #endif |
| |
| wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), |
| MLX5E_XDP_MPW_MAX_WQEBBS); |
| |
| session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; |
| } |
| |
| static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) |
| { |
| struct mlx5_wq_cyc *wq = &sq->wq; |
| struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; |
| struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; |
| u16 ds_count = session->ds_count; |
| u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); |
| struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; |
| |
| cseg->opmod_idx_opcode = |
| cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); |
| cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); |
| |
| wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); |
| wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT; |
| |
| sq->pc += wi->num_wqebbs; |
| |
| sq->doorbell_cseg = cseg; |
| |
| session->wqe = NULL; /* Close session */ |
| } |
| |
| static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, |
| struct mlx5e_xdp_info *xdpi) |
| { |
| struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; |
| struct mlx5e_xdpsq_stats *stats = sq->stats; |
| |
| dma_addr_t dma_addr = xdpi->dma_addr; |
| struct xdp_frame *xdpf = xdpi->xdpf; |
| unsigned int dma_len = xdpf->len; |
| |
| if (unlikely(sq->hw_mtu < dma_len)) { |
| stats->err++; |
| return false; |
| } |
| |
| if (unlikely(!session->wqe)) { |
| if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, |
| MLX5_SEND_WQE_MAX_WQEBBS))) { |
| /* SQ is full, ring doorbell */ |
| mlx5e_xmit_xdp_doorbell(sq); |
| stats->full++; |
| return false; |
| } |
| |
| mlx5e_xdp_mpwqe_session_start(sq); |
| } |
| |
| mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len); |
| |
| if (unlikely(session->ds_count == session->max_ds_count)) |
| mlx5e_xdp_mpwqe_complete(sq); |
| |
| mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); |
| stats->xmit++; |
| return true; |
| } |
| |
| static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) |
| { |
| struct mlx5_wq_cyc *wq = &sq->wq; |
| u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); |
| struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); |
| |
| struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; |
| struct mlx5_wqe_eth_seg *eseg = &wqe->eth; |
| struct mlx5_wqe_data_seg *dseg = wqe->data; |
| |
| struct xdp_frame *xdpf = xdpi->xdpf; |
| dma_addr_t dma_addr = xdpi->dma_addr; |
| unsigned int dma_len = xdpf->len; |
| |
| struct mlx5e_xdpsq_stats *stats = sq->stats; |
| |
| prefetchw(wqe); |
| |
| if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) { |
| stats->err++; |
| return false; |
| } |
| |
| if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { |
| /* SQ is full, ring doorbell */ |
| mlx5e_xmit_xdp_doorbell(sq); |
| stats->full++; |
| return false; |
| } |
| |
| cseg->fm_ce_se = 0; |
| |
| /* copy the inline part if required */ |
| if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { |
| memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); |
| eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); |
| dma_len -= MLX5E_XDP_MIN_INLINE; |
| dma_addr += MLX5E_XDP_MIN_INLINE; |
| dseg++; |
| } |
| |
| /* write the dma part */ |
| dseg->addr = cpu_to_be64(dma_addr); |
| dseg->byte_count = cpu_to_be32(dma_len); |
| |
| cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); |
| |
| sq->pc++; |
| |
| sq->doorbell_cseg = cseg; |
| |
| mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); |
| stats->xmit++; |
| return true; |
| } |
| |
| bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) |
| { |
| struct mlx5e_xdp_info_fifo *xdpi_fifo; |
| struct mlx5e_xdpsq *sq; |
| struct mlx5_cqe64 *cqe; |
| bool is_redirect; |
| u16 sqcc; |
| int i; |
| |
| sq = container_of(cq, struct mlx5e_xdpsq, cq); |
| |
| if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) |
| return false; |
| |
| cqe = mlx5_cqwq_get_cqe(&cq->wq); |
| if (!cqe) |
| return false; |
| |
| is_redirect = !rq; |
| xdpi_fifo = &sq->db.xdpi_fifo; |
| |
| /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), |
| * otherwise a cq overrun may occur |
| */ |
| sqcc = sq->cc; |
| |
| i = 0; |
| do { |
| u16 wqe_counter; |
| bool last_wqe; |
| |
| mlx5_cqwq_pop(&cq->wq); |
| |
| wqe_counter = be16_to_cpu(cqe->wqe_counter); |
| |
| if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) |
| netdev_WARN_ONCE(sq->channel->netdev, |
| "Bad OP in XDPSQ CQE: 0x%x\n", |
| get_cqe_opcode(cqe)); |
| |
| do { |
| struct mlx5e_xdp_wqe_info *wi; |
| u16 ci, j; |
| |
| last_wqe = (sqcc == wqe_counter); |
| ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); |
| wi = &sq->db.wqe_info[ci]; |
| |
| sqcc += wi->num_wqebbs; |
| |
| for (j = 0; j < wi->num_ds; j++) { |
| struct mlx5e_xdp_info xdpi = |
| mlx5e_xdpi_fifo_pop(xdpi_fifo); |
| |
| if (is_redirect) { |
| dma_unmap_single(sq->pdev, xdpi.dma_addr, |
| xdpi.xdpf->len, DMA_TO_DEVICE); |
| xdp_return_frame(xdpi.xdpf); |
| } else { |
| /* Recycle RX page */ |
| mlx5e_page_release(rq, &xdpi.di, true); |
| } |
| } |
| } while (!last_wqe); |
| } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); |
| |
| sq->stats->cqes += i; |
| |
| mlx5_cqwq_update_db_record(&cq->wq); |
| |
| /* ensure cq space is freed before enabling more cqes */ |
| wmb(); |
| |
| sq->cc = sqcc; |
| return (i == MLX5E_TX_CQ_POLL_BUDGET); |
| } |
| |
| void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) |
| { |
| struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; |
| bool is_redirect = !rq; |
| |
| while (sq->cc != sq->pc) { |
| struct mlx5e_xdp_wqe_info *wi; |
| u16 ci, i; |
| |
| ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); |
| wi = &sq->db.wqe_info[ci]; |
| |
| sq->cc += wi->num_wqebbs; |
| |
| for (i = 0; i < wi->num_ds; i++) { |
| struct mlx5e_xdp_info xdpi = |
| mlx5e_xdpi_fifo_pop(xdpi_fifo); |
| |
| if (is_redirect) { |
| dma_unmap_single(sq->pdev, xdpi.dma_addr, |
| xdpi.xdpf->len, DMA_TO_DEVICE); |
| xdp_return_frame(xdpi.xdpf); |
| } else { |
| /* Recycle RX page */ |
| mlx5e_page_release(rq, &xdpi.di, false); |
| } |
| } |
| } |
| } |
| |
| int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, |
| u32 flags) |
| { |
| struct mlx5e_priv *priv = netdev_priv(dev); |
| struct mlx5e_xdpsq *sq; |
| int drops = 0; |
| int sq_num; |
| int i; |
| |
| /* this flag is sufficient, no need to test internal sq state */ |
| if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) |
| return -ENETDOWN; |
| |
| if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) |
| return -EINVAL; |
| |
| sq_num = smp_processor_id(); |
| |
| if (unlikely(sq_num >= priv->channels.num)) |
| return -ENXIO; |
| |
| sq = &priv->channels.c[sq_num]->xdpsq; |
| |
| for (i = 0; i < n; i++) { |
| struct xdp_frame *xdpf = frames[i]; |
| struct mlx5e_xdp_info xdpi; |
| |
| xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, |
| DMA_TO_DEVICE); |
| if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { |
| xdp_return_frame_rx_napi(xdpf); |
| drops++; |
| continue; |
| } |
| |
| xdpi.xdpf = xdpf; |
| |
| if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { |
| dma_unmap_single(sq->pdev, xdpi.dma_addr, |
| xdpf->len, DMA_TO_DEVICE); |
| xdp_return_frame_rx_napi(xdpf); |
| drops++; |
| } |
| } |
| |
| if (flags & XDP_XMIT_FLUSH) { |
| if (sq->mpwqe.wqe) |
| mlx5e_xdp_mpwqe_complete(sq); |
| mlx5e_xmit_xdp_doorbell(sq); |
| } |
| |
| return n - drops; |
| } |
| |
| void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) |
| { |
| struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; |
| |
| if (xdpsq->mpwqe.wqe) |
| mlx5e_xdp_mpwqe_complete(xdpsq); |
| |
| mlx5e_xmit_xdp_doorbell(xdpsq); |
| |
| if (xdpsq->redirect_flush) { |
| xdp_do_flush_map(); |
| xdpsq->redirect_flush = false; |
| } |
| } |
| |
| void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) |
| { |
| sq->xmit_xdp_frame = is_mpw ? |
| mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; |
| } |
| |