diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
22 files changed, 2423 insertions, 1201 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index dad326ccd4dd..0343725d7f44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o + fs_counters.o rl.o lag.o dev.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 000000000000..a9dbc28f6b97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +static LIST_HEAD(intf_list); +static LIST_HEAD(mlx5_dev_list); +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; + unsigned long state; +}; + +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + if (!mlx5_lag_intf_add(intf, priv)) + return; + + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct 
mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = 
&dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&mlx5_intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct 
mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +/* Must be called with intf_mutex held */ +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + +static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u16)((dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + u16 pci_id = mlx5_gen_pci_id(dev); + struct mlx5_core_dev *res = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + + list_for_each_entry(priv, &mlx5_dev_list, dev_list) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + res = tmp_dev; + break; + } + } + + return res; +} + +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + 
dev_ctx->intf->event(dev, dev_ctx->context, event, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} + +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 96995609f205..460363b66cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -62,12 +62,14 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? 
\ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) @@ -99,6 +101,18 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -287,29 +301,53 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ u8 tired; }; +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? 
\ + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; - struct mlx5e_mpw_info *wqe_info; + + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ + } buff; __be32 mkey_be; - __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + struct mlx5e_page_cache page_cache; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -323,32 +361,15 @@ struct mlx5e_rq { struct mlx5e_umr_dma_info { __be64 *mtt; - __be64 *mtt_no_align; dma_addr_t mtt_addr; - struct mlx5e_dma_info *dma_info; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; }; struct mlx5e_mpw_info { - union { - struct mlx5e_dma_info dma_info; - struct mlx5e_umr_dma_info umr; - }; + struct mlx5e_umr_dma_info umr; u16 consumed_strides; u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; - - void (*dma_pre_sync)(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len); - void (*add_skb_frag)(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, u32 len); - void (*copy_skb_header)(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen); - void (*free_wqe)(struct mlx5e_rq *rq, struct 
mlx5e_mpw_info *wi); }; struct mlx5e_tx_wqe_info { @@ -373,11 +394,17 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO, + MLX5E_SQ_XDP +}; + struct mlx5e_sq { /* data path */ @@ -395,10 +422,20 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + bool doorbell; + } xdp; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -420,8 +457,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -437,8 +474,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -620,6 +659,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; @@ -666,30 +706,19 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq 
*cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); -void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -776,6 +805,12 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); } +static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return rq->mpwqe.mtt_offset + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return min_t(int, mdev->priv.eq_table.num_comp_vectors, @@ -834,6 +869,7 @@ void 
mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); @@ -844,9 +880,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv); +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv); void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3ac2b2..13dc388667b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d1cd1564e9b9..27ff401cec20 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -331,7 +331,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, if (mlx5e_query_global_pause_combined(priv)) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_per_prio_pfc_stats_desc, 0); + pport_per_prio_pfc_stats_desc, i); } } @@ -659,9 +659,10 @@ out: static void ptys2ethtool_supported_link(unsigned long *supported_modes, u32 eth_proto_cap) { + unsigned long proto_cap = eth_proto_cap; int proto; - for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) bitmap_or(supported_modes, supported_modes, ptys2ethtool_table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -670,9 +671,10 @@ static void ptys2ethtool_supported_link(unsigned long *supported_modes, static void ptys2ethtool_adver_link(unsigned long *advertising_modes, u32 eth_proto_cap) { + unsigned long proto_cap = eth_proto_cap; int proto; - for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) bitmap_or(advertising_modes, advertising_modes, ptys2ethtool_table[proto].advertised, __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1587a9fd5724..36fbc6b21a33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, return 0; } +static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, 
MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) @@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) if (err) goto err_free_g; - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_groups; + mlx5e_add_vlan_rules(priv); return 0; -err_destroy_vlan_flow_groups: - mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); err_destroy_vlan_table: @@ -1043,6 +1069,7 @@ err_destroy_vlan_table: static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { + mlx5e_del_vlan_rules(priv); mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } @@ -1100,7 +1127,6 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 03586ee68fc4..b58cfe37dead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> +#include <linux/bpf.h> #include "en.h" #include 
"en_tc.h" #include "eswitch.h" @@ -50,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -63,12 +64,55 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? 
+ MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -136,12 +180,18 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_mpwqe_frag += rq_stats->mpwqe_frag; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -295,6 +345,117 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. 
+ */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, + struct mlx5e_channel *c) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int i; + + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); + if (unlikely(!rq->mpwqe.mtt_no_align)) + goto err_free_wqe_info; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, + MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr))) + goto err_unmap_mtts; + + 
mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i); + } + + return 0; + +err_unmap_mtts: + while (--i >= 0) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mpwqe.mtt_no_align); +err_free_wqe_info: + kfree(rq->mpwqe.info); + +err_out: + return -ENOMEM; +} + +static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); +} + +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -304,6 +465,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -319,63 +482,92 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); + rq->wq_type = priv->params.rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->tstamp = &priv->tstamp; + rq->channel = c; + rq->ix = c->ix; + rq->priv = c->priv; + rq->xdp_prog = priv->xdp_prog; + + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; + switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) { - err = -ENOMEM; + if (mlx5e_is_vf_vport_rep(priv)) { + err = -EINVAL; goto err_rq_wq_destroy; } + 
rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_mtt_offset = c->ix * + rq->mpwqe.mtt_offset = c->ix * MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; + rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + err = mlx5e_rq_alloc_mpwqe_info(rq, c); + if (err) + goto err_rq_wq_destroy; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { err = -ENOMEM; goto err_rq_wq_destroy; } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + + if (mlx5e_is_vf_vport_rep(priv)) + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; + else + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->wqe_sz = (priv->params.lro_en) ? + rq->buff.wqe_sz = (priv->params.lro_en) ? 
priv->params.lro_wqe_sz : MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + byte_count |= MLX5_HW_START_PADDING; + rq->mkey_be = c->mkey_be; } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); wqe->data.byte_count = cpu_to_be32(byte_count); + wqe->data.lkey = rq->mkey_be; } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); rq->am.mode = priv->params.rx_cq_period_mode; - rq->wq_type = priv->params.rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; - rq->channel = c; - rq->ix = c->ix; - rq->priv = c->priv; - rq->mkey_be = c->mkey_be; - rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + rq->page_cache.head = 0; + rq->page_cache.tail = 0; + + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); return 0; @@ -387,14 +579,25 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { + int i; + + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - kfree(rq->wqe_info); + mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; + + mlx5e_page_release(rq, dma_info, false); + } mlx5_wq_destroy(&rq->wq_ctrl); } @@ -528,7 +731,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_fragmented_mpwqe(rq, 
&rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; @@ -563,8 +766,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -588,26 +791,65 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.ico_wqe); +} + +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + 
return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -616,6 +858,46 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); + } + + return 0; +} + +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -628,6 +910,13 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, 
!!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -656,18 +945,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -675,19 +953,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -702,7 +972,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -731,11 +1000,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 
0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -850,12 +1120,14 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1216,14 +1488,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: mlx5e_close_sqs(c); @@ -1252,9 +1541,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1328,6 +1621,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1401,7 +1695,22 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); 
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; +} + +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) @@ -1410,6 +1719,7 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_chan mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); @@ -1883,6 +2193,9 @@ int mlx5e_close(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + if (!netif_device_present(netdev)) + return -ENODEV; + mutex_lock(&priv->state_lock); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -2604,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); } -static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, vlan, qos); } @@ -2785,6 +3102,92 @@ static void mlx5e_tx_timeout(struct net_device *dev) 
schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if (prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. 
+ */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2804,6 +3207,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2835,6 +3239,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2909,13 +3314,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, 
umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -2995,11 +3393,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3012,33 +3412,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? 
- MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3058,19 +3436,16 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif @@ -3374,9 +3749,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; - rep.vport = 0; + rep.vport = FDB_UPLINK_VPORT; rep.priv_data = priv; - 
mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, 0, &rep); } } @@ -3401,13 +3776,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv) +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) { + int nch = profile->max_nch(mdev); struct net_device *netdev; struct mlx5e_priv *priv; - int nch = profile->max_nch(mdev); - int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, @@ -3425,12 +3800,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_netdev; + goto err_cleanup_nic; + + return netdev; + +err_cleanup_nic: + profile->cleanup(priv); + free_netdev(netdev); + + return NULL; +} + +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + const struct mlx5e_profile *profile; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + profile = priv->profile; + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); err = mlx5e_create_umr_mkey(priv); if (err) { mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_wq; + goto out; } err = profile->init_tx(priv); @@ -3453,20 +3847,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_set_dev_port_mtu(netdev); - err = register_netdev(netdev); - if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_dealloc_q_counters; - } - if (profile->enable) profile->enable(priv); - return priv; + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); -err_dealloc_q_counters: - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); + return 0; err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -3477,13 +3867,8 @@ 
err_cleanup_tx: err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_wq: - destroy_workqueue(priv->wq); - -err_free_netdev: - free_netdev(netdev); - - return NULL; +out: + return err; } static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) @@ -3505,20 +3890,84 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); } } +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + const struct mlx5e_profile *profile = priv->profile; + + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); + + flush_workqueue(priv->wq); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_close(netdev); + netif_device_detach(netdev); + rtnl_unlock(); + + mlx5e_destroy_q_counter(priv); + profile->cleanup_rx(priv); + mlx5e_close_drop_rq(priv); + profile->cleanup_tx(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); + cancel_delayed_work_sync(&priv->update_stats_work); +} + +/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying + * hardware contexts and to connect it to the current netdev. 
+ */ +static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(mdev, netdev); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + if (!netif_device_present(netdev)) + return; + + mlx5e_detach_netdev(mdev, netdev); + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); void *ppriv = NULL; - void *ret; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; + void *priv; + int vport; + int err; + struct net_device *netdev; - if (mlx5e_create_mdev_resources(mdev)) + err = mlx5e_check_required_hca_cap(mdev); + if (err) return NULL; mlx5e_register_vport_rep(mdev); @@ -3526,12 +3975,39 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (MLX5_CAP_GEN(mdev, vport_group_manager)) ppriv = &esw->offloads.vport_reps[0]; - ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); - if (!ret) { - mlx5e_destroy_mdev_resources(mdev); - return NULL; + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + goto err_unregister_reps; + } + + priv = netdev_priv(netdev); + + err = mlx5e_attach(mdev, priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_detach; } - return ret; + + return priv; + +err_detach: + mlx5e_detach(mdev, priv); + +err_destroy_netdev: + 
mlx5e_destroy_netdev(mdev, priv); + +err_unregister_reps: + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + return NULL; } void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) @@ -3539,30 +4015,11 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - set_bit(MLX5E_STATE_DESTROYING, &priv->state); - if (profile->disable) - profile->disable(priv); - - flush_workqueue(priv->wq); - if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { - netif_device_detach(netdev); - mlx5e_close(netdev); - } else { - unregister_netdev(netdev); - } - - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); - mlx5e_close_drop_rq(priv); - profile->cleanup_tx(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); - cancel_delayed_work_sync(&priv->update_stats_work); + unregister_netdev(netdev); destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); - - if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) - free_netdev(netdev); + free_netdev(netdev); } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) @@ -3572,12 +4029,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; int vport; - mlx5e_destroy_netdev(mdev, priv); - for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); - mlx5e_destroy_mdev_resources(mdev); + mlx5e_detach(mdev, vpriv); + mlx5e_destroy_netdev(mdev, priv); } static void *mlx5e_get_netdev(void *vpriv) @@ -3590,6 +4046,8 @@ static void *mlx5e_get_netdev(void *vpriv) static struct mlx5_interface mlx5e_interface = { .add = mlx5e_add, .remove = mlx5e_remove, + .attach = mlx5e_attach, + .detach = mlx5e_detach, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 29db4735182a..3c97da103d30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = { int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { - rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); - if (!rep->priv_data) { - mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n", - rep->vport); + struct net_device *netdev; + int err; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!netdev) { + pr_warn("Failed to create representor netdev for vport %d\n", + rep->vport); return -EINVAL; } + + rep->priv_data = netdev_priv(netdev); + + err = mlx5e_attach_netdev(esw->dev, netdev); + if (err) { + pr_warn("Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + return 0; + +err_detach_netdev: + mlx5e_detach_netdev(esw->dev, netdev); + +err_destroy_netdev: + mlx5e_destroy_netdev(esw->dev, rep->priv_data); + + return err; + } void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_priv *priv = rep->priv_data; + struct net_device *netdev = priv->netdev; + mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b6f8ebbdb487..c6de6fba5843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include <net/busy_poll.h> #include "en.h" #include "en_tc.h" +#include "eswitch.h" static inline bool 
mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -179,96 +180,111 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) -{ - struct sk_buff *skb; - dma_addr_t dma_addr; +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) - return -ENOMEM; +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); - wqe->data.lkey = rq->mkey_be; +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; - rq->skb[ix] = skb; + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } - return 0; + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } -err_free_skb: - dev_kfree_skb(skb); + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; - return -ENOMEM; + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; } -void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { - struct sk_buff *skb = rq->skb[ix]; + struct 
page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) + return -ENOMEM; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; } + + return 0; } -static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) { - return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; + + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + rq->buff.map_dir); + put_page(dma_info->page); } -static inline void -mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { - dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, - len, DMA_FROM_DEVICE); + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } -static inline void -mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - /* No dma pre sync for fragmented MPWQE */ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + mlx5e_page_release(rq, di, true); } -static inline void -mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) +static inline int mlx5e_mpwqe_strides_per_page(struct 
mlx5e_rq *rq) { - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); - - wi->skbs_frags[page_idx]++; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - &wi->dma_info.page[page_idx], frag_offset, - len, truesize); + return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } -static inline void -mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) +static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) { unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); @@ -282,24 +298,11 @@ mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, } static inline void -mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) -{ - struct page *page = &wi->dma_info.page[page_idx]; - - skb_copy_to_linear_data(skb, page_address(page) + offset, - ALIGN(headlen, sizeof(long))); -} - -static inline void -mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) +mlx5e_copy_skb_header_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) { u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; @@ -324,46 +327,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) -{ - return rq->mpwqe_mtt_offset + - wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); -} - -static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, - struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, - u16 ix) -{ - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - struct 
mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); - - memset(wqe, 0, sizeof(*wqe)); - cseg->opmod_idx_opcode = - cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | - MLX5_OPCODE_UMR); - cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | - ds_cnt); - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - cseg->imm = rq->umr_mkey_be; - - ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); - ucseg->bsf_octowords = - cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); - ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - - dseg->lkey = sq->mkey_be; - dseg->addr = cpu_to_be64(wi->umr.mtt_addr); -} - -static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -372,135 +338,74 @@ static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); - mlx5e_build_umr_wqe(rq, sq, wqe, ix); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + memcpy(wqe, &wi->umr.wqe, sizeof(*wqe)); + wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } 
-static inline int mlx5e_get_wqe_mtt_sz(void) -{ - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the mtt array, we allocate - * a little more. - */ - return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), - MLX5_UMR_MTT_ALIGNMENT); -} - -static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) +static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) { - struct page *page; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - wi->umr.dma_info[i].page = page; - wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { - put_page(page); - return -ENOMEM; - } - wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); - - return 0; -} - -static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + int err; int i; - wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * - MLX5_MPWRQ_PAGES_PER_WQE, - GFP_ATOMIC); - if (unlikely(!wi->umr.dma_info)) - goto err_out; - - /* We allocate more than mtt_sz as we will align the pointer */ - wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, - GFP_ATOMIC); - if (unlikely(!wi->umr.mtt_no_align)) - goto err_free_umr; - - wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); - wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, - PCI_DMA_TODEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) - goto err_free_mtt; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + 
struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + err = mlx5e_page_alloc_mapped(rq, dma_info); + if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); + page_ref_add(dma_info->page, pg_strides); wi->skbs_frags[i] = 0; } wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; - wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; - wqe->data.lkey = rq->umr_mkey_be; wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); - put_page(wi->umr.dma_info[i].page); - } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - -err_free_mtt: - kfree(wi->umr.mtt_no_align); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; -err_free_umr: - kfree(wi->umr.dma_info); + page_ref_sub(dma_info->page, pg_strides); + mlx5e_page_release(rq, dma_info, true); + } -err_out: - return -ENOMEM; + return err; } -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); + mlx5e_page_release(rq, dma_info, true); } - 
dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - kfree(wi->umr.mtt_no_align); - kfree(wi->umr.dma_info); } -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); @@ -508,12 +413,11 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - rq->stats.mpwqe_frag++; /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); @@ -521,84 +425,23 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - gfp_t gfp_mask; - int i; - - gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; - wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - MLX5_MPWRQ_WQE_PAGE_ORDER); - if (unlikely(!wi->dma_info.page)) - return -ENOMEM; - - wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, - rq->wqe_sz, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { - put_page(wi->dma_info.page); - return -ENOMEM; - } - - /* We split the high-order page into order-0 ones and manage their - * reference counter to minimize the memory held by small skb fragments - */ - split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_add(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq)); - wi->skbs_frags[i] = 0; - } - - wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; - 
wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; - wi->free_wqe = mlx5e_free_rx_linear_mpwqe; - wqe->data.lkey = rq->mkey_be; - wqe->data.addr = cpu_to_be64(wi->dma_info.addr); - - return 0; -} - -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) -{ - int i; - - dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, - PCI_DMA_FROMDEVICE); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_sub(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(&wi->dma_info.page[i]); - } -} - int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; - err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); - if (unlikely(err)) { - err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); - if (unlikely(err)) - return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; - } - - return 0; + err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); } #define RQ_CANNOT_POST(rq) \ @@ -617,9 +460,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) int err; err = rq->alloc_wqe(rq, wqe, wq->head); + if (err == -EBUSY) + return true; if (unlikely(err)) { - if (err != -EBUSY) - rq->stats.buff_alloc_err++; + rq->stats.buff_alloc_err++; break; } @@ -637,24 +481,32 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); - struct ipv6hdr 
*ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct iphdr *ipv4; + struct ipv6hdr *ipv6; struct tcphdr *tcp; + int network_depth = 0; + __be16 proto; + u16 tot_len; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = cqe_bcnt - ETH_HLEN; + skb->mac_len = ETH_HLEN; + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - if (eth->h_proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + ipv4 = (struct iphdr *)(skb->data + network_depth); + ipv6 = (struct ipv6hdr *)(skb->data + network_depth); + tot_len = cqe_bcnt - network_depth; + + if (proto == htons(ETH_P_IP)) { + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct iphdr)); ipv6 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; } else { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct ipv6hdr)); ipv4 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; @@ -778,40 +630,207 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, rq->stats.packets++; rq->stats.bytes += cqe_bcnt; mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); - napi_gro_receive(rq->cq.napi, skb); +} + +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct 
mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + sq->db.xdp.doorbell = true; + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) +{ + struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; + + xdp.data = data; + xdp.data_end = xdp.data + len; + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case 
XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } +} + +static inline +struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + u16 wqe_counter, u32 cqe_bcnt) +{ + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); + struct mlx5e_dma_info *di; + struct sk_buff *skb; + void *va, *data; + + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; + + dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(data); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + mlx5e_page_release(rq, di, true); + return NULL; + } + + if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) + return NULL; /* page/packet was consumed by XDP */ + + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + return NULL; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + + skb_reserve(skb, MLX5_RX_HEADROOM); + skb_put(skb, cqe_bcnt); + + return skb; } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5e_rx_wqe *wqe; - struct sk_buff *skb; __be16 wqe_counter_be; + struct sk_buff *skb; u16 wqe_counter; u32 cqe_bcnt; wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) goto wq_ll_pop; - } - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb_put(skb, cqe_bcnt); + 
mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); @@ -823,7 +842,6 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); @@ -837,21 +855,20 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, page_idx++; frag_offset -= PAGE_SIZE; } - wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); while (byte_cnt) { u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, - pg_consumed_bytes); + mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; page_idx++; } /* copy header */ - wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, - headlen); + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx, + 
head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -861,7 +878,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; @@ -891,18 +908,20 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) return; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -929,6 +948,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 499487ce3b53..57452fdc5154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,9 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 
tx_csum_partial_inner; u64 tx_queue_stopped; @@ -73,10 +76,13 @@ struct mlx5e_sw_stats { u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; - u64 rx_mpwqe_frag; u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; /* Special handling counters */ u64 link_down_events_phy; @@ -97,6 +103,9 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -105,10 +114,13 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -272,12 +284,18 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 
xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; - u64 mpwqe_frag; u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; }; static const struct counter_desc rq_stats_desc[] = { @@ -286,14 +304,20 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, }; struct mlx5e_sq_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 22cfc4ac1837..a350b7171e3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,6 +39,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> #include "en.h" #include "en_tc.h" #include 
"eswitch.h" @@ -47,6 +48,7 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_rule *rule; + struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 @@ -114,27 +116,30 @@ err_create_ft: static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - u32 action, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - u32 src_vport; + int err; - if (rep->vport) /* set source vport for the flow */ - src_vport = rep->vport; - else - src_vport = FDB_UPLINK_VPORT; + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + return ERR_PTR(err); - return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule) + struct mlx5_flow_rule *rule, + struct mlx5_esw_flow_attr *attr) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(rule); + if (esw && esw->mode == SRIOV_OFFLOADS) + mlx5_eswitch_del_vlan_action(esw, attr); + mlx5_del_flow_rule(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -159,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { @@ -222,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec key->src); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = 
+ skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + } + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, @@ -361,7 +385,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *dest_vport) + struct mlx5_esw_flow_attr *attr) { const struct tc_action *a; LIST_HEAD(actions); @@ -369,17 +393,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tc_no_actions(exts)) return -EINVAL; - *action = 0; + memset(attr, 0, sizeof(*attr)); + attr->in_rep = priv->ppriv; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (*action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { - *action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -387,7 +408,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; - struct mlx5_eswitch_rep *out_rep; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -397,13 +417,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); - out_rep = out_priv->ppriv; - if (out_rep->vport == 0) - *dest_vport = FDB_UPLINK_VPORT; - else - *dest_vport = 
out_rep->vport; - *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->out_rep = out_priv->ppriv; + continue; + } + + if (is_tcf_vlan(a)) { + if (tcf_vlan_action(a) == VLAN_F_POP) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + attr->vlan = tcf_vlan_push_vid(a); + } continue; } @@ -417,18 +446,29 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag, action, dest_vport = 0; + bool fdb_flow = false; + u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_esw_flow_attr *old_attr; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + if (esw && esw->mode == SRIOV_OFFLOADS) + fdb_flow = true; + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); - if (flow) + if (flow) { old = flow->rule; - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + old_attr = flow->attr; + } else { + if (fdb_flow) + flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), + GFP_KERNEL); + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { @@ -442,11 +482,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - if (esw && esw->mode == SRIOV_OFFLOADS) { - err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (fdb_flow) { + flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); + err = parse_tc_fdb_actions(priv, f->exts, flow->attr); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); } else { err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) @@ -465,7 +506,7 @@ int 
mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_del_rule; if (old) - mlx5e_tc_del_flow(priv, old); + mlx5e_tc_del_flow(priv, old, old_attr); goto out; @@ -493,7 +534,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); @@ -550,7 +591,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) struct mlx5e_tc_flow *flow = ptr; struct mlx5e_priv *priv = arg; - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 988eca99ee0f..70a717382357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct 
sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -356,6 +357,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.stopped++; } + sq->stats.xmit_more += skb->xmit_more; if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; @@ -367,15 +369,16 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ - while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; sq->stats.packets++; sq->stats.bytes += num_bytes; - sq->stats.xmit_more += skb->xmit_more; return NETDEV_TX_OK; dma_unmap_wqe_err: @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +495,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -501,8 +504,8 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci 
= sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; @@ -520,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bf33bb69210..5703f19a6a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -87,7 +87,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) case MLX5_OPCODE_NOP: break; case MLX5_OPCODE_UMR: - mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + mlx5e_post_rx_mpwqe(&sq->channel->rq); break; default: WARN_ONCE(true, @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + 
return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 101430571d6d..abbf2c369923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,9 +81,6 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); - static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -116,57 +113,6 @@ static int 
arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, } /* E-Switch vport context HW commands */ -static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0}; - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); - MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); - return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); -} - -static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 *vlan, u8 *qos) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0}; - int err; - bool cvlan_strip; - bool cvlan_insert; - - *vlan = 0; - *qos = 0; - - if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || - !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; - - err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); - if (err) - goto out; - - cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_strip); - - cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_insert); - - if (cvlan_strip || cvlan_insert) { - *vlan = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_id); - *qos = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_pcp); - } - - esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", - vport, *vlan, *qos); -out: - return err; -} - static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, void *in, int inlen) { @@ -181,7 +127,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 vlan, u8 qos, bool set) + u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -189,14 +135,18 @@ static int 
modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) return -ENOTSUPP; - esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", - vport, vlan, qos, set); - if (set) { + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); + + if (set_flags & SET_VLAN_STRIP) MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { /* insert only if no vlan in packet */ MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -921,7 +871,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", vport_num, promisc_all, promisc_mc); - if (!vport->trusted || !vport->enabled) { + if (!vport->info.trusted || !vport->enabled) { promisc_uc = 0; promisc_mc = 0; promisc_all = 0; @@ -1257,30 +1207,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_flow_spec *spec; - u8 smac[ETH_ALEN]; int err = 0; u8 *smac_v; - if (vport->spoofchk) { - err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); - if (err) { - esw_warn(esw->dev, - "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", - vport->vport, err); - return err; - } + if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; - if (!is_valid_ether_addr(smac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } } esw_vport_cleanup_ingress_rules(esw, vport); - if (!vport->vlan && !vport->qos && 
!vport->spoofchk) { + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { esw_vport_disable_ingress_acl(esw, vport); return 0; } @@ -1289,7 +1229,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1299,16 +1239,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - if (vport->vlan || vport->qos) + if (vport->info.vlan || vport->info.qos) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - if (vport->spoofchk) { + if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16); - ether_addr_copy(smac_v, smac); + ether_addr_copy(smac_v, vport->info.mac); } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; @@ -1354,7 +1294,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_vport_cleanup_egress_rules(esw, vport); - if (!vport->vlan && !vport->qos) { + if (!vport->info.vlan && !vport->info.qos) { esw_vport_disable_egress_acl(esw, vport); return 0; } @@ -1363,7 +1303,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1377,7 +1317,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, 
spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = @@ -1411,6 +1351,41 @@ out: return err; } +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static void esw_apply_vport_conf(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int vport_num = vport->vport; + + if (!vport_num) + return; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + vport->info.link_state); + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, + (vport->info.vlan || vport->info.qos)); + + /* Only legacy mode needs ACLs */ + if (esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } +} static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { @@ -1421,23 +1396,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Only VFs need ACLs for VST and spoofchk filtering */ - if (vport_num && esw->mode == SRIOV_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + /* Restore old 
vport configuration */ + esw_apply_vport_conf(esw, vport); /* Sync with current vport context */ vport->enabled_events = enable_events; vport->enabled = true; /* only PF is trusted by default */ - vport->trusted = (vport_num) ? false : true; + if (!vport_num) + vport->info.trusted = true; + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1457,11 +1426,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) vport->enabled = false; synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1473,7 +1437,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + if (vport_num && esw->mode == SRIOV_LEGACY) { + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1524,6 +1493,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } @@ -1559,6 +1529,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } +void mlx5_eswitch_attach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ +} + +void mlx5_eswitch_detach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, 
port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_disable_vport(esw, 0); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -1626,6 +1615,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) struct mlx5_vport *vport = &esw->vports[vport_num]; vport->vport = vport_num; + vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); @@ -1636,8 +1626,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: if (esw->work_queue) @@ -1656,7 +1644,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) return; esw_info(esw->dev, "cleanup\n"); - esw_disable_vport(esw, 0); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); @@ -1689,18 +1676,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) -static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) -{ - ((u8 *)node_guid)[7] = mac[0]; - ((u8 *)node_guid)[6] = mac[1]; - ((u8 *)node_guid)[5] = mac[2]; - ((u8 *)node_guid)[4] = 0xff; - ((u8 *)node_guid)[3] = 0xfe; - ((u8 *)node_guid)[2] = mac[3]; - ((u8 *)node_guid)[1] = mac[4]; - ((u8 *)node_guid)[0] = mac[5]; -} - int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { @@ -1713,13 +1688,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { mlx5_core_warn(esw->dev, "MAC invalidation is not 
allowed when spoofchk is on, vport(%d)\n", vport); - return -EPERM; + err = -EPERM; + goto unlock; } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); @@ -1727,7 +1704,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", vport, err); - return err; + goto unlock; } node_guid_gen_from_mac(&node_guid, mac); @@ -1737,9 +1714,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", vport, err); - mutex_lock(&esw->state_lock); + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); + +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1747,22 +1727,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state) { + struct mlx5_vport *evport; + int err = 0; + if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - return mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport, link_state); + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to set vport %d link state, err = %d", + vport, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return 0; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport; - u16 vlan; - u8 qos; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1774,54 +1770,61 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, memset(ivi, 0, sizeof(*ivi)); ivi->vf = 
vport - 1; - mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); - ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport); - query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); - ivi->vlan = vlan; - ivi->qos = qos; - ivi->spoofchk = evport->spoofchk; + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + mutex_unlock(&esw->state_lock); return 0; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport; int err = 0; - int set = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) return -EINVAL; - if (vlan || qos) - set = 1; - + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - return err; + goto unlock; - mutex_lock(&esw->state_lock); - evport->vlan = vlan; - evport->qos = qos; + evport->info.vlan = vlan; + evport->info.qos = qos; if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto out; + goto unlock; err = esw_vport_egress_config(esw, evport); } -out: +unlock: mutex_unlock(&esw->state_lock); return err; } +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk) { @@ -1834,16 
+1837,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - pschk = evport->spoofchk; - evport->spoofchk = spoofchk; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + evport = &esw->vports[vport]; + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; - } + if (err) + evport->info.spoofchk = pschk; mutex_unlock(&esw->state_lock); return err; @@ -1859,10 +1860,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - evport->trusted = setting; + evport = &esw->vports[vport]; + evport->info.trusted = setting; if (evport->enabled) esw_vport_change_handle_locked(evport); mutex_unlock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a96140971d77..2e2938e08cda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -109,6 +109,16 @@ struct vport_egress { struct mlx5_flow_rule *drop_rule; }; +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u8 qos; + u64 node_guid; + int link_state; + bool spoofchk; + bool trusted; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -121,10 +131,8 @@ struct mlx5_vport { struct vport_ingress ingress; struct vport_egress egress; - u16 vlan; - u8 qos; - bool spoofchk; - bool trusted; + struct mlx5_vport_info info; + bool enabled; u16 enabled_events; }; @@ -149,6 +157,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; + int vlan_push_pop_refcount; } offloads; }; }; @@ -170,11 
+179,14 @@ struct mlx5_eswitch_rep { void (*unload)(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); u16 vport; - struct mlx5_flow_rule *vport_rx_rule; + u8 hw_id[ETH_ALEN]; void *priv_data; + + struct mlx5_flow_rule *vport_rx_rule; struct list_head vport_sqs_list; + u16 vlan; + u32 vlan_refcount; bool valid; - u8 hw_id[ETH_ALEN]; }; struct mlx5_esw_offload { @@ -201,9 +213,14 @@ struct mlx5_eswitch { int mode; }; +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_attach(struct mlx5_eswitch *esw); +void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); @@ -224,14 +241,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport); + struct mlx5_esw_flow_attr *attr); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 + +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep; + + int action; + u16 vlan; + bool vlan_handled; +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); @@ -241,9 +276,17 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, int 
mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, struct mlx5_eswitch_rep *rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport); + int vport_index); + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3dc83a9459a4..c55ad8d00c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,19 +46,22 @@ enum { struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; void *misc; + int action; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + action = attr->action; + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = dst_vport; + dest.vport_num = attr->out_rep->vport; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); @@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + MLX5_SET(fte_match_set_misc, misc, 
source_port, attr->in_rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -86,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. 
vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + goto out_notsupp; + + return 0; + +out_notsupp: + return -ENOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd 
= !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { @@ -144,16 +327,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_flow_rule *flow_rule; struct mlx5_esw_sq *esw_sq; - int vport; int err; int i; if (esw->mode != SRIOV_OFFLOADS) return 0; - vport = rep->vport == 0 ? 
- FDB_UPLINK_VPORT : rep->vport; - for (i = 0; i < sqns_num; i++) { esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); if (!esw_sq) { @@ -163,7 +342,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - vport, + rep->vport, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -446,7 +625,7 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); @@ -455,8 +634,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err) - esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + } return err; } @@ -508,12 +691,16 @@ create_ft_err: static int esw_offloads_stop(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err) - esw_warn(esw->dev, "Failed set eswitch legacy mode. 
err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } return err; } @@ -612,27 +799,36 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) + int vport_index, + struct mlx5_eswitch_rep *__rep) { struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; + + memset(rep, 0, sizeof(*rep)); - memcpy(&offloads->vport_reps[rep->vport], rep, - sizeof(struct mlx5_eswitch_rep)); + rep->load = __rep->load; + rep->unload = __rep->unload; + rep->vport = __rep->vport; + rep->priv_data = __rep->priv_data; + ether_addr_copy(rep->hw_id, __rep->hw_id); - INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); - offloads->vport_reps[rep->vport].valid = true; + INIT_LIST_HEAD(&rep->vport_sqs_list); + rep->valid = true; } void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport) + int vport_index) { struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[vport]; + rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) rep->unload(esw, rep); - offloads->vport_reps[vport].valid = false; + rep->valid = false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 7a0415e6d339..113c32326333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -401,11 +401,11 @@ struct mlx5_cmd_fc_bulk * mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) { struct 
mlx5_cmd_fc_bulk *b; - int outlen = sizeof(*b) + + int outlen = MLX5_ST_SZ_BYTES(query_flow_counter_out) + MLX5_ST_SZ_BYTES(traffic_counter) * num; - b = kzalloc(outlen, GFP_KERNEL); + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); if (!b) return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 92c3e0dbcbdc..55957246c0e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -277,7 +277,7 @@ static void mlx5_do_bond_work(struct work_struct *work) bond_work); int status; - status = mutex_trylock(&mlx5_intf_mutex); + status = mlx5_dev_list_trylock(); if (!status) { /* 1 sec delay. */ mlx5_queue_bond_work(ldev, HZ); @@ -285,7 +285,7 @@ static void mlx5_do_bond_work(struct work_struct *work) } mlx5_do_bond(ldev); - mutex_unlock(&mlx5_intf_mutex); + mlx5_dev_list_unlock(); } static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, @@ -466,35 +466,21 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, mutex_unlock(&lag_mutex); } -static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) -{ - return (u16)((dev->pdev->bus->number << 8) | - PCI_SLOT(dev->pdev->devfn)); -} /* Must be called with intf_mutex held */ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - struct mlx5_priv *priv; - u16 pci_id; if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) return; - pci_id = mlx5_gen_pci_id(dev); - - mlx5_core_for_each_priv(priv) { - tmp_dev = container_of(priv, struct mlx5_core_dev, priv); - if ((dev != tmp_dev) && - (mlx5_gen_pci_id(tmp_dev) == pci_id)) { - ldev = tmp_dev->priv.lag; - break; - } - } + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; if (!ldev) { ldev = mlx5_lag_dev_alloc(); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c132ef1faefe..d9c3c70b29e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -72,17 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); -static LIST_HEAD(intf_list); - -LIST_HEAD(mlx5_dev_list); -DEFINE_MUTEX(mlx5_intf_mutex); - -struct mlx5_device_context { - struct list_head list; - struct mlx5_interface *intf; - void *context; -}; - enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, @@ -778,147 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - if (!mlx5_lag_intf_add(intf, priv)) - return; - - dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); - if (!dev_ctx) - return; - - dev_ctx->intf = intf; - dev_ctx->context = intf->add(dev); - - if (dev_ctx->context) { - spin_lock_irq(&priv->ctx_lock); - list_add_tail(&dev_ctx->list, &priv->ctx_list); - spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - } -} - -static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) { - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); - - intf->remove(dev, dev_ctx->context); - kfree(dev_ctx); - return; - } -} - -static int mlx5_register_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - 
mutex_lock(&mlx5_intf_mutex); - list_add_tail(&priv->dev_list, &mlx5_dev_list); - list_for_each_entry(intf, &intf_list, list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} - -static void mlx5_unregister_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_remove_device(intf, priv); - list_del(&priv->dev_list); - mutex_unlock(&mlx5_intf_mutex); -} - -int mlx5_register_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - if (!intf->add || !intf->remove) - return -EINVAL; - - mutex_lock(&mlx5_intf_mutex); - list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_add_device(intf, priv); - mutex_unlock(&mlx5_intf_mutex); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_interface); - -void mlx5_unregister_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - mutex_lock(&mlx5_intf_mutex); - list_for_each_entry(priv, &mlx5_dev_list, dev_list) - mlx5_remove_device(intf, priv); - list_del(&intf->list); - mutex_unlock(&mlx5_intf_mutex); -} -EXPORT_SYMBOL(mlx5_unregister_interface); - -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - -/* Must be called with intf_mutex held */ -void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, 
list) - if (intf->protocol == protocol) { - mlx5_add_device(intf, &dev->priv); - break; - } -} - -/* Must be called with intf_mutex held */ -void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) -{ - struct mlx5_interface *intf; - - list_for_each_entry(intf, &intf_list, list) - if (intf->protocol == protocol) { - mlx5_remove_device(intf, &dev->priv); - break; - } -} static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { @@ -991,8 +839,102 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) debugfs_remove(priv->dbg_root); } -#define MLX5_IB_MOD "mlx5_ib" -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto out; + } + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + err = mlx5_init_cq_table(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq table\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + goto err_rl_cleanup; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init sriov %d\n", err); + goto err_eswitch_cleanup; + } + + return 0; + +err_eswitch_cleanup: +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); + 
+err_rl_cleanup: +#endif + mlx5_cleanup_rl_table(dev); + +err_tables_cleanup: + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_sriov_cleanup(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_rl_table(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) { struct pci_dev *pdev = dev->pdev; int err; @@ -1025,12 +967,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } - mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); - goto err_pagealloc_cleanup; + goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); @@ -1083,34 +1023,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; - } - - err = mlx5_query_board_id(dev); - if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + if (boot && mlx5_init_once(dev, priv)) { + dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; } err = mlx5_enable_msix(dev); if (err) { dev_err(&pdev->dev, "enable msix failed\n"); - goto err_stop_poll; - } - - err = mlx5_eq_init(dev); - if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); - goto disable_msix; + goto err_cleanup_once; } err = mlx5_alloc_uuars(dev, &priv->uuari); if (err) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); - goto err_eq_cleanup; + goto err_disable_msix; } err = mlx5_start_eqs(dev); @@ -1126,15 +1053,10 
@@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } err = mlx5_irq_set_affinity_hints(dev); - if (err) + if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - - mlx5_init_cq_table(dev); - mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); + goto err_affinity_hints; + } err = mlx5_init_fs(dev); if (err) { @@ -1142,36 +1064,26 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_fs; } - err = mlx5_init_rl_table(dev); - if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); - goto err_rl; - } - #ifdef CONFIG_MLX5_CORE_EN - err = mlx5_eswitch_init(dev); - if (err) { - dev_err(&pdev->dev, "eswitch init failed %d\n", err); - goto err_reg_dev; - } + mlx5_eswitch_attach(dev->priv.eswitch); #endif - err = mlx5_sriov_init(dev); + err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); goto err_sriov; } - err = mlx5_register_device(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto err_reg_dev; + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1179,23 +1091,19 @@ out: return 0; -err_sriov: - if (mlx5_sriov_cleanup(dev)) - dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); +err_reg_dev: + mlx5_sriov_detach(dev); +err_sriov: #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif -err_reg_dev: - mlx5_cleanup_rl_table(dev); -err_rl: mlx5_cleanup_fs(dev); + err_fs: - 
mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1204,12 +1112,13 @@ err_stop_eqs: err_free_uar: mlx5_free_uuars(dev, &priv->uuari); -err_eq_cleanup: - mlx5_eq_cleanup(dev); - -disable_msix: +err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { @@ -1226,8 +1135,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); -err_pagealloc_cleanup: - mlx5_pagealloc_cleanup(dev); +err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: @@ -1237,40 +1145,35 @@ out_err: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) { int err = 0; - err = mlx5_sriov_cleanup(dev); - if (err) { - dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", - __func__); - return err; - } - mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); + if (cleanup) + mlx5_cleanup_once(dev); goto out; } - mlx5_unregister_device(dev); + + if (mlx5_device_registered(dev)) + mlx5_detach_device(dev); + + mlx5_sriov_detach(dev); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_eswitch_detach(dev->priv.eswitch); #endif - - mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); - mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); + if (cleanup) + mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); err = 
mlx5_cmd_teardown_hca(dev); if (err) { @@ -1280,7 +1183,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); - mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); out: @@ -1290,22 +1192,6 @@ out: return err; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - struct mlx5_core_event_handler { void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event, @@ -1319,6 +1205,7 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif }; +#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1365,12 +1252,18 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - err = mlx5_load_one(dev, priv); + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); + err = devlink_register(devlink, &pdev->dev); if (err) goto clean_load; @@ -1378,8 +1271,9 @@ static int init_one(struct pci_dev *pdev, return 0; clean_load: - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, true); clean_health: + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); @@ -1397,11 +1291,15 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_priv *priv = &dev->priv; devlink_unregister(devlink); - if (mlx5_unload_one(dev, priv)) { + 
mlx5_unregister_device(dev); + + if (mlx5_unload_one(dev, priv, true)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); @@ -1416,7 +1314,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? @@ -1488,7 +1386,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv); + err = mlx5_load_one(dev, priv, false); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" , __func__, err); @@ -1510,7 +1408,7 @@ static void shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 714b71bed2be..3d0cfb9f18f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -46,9 +46,6 @@ extern int mlx5_core_debug_mask; -extern struct list_head mlx5_dev_list; -extern struct mutex mlx5_intf_mutex; - #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ (__dev)->priv.name, __func__, __LINE__, current->pid, \ @@ -73,9 +70,6 @@ do { \ #define mlx5_core_info(__dev, format, ...) 
\ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) -#define mlx5_core_for_each_priv(__priv) \ - list_for_each_entry(__priv, &mlx5_dev_list, dev_list) - enum { MLX5_CMD_DATA, /* print command payload only */ MLX5_CMD_TIME, /* print command execution time */ @@ -89,6 +83,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); @@ -102,8 +100,19 @@ void mlx5_cq_tasklet_cb(unsigned long data); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev); +bool mlx5_device_registered(struct mlx5_core_dev *dev); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 673a7c96479a..d4585154151d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, { struct fw_page *fwp; struct rb_node *p; + u32 func_id; u32 npages; u32 i = 0; @@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); p = rb_first(&dev->priv.page_root); while (p && i < npages) { fwp = rb_entry(p, struct fw_page, rb_node); - MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); p = rb_next(p); + if (fwp->func_id != func_id) + continue; + + MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); i++; } @@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); int prev_vfs_pages = dev->priv.vfs_pages; + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, dev->priv.name); while (dev->priv.vfs_pages) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 78e789245183..e08627785590 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } -static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err; int vf; - for (vf = 1; 
vf <= num_vfs; vf++) { - err = mlx5_core_enable_hca(dev, vf); + if (sriov->enabled_vfs) { + mlx5_core_warn(dev, + "failed to enable SRIOV on device, already enabled with %d vfs\n", + sriov->enabled_vfs); + return -EBUSY; + } + +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + if (err) { + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } +#endif + + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); if (err) { - mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); - } else { - sriov->vfs_ctx[vf - 1].enabled = 1; - mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; } + sriov->vfs_ctx[vf].enabled = 1; + sriov->enabled_vfs++; + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } + + return 0; } -static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; int vf; - for (vf = 1; vf <= num_vfs; vf++) { - if (sriov->vfs_ctx[vf - 1].enabled) { - if (mlx5_core_disable_hca(dev, vf)) - mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); - else - sriov->vfs_ctx[vf - 1].enabled = 0; + if (!sriov->enabled_vfs) + return; + + for (vf = 0; vf < sriov->num_vfs; vf++) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; } + sriov->vfs_ctx[vf].enabled = 0; + sriov->enabled_vfs--; } + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } -static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) { struct 
mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err; - - if (pci_num_vf(pdev)) - pci_disable_sriov(pdev); - - enable_vfs(dev, num_vfs); + int err = 0; - err = pci_enable_sriov(pdev, num_vfs); - if (err) { - dev_warn(&pdev->dev, "enable sriov failed %d\n", err); - goto ex; + if (pci_num_vf(pdev)) { + mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); + return -EBUSY; } - return 0; + err = pci_enable_sriov(pdev, num_vfs); + if (err) + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); -ex: - disable_vfs(dev, num_vfs); return err; } -static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +static void mlx5_pci_disable_sriov(struct pci_dev *pdev) +{ + pci_disable_sriov(pdev); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); - if (!sriov->vfs_ctx) - return -ENOMEM; + err = mlx5_device_enable_sriov(dev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); + return err; + } - sriov->enabled_vfs = num_vfs; - err = mlx5_core_create_vfs(pdev, num_vfs); + err = mlx5_pci_enable_sriov(pdev, num_vfs); if (err) { - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; + mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev); return err; } + sriov->num_vfs = num_vfs; + return 0; } -static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_sriov_disable(struct pci_dev *pdev) { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - sriov->num_vfs = num_vfs; -} - -static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov; - - sriov = &dev->priv.sriov; - disable_vfs(dev, sriov->num_vfs); - - if 
(mlx5_wait_for_vf_pages(dev)) - mlx5_core_warn(dev, "timeout claiming VFs pages\n"); - + mlx5_pci_disable_sriov(pdev); + mlx5_device_disable_sriov(dev); sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); if (!mlx5_core_is_pf(dev)) @@ -156,92 +180,57 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EINVAL; } - mlx5_core_cleanup_vfs(dev); + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev); - if (!num_vfs) { -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; - if (!pci_vfs_assigned(pdev)) - pci_disable_sriov(pdev); - else - mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n"); - return 0; - } + return err ? 
err : num_vfs; +} - err = mlx5_core_sriov_enable(pdev, num_vfs); - if (err) { - mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err); - return err; - } +int mlx5_sriov_attach(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; - mlx5_core_init_vfs(dev, num_vfs); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); -#endif + if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + return 0; - return num_vfs; + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, sriov->num_vfs); } -static int sync_required(struct pci_dev *pdev) +void mlx5_sriov_detach(struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int cur_vfs = pci_num_vf(pdev); - - if (cur_vfs != sriov->num_vfs) { - mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n", - cur_vfs, sriov->num_vfs); - return 1; - } + if (!mlx5_core_is_pf(dev)) + return; - return 0; + mlx5_device_disable_sriov(dev); } int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct pci_dev *pdev = dev->pdev; - int cur_vfs; + int total_vfs; if (!mlx5_core_is_pf(dev)) return 0; - if (!sync_required(dev->pdev)) - return 0; - - cur_vfs = pci_num_vf(pdev); - sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; - sriov->enabled_vfs = cur_vfs; - - mlx5_core_init_vfs(dev, cur_vfs); -#ifdef CONFIG_MLX5_CORE_EN - if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, - SRIOV_LEGACY); -#endif - - enable_vfs(dev, cur_vfs); - return 0; } -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) { - struct 
pci_dev *pdev = dev->pdev; - int err; + struct mlx5_core_sriov *sriov = &dev->priv.sriov; if (!mlx5_core_is_pf(dev)) - return 0; + return; - err = mlx5_core_sriov_configure(pdev, 0); - if (err) - return err; - - return 0; + kfree(sriov->vfs_ctx); } |
