diff options
Diffstat (limited to 'drivers/net/ethernet/intel')
21 files changed, 626 insertions, 214 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 80c5cecaf2b5..55c6bce5da61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -854,6 +854,10 @@ struct i40e_vsi { u64 tx_force_wb; u64 rx_buf_failed; u64 rx_page_failed; + u64 rx_page_reuse; + u64 rx_page_alloc; + u64 rx_page_waive; + u64 rx_page_busy; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 7abef88801fb..42439f725aa4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -769,7 +769,7 @@ static bool i40e_asq_done(struct i40e_hw *hw) } /** - * i40e_asq_send_command_atomic - send command to Admin Queue + * i40e_asq_send_command_atomic_exec - send command to Admin Queue * @hw: pointer to the hw struct * @desc: prefilled descriptor describing the command (non DMA mem) * @buff: buffer to use for indirect commands @@ -780,11 +780,13 @@ static bool i40e_asq_done(struct i40e_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. It runs the queue, cleans the queue, etc **/ -i40e_status -i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - bool is_atomic_context) +static i40e_status +i40e_asq_send_command_atomic_exec(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) { i40e_status status = 0; struct i40e_dma_mem *dma_buff = NULL; @@ -794,8 +796,6 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, u16 retval = 0; u32 val = 0; - mutex_lock(&hw->aq.asq_mutex); - if (hw->aq.asq.count == 0) { i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); @@ -969,6 +969,36 @@ i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, } asq_send_command_error: + return status; +} + +/** + * i40e_asq_send_command_atomic - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * + * Acquires the lock and calls the main send command execution + * routine. + **/ +i40e_status +i40e_asq_send_command_atomic(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size, + cmd_details, + is_atomic_context); + mutex_unlock(&hw->aq.asq_mutex); return status; } @@ -983,6 +1013,52 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, } /** + * i40e_asq_send_command_atomic_v2 - send command to Admin Queue + * @hw: pointer to the hw struct + * @desc: prefilled descriptor describing the command (non DMA mem) + * @buff: buffer to use for indirect commands + * @buff_size: size of buffer for indirect commands + * @cmd_details: pointer to command details structure + * @is_atomic_context: is the function called in an atomic context? + * @aq_status: pointer to Admin Queue status return value + * + * Acquires the lock and calls the main send command execution + * routine. Returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + **/ +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status) +{ + i40e_status status; + + mutex_lock(&hw->aq.asq_mutex); + status = i40e_asq_send_command_atomic_exec(hw, desc, buff, + buff_size, + cmd_details, + is_atomic_context); + if (aq_status) + *aq_status = hw->aq.asq_last_status; + mutex_unlock(&hw->aq.asq_mutex); + return status; +} + +i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size, + cmd_details, true, aq_status); +} + +/** * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) * @opcode: the opcode can be used to decide which flags to turn off or on diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 9ddeb015eb7e..6aefffd83615 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1899,8 +1899,9 @@ i40e_status i40e_aq_add_vsi(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); if (status) goto aq_add_vsi_exit; @@ -2287,8 +2288,9 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); - status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info, - sizeof(vsi_ctx->info), cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info, + sizeof(vsi_ctx->info), + cmd_details, true); vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used); vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free); @@ -2632,33 +2634,28 @@ get_veb_exit: } /** - * i40e_aq_add_macvlan - * @hw: pointer to the hw struct - * @seid: VSI for the mac address + * i40e_prepare_add_macvlan * @mv_list: list of macvlans to be added + * @desc: pointer to AQ descriptor structure * @count: length of the list - * @cmd_details: pointer to command details structure or NULL + * @seid: VSI for the mac address * - * Add MAC/VLAN addresses to the HW filtering + * Internal helper function that prepares the add macvlan request + * and returns the buffer size. **/ -i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, - struct i40e_aqc_add_macvlan_element_data *mv_list, - u16 count, struct i40e_asq_cmd_details *cmd_details) +static u16 +i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list, + struct i40e_aq_desc *desc, u16 count, u16 seid) { - struct i40e_aq_desc desc; struct i40e_aqc_macvlan *cmd = - (struct i40e_aqc_macvlan *)&desc.params.raw; - i40e_status status; + (struct i40e_aqc_macvlan *)&desc->params.raw; u16 buf_size; int i; - if (count == 0 || !mv_list || !hw) - return I40E_ERR_PARAM; - buf_size = count * sizeof(*mv_list); /* prep the rest of the request */ - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan); + i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan); cmd->num_addresses = cpu_to_le16(count); cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); cmd->seid[1] = 0; @@ -2669,14 +2666,71 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, mv_list[i].flags |= cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + return buf_size; +} - return status; +/** + * i40e_aq_add_macvlan + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * + * Add MAC/VLAN addresses to the HW filtering + **/ +i40e_status +i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); +} + +/** + * i40e_aq_add_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be added + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Add MAC/VLAN addresses to the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); } /** @@ -2715,13 +2769,59 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size, + cmd_details, true); return status; } /** + * i40e_aq_remove_macvlan_v2 + * @hw: pointer to the hw struct + * @seid: VSI for the mac address + * @mv_list: list of macvlans to be removed + * @count: length of the list + * @cmd_details: pointer to command details structure or NULL + * @aq_status: pointer to Admin Queue status return value + * + * Remove MAC/VLAN addresses from the HW filtering. + * The _v2 version returns the last Admin Queue status in aq_status + * to avoid race conditions in access to hw->aq.asq_last_status. + * It also calls _v2 versions of asq_send_command functions to + * get the aq_status on the stack. + **/ +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status) +{ + struct i40e_aqc_macvlan *cmd; + struct i40e_aq_desc desc; + u16 buf_size; + + if (count == 0 || !mv_list || !hw) + return I40E_ERR_PARAM; + + buf_size = count * sizeof(*mv_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan); + cmd = (struct i40e_aqc_macvlan *)&desc.params.raw; + cmd->num_addresses = cpu_to_le16(count); + cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid); + cmd->seid[1] = 0; + cmd->seid[2] = 0; + + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + + return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size, + cmd_details, true, aq_status); +} + +/** * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule * @hw: pointer to the hw struct * @opcode: AQ opcode for add or delete mirror rule @@ -3868,7 +3968,8 @@ i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid, cmd->seid = cpu_to_le16(seid); - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0, + cmd_details, true); return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 1e57cc8c47d7..90fff05fbd2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -275,9 +275,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + " rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n", i, - rx_ring->rx_stats.realloc_count, rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 091f36adbbe1..e48499624d22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -295,6 +295,10 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_busy", tx_busy), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse), + I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc), + I40E_VSI_STAT("rx_cache_waive", rx_page_waive), + I40E_VSI_STAT("rx_cache_busy", rx_page_busy), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f531bc1df338..9b7ce6d9a92b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -773,6 +773,7 @@ void i40e_update_veb_stats(struct i40e_veb *veb) **/ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) { + u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ @@ -780,7 +781,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *es; /* device's eth stats */ u64 tx_restart, tx_busy; struct i40e_ring *p; - u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -806,6 +806,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; + rx_reuse = 0; + rx_alloc = 0; + rx_waive = 0; + rx_busy = 0; rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */ @@ -839,6 +843,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; rx_page += p->rx_stats.alloc_page_failed; + rx_reuse += p->rx_stats.page_reuse_count; + rx_alloc += p->rx_stats.page_alloc_count; + rx_waive += p->rx_stats.page_waive_count; + rx_busy += p->rx_stats.page_busy_count; if (i40e_enabled_xdp_vsi(vsi)) { /* locate XDP ring */ @@ -866,6 +874,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; + vsi->rx_page_reuse = rx_reuse; + vsi->rx_page_alloc = rx_alloc; + vsi->rx_page_waive = rx_waive; + vsi->rx_page_busy = rx_busy; ns->rx_packets = rx_p; ns->rx_bytes = rx_b; @@ -2143,19 +2155,19 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_del, int *retval) { struct i40e_hw *hw = &vsi->back->hw; + enum i40e_admin_queue_err aq_status; i40e_status aq_ret; - int aq_err; - aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL); - aq_err = hw->aq.asq_last_status; + aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL, + &aq_status); /* Explicitly ignore and do not report when firmware returns ENOENT */ - if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) { + if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) { *retval = -EIO; dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %s, aq_err %s\n", vsi_name, i40e_stat_str(hw, aq_ret), - i40e_aq_str(hw, aq_err)); + i40e_aq_str(hw, aq_status)); } } @@ -2178,10 +2190,10 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, int num_add) { struct i40e_hw *hw = &vsi->back->hw; - int aq_err, fcnt; + enum i40e_admin_queue_err aq_status; + int fcnt; - i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL); - aq_err = hw->aq.asq_last_status; + i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status); fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { @@ -2189,17 +2201,19 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", - i40e_aq_str(hw, aq_err), vsi_name); + i40e_aq_str(hw, aq_status), vsi_name); } else if (vsi->type == I40E_VSI_SRIOV || vsi->type == I40E_VSI_VMDQ1 || vsi->type == I40E_VSI_VMDQ2) { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi_name); + i40e_aq_str(hw, aq_status), vsi_name, + vsi_name); } else { dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n", - i40e_aq_str(hw, aq_err), vsi_name, vsi->type); + i40e_aq_str(hw, aq_status), vsi_name, + vsi->type); } } } @@ -12722,7 +12736,8 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); - if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + if (!(features & NETIF_F_HW_TC) && + (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) { dev_err(&pf->pdev->dev, "Offloaded tc filters active, can't turn hw_tc_offload off"); return -EINVAL; @@ -13478,6 +13493,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; + netdev->features &= ~NETIF_F_HW_TC; + if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 9241b6005ad3..ebdcde6f1aeb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -27,10 +27,25 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details); i40e_status +i40e_asq_send_command_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); +i40e_status i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details, bool is_atomic_context); +i40e_status +i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, + struct i40e_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct i40e_asq_cmd_details *cmd_details, + bool is_atomic_context, + enum i40e_admin_queue_err *aq_status); /* debug function for adminq */ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, @@ -150,9 +165,19 @@ i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_add_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_add_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_remove_macvlan_element_data *mv_list, + u16 count, struct i40e_asq_cmd_details *cmd_details, + enum i40e_admin_queue_err *aq_status); i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, struct i40e_asq_cmd_details *cmd_details, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 66cc79500c10..0eae5858f2fe 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) i40e_clean_tx_ring(tx_ring); kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; if (tx_ring->desc) { dma_free_coherent(tx_ring->dev, tx_ring->size, @@ -1382,8 +1380,6 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, new_buff->page_offset = old_buff->page_offset; new_buff->pagecnt_bias = old_buff->pagecnt_bias; - rx_ring->rx_stats.page_reuse_count++; - /* clear contents of buffer_info */ old_buff->page = NULL; } @@ -1433,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; - if (ring_is_xdp(tx_ring)) { - tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs), - GFP_KERNEL); - if (!tx_ring->xsk_descs) - goto err; - } - u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ @@ -1463,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) return 0; err: - kfree(tx_ring->xsk_descs); - tx_ring->xsk_descs = NULL; kfree(tx_ring->tx_bi); tx_ring->tx_bi = NULL; return -ENOMEM; @@ -1675,6 +1662,8 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, return false; } + rx_ring->rx_stats.page_alloc_count++; + /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, i40e_rx_pg_size(rx_ring), @@ -1982,32 +1971,43 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, /** * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buffer: buffer containing the page + * @rx_stats: rx stats structure for the rx ring * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling i40e_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and - * page freed + * page freed. + * + * rx_stats will be updated to indicate whether the page was waived + * or busy if it could not be reused. */ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct i40e_rx_queue_stats *rx_stats, int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; /* Is any reuse possible? */ - if (!dev_page_is_reusable(page)) + if (!dev_page_is_reusable(page)) { + rx_stats->page_waive_count++; return false; + } #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) { + rx_stats->page_busy_count++; return false; + } #else #define I40E_LAST_OFFSET \ (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) - if (rx_buffer->page_offset > I40E_LAST_OFFSET) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) { + rx_stats->page_busy_count++; return false; + } #endif /* If we have drained the page fragment pool we need to update @@ -2237,7 +2237,7 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { + if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index bfc2845c99d1..c471c2da313c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -298,7 +298,9 @@ struct i40e_rx_queue_stats { u64 alloc_page_failed; u64 alloc_buff_failed; u64 page_reuse_count; - u64 realloc_count; + u64 page_alloc_count; + u64 page_waive_count; + u64 page_busy_count; }; enum i40e_ring_state_t { @@ -390,7 +392,6 @@ struct i40e_ring { u16 rx_offset; struct xdp_rxq_info xdp_rxq; struct xsk_buff_pool *xsk_pool; - struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */ } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 67e9844e2076..5a997b0d07d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -471,11 +471,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) **/ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) { - struct xdp_desc *descs = xdp_ring->xsk_descs; + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); if (!nb_pkts) return true; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index e2e3ef7fba7f..e3df0134dc77 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2803,6 +2803,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; + xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; + xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; xdp_rings[i].desc = NULL; xdp_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index fc3580167e7b..263a2e7577a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -227,6 +227,11 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i]; + + qos->pfc_mode = dcbx_cfg->pfc_mode; + if (qos->pfc_mode == IIDC_DSCP_PFC_MODE) + for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++) + qos->dscp_map[i] = dcbx_cfg->dscp_map[i]; } EXPORT_SYMBOL_GPL(ice_get_qos_params); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f46af3b34074..63f43400a146 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2495,10 +2495,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; - xdp_ring->next_dd = ICE_TX_THRESH - 1; - xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; + xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; + xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index c2258bee8ecb..7b9b3b750bf0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -173,6 +173,8 @@ tx_skip_free: tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; + tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; if (!tx_ring->netdev) return; @@ -1467,7 +1469,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) bool wd; if (tx_ring->xsk_pool) - wd = ice_clean_tx_irq_zc(tx_ring, budget); + wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1520,7 +1522,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) { + if (napi_complete_done(napi, work_done)) { ice_net_dim(q_vector); ice_enable_interrupt(q_vector); } else { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b7b3bd4816f0..466253ac2ee1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,7 +13,6 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 -#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to @@ -111,6 +110,8 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_RING_QUARTER(R) ((R)->count >> 2) + #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) #define ICE_TX_FLAGS_SW_VLAN BIT(2) @@ -321,17 +322,18 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct ice_txq_stats tx_stats; - /* CL3 - 3rd cacheline starts here */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ + /* CL4 - 4th cacheline starts here */ + u16 xdp_tx_active; #define ICE_TX_FLAGS_RING_XDP BIT(0) u8 flags; u8 dcb_tc; /* Traffic class of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 0e87b98e0966..eb21cec1d772 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -222,6 +222,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) { unsigned int total_bytes = 0, total_pkts = 0; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *next_dd_desc; u16 next_dd = xdp_ring->next_dd; @@ -233,7 +234,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) return; - for (i = 0; i < ICE_TX_THRESH; i++) { + for (i = 0; i < tx_thresh; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; total_bytes += tx_buf->bytecount; @@ -254,9 +255,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) } next_dd_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; if (xdp_ring->next_dd > xdp_ring->count) - xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_dd = tx_thresh - 1; xdp_ring->next_to_clean = ntc; ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); } @@ -269,12 +270,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) */ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; dma_addr_t dma; - if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { @@ -300,13 +302,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); + xdp_ring->xdp_tx_active++; i++; if (i == xdp_ring->count) { i = 0; tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = ICE_TX_THRESH - 1; + xdp_ring->next_rs = tx_thresh - 1; } xdp_ring->next_to_use = i; @@ -314,7 +317,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); tx_desc->cmd_type_offset_bsz |= cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += ICE_TX_THRESH; + xdp_ring->next_rs += tx_thresh; } return ICE_XDP_TX; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index feb874bde171..ed430d566274 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) bool if_running, pool_present = !!pool; int ret = 0, pool_failure = 0; + if (!is_power_of_2(vsi->rx_rings[qid]->count) || + !is_power_of_2(vsi->tx_rings[qid]->count)) { + netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n"); + pool_failure = -EINVAL; + goto failure; + } + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); if (if_running) { @@ -349,6 +356,7 @@ xsk_pool_if_up: netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } +failure: if (pool_failure) { netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n", pool_present ? "en" : "dis", pool_failure); @@ -359,33 +367,28 @@ xsk_pool_if_up: } /** - * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers - * @rx_ring: Rx ring + * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it + * @pool: XSK Buffer pool to pull the buffers from + * @xdp: SW ring of xdp_buff that will hold the buffers + * @rx_desc: Pointer to Rx descriptors that will be filled * @count: The number of buffers to allocate * * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns true if all allocations were successful, false if any fail. + * Note that ring wrap should be handled by caller of this function. + * + * Returns the amount of allocated Rx descriptors */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, + union ice_32b_rx_flex_desc *rx_desc, u16 count) { - union ice_32b_rx_flex_desc *rx_desc; - u16 ntu = rx_ring->next_to_use; - struct xdp_buff **xdp; - u32 nb_buffs, i; dma_addr_t dma; + u16 buffs; + int i; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = ice_xdp_buf(rx_ring, ntu); - - nb_buffs = min_t(u16, count, rx_ring->count - ntu); - nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); - if (!nb_buffs) - return false; - - i = nb_buffs; - while (i--) { + buffs = xsk_buff_alloc_batch(pool, xdp, count); + for (i = 0; i < buffs; i++) { dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->wb.status_error0 = 0; @@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp++; } + return buffs; +} + +/** + * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Place the @count of descriptors onto Rx ring. Handle the ring wrap + * for case where space from next_to_use up to the end of ring is less + * than @count. Finally do a tail bump. + * + * Returns true if all allocations were successful, false if any fail. + */ +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + union ice_32b_rx_flex_desc *rx_desc; + u32 nb_buffs_extra = 0, nb_buffs; + u16 ntu = rx_ring->next_to_use; + u16 total_count = count; + struct xdp_buff **xdp; + + rx_desc = ICE_RX_DESC(rx_ring, ntu); + xdp = ice_xdp_buf(rx_ring, ntu); + + if (ntu + count >= rx_ring->count) { + nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, + rx_desc, + rx_ring->count - ntu); + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = ice_xdp_buf(rx_ring, 0); + ntu = 0; + count -= nb_buffs_extra; + ice_release_rx_desc(rx_ring, 0); + } + + nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + ntu += nb_buffs; if (ntu == rx_ring->count) ntu = 0; - ice_release_rx_desc(rx_ring, ntu); + if (rx_ring->next_to_use != ntu) + ice_release_rx_desc(rx_ring, ntu); - return count == nb_buffs; + return total_count == (nb_buffs_extra + nb_buffs); +} + +/** + * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers + * @rx_ring: Rx ring + * @count: The number of buffers to allocate + * + * Wrapper for internal allocation routine; figure out how many tail + * bumps should take place based on the given threshold + * + * Returns true if all calls to internal alloc routine succeeded + */ +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +{ + u16 rx_thresh = ICE_RING_QUARTER(rx_ring); + u16 batched, leftover, i, tail_bumps; + + batched = ALIGN_DOWN(count, rx_thresh); + tail_bumps = batched / rx_thresh; + leftover = count & (rx_thresh - 1); + + for (i = 0; i < tail_bumps; i++) + if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + return false; + return __ice_alloc_rx_bufs_zc(rx_ring, leftover); } /** @@ -616,134 +683,221 @@ construct_skb: } /** - * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer * @xdp_ring: XDP Tx ring - * @budget: max number of frames to xmit + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + xdp_ring->xdp_tx_active--; + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + * @napi_budget: amount of descriptors that NAPI allows us to clean * - * Returns true if cleanup/transmission is done. + * Returns count of cleaned descriptors */ -static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) +static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) { - struct ice_tx_desc *tx_desc = NULL; - bool work_done = true; - struct xdp_desc desc; - dma_addr_t dma; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + int budget = napi_budget / tx_thresh; + u16 next_dd = xdp_ring->next_dd; + u16 ntc, cleared_dds = 0; - while (likely(budget-- > 0)) { + do { + struct ice_tx_desc *next_dd_desc; + u16 desc_cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u32 xsk_frames; + u16 i; - if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; - work_done = false; + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) break; - } - - tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; - if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc)) - break; - - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, - desc.len); + cleared_dds++; + xsk_frames = 0; + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = tx_thresh; + goto skip; + } - tx_buf->bytecount = desc.len; + ntc = xdp_ring->next_to_clean; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); - tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = - ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0); + for (i = 0; i < tx_thresh; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; - xdp_ring->next_to_use++; - if (xdp_ring->next_to_use == xdp_ring->count) - xdp_ring->next_to_use = 0; - } - - if (tx_desc) { - ice_xdp_ring_update_tail(xdp_ring); - xsk_tx_release(xdp_ring->xsk_pool); - } + if (tx_buf->raw_buf) { + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + tx_buf->raw_buf = NULL; + } else { + xsk_frames++; + } - return budget > 0 && work_done; + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } +skip: + xdp_ring->next_to_clean += tx_thresh; + if (xdp_ring->next_to_clean >= desc_cnt) + xdp_ring->next_to_clean -= desc_cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + next_dd_desc->cmd_type_offset_bsz = 0; + next_dd = next_dd + tx_thresh; + if (next_dd >= desc_cnt) + next_dd = tx_thresh - 1; + } while (budget--); + + xdp_ring->next_dd = next_dd; + + return cleared_dds * tx_thresh; } /** - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer - * @xdp_ring: XDP Tx ring - * @tx_buf: Tx buffer to clean + * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor + * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @desc: AF_XDP descriptor to pull the DMA address and length from + * @total_bytes: bytes accumulator that will be used for stats update */ -static void -ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, + unsigned int *total_bytes) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); + struct ice_tx_desc *tx_desc; + dma_addr_t dma; + + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, desc->len, 0); + + *total_bytes += desc->len; } /** - * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries - * @xdp_ring: XDP Tx ring - * @budget: NAPI budget - * - * Returns true if cleanup/tranmission is done. + * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @total_bytes: bytes accumulator that will be used for stats update */ -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + unsigned int *total_bytes) { - int total_packets = 0, total_bytes = 0; - s16 ntc = xdp_ring->next_to_clean; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u16 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; - struct ice_tx_buf *tx_buf; - u32 xsk_frames = 0; - bool xmit_done; + u32 i; - tx_desc = ICE_TX_DESC(xdp_ring, ntc); - tx_buf = &xdp_ring->tx_buf[ntc]; - ntc -= xdp_ring->count; + loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { + dma_addr_t dma; - do { - if (!(tx_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - break; + dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); - total_bytes += tx_buf->bytecount; - total_packets++; + tx_desc = ICE_TX_DESC(xdp_ring, ntu++); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, descs[i].len, 0); - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } + *total_bytes += descs[i].len; + } - tx_desc->cmd_type_offset_bsz = 0; - tx_buf++; - tx_desc++; - ntc++; + xdp_ring->next_to_use = ntu; - if (unlikely(!ntc)) { - ntc -= xdp_ring->count; - tx_buf = xdp_ring->tx_buf; - tx_desc = ICE_TX_DESC(xdp_ring, 0); - } + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - prefetch(tx_desc); +/** + * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from + * @nb_pkts: count of packets to be send + * @total_bytes: bytes accumulator that will be used for stats update + */ +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, + u32 nb_pkts, unsigned int *total_bytes) +{ + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 batched, leftover, i; + + batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); + leftover = nb_pkts & (PKTS_PER_BATCH - 1); + for (i = 0; i < batched; i += PKTS_PER_BATCH) + ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + for (; i < batched + leftover; i++) + ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + + if (xdp_ring->next_to_use > xdp_ring->next_rs) { + struct ice_tx_desc *tx_desc; + + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += tx_thresh; + } +} - } while (likely(--budget)); +/** + * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @budget: number of free descriptors on HW Tx ring that can be used + * @napi_budget: amount of descriptors that NAPI allows us to clean + * + * Returns true if there is no more work that needs to be done, false otherwise + */ +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget) +{ + struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); + u32 nb_pkts, nb_processed = 0; + unsigned int total_bytes = 0; + + if (budget < tx_thresh) + budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget); + + nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + if (!nb_pkts) + return true; + + if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { + struct ice_tx_desc *tx_desc; + + nb_processed = xdp_ring->count - xdp_ring->next_to_use; + ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = tx_thresh - 1; + xdp_ring->next_to_use = 0; + } - ntc += xdp_ring->count; - xdp_ring->next_to_clean = ntc; + ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, + &total_bytes); - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + ice_xdp_ring_update_tail(xdp_ring); + ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); - ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes); - xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK); - - return budget > 0 && xmit_done; + return nb_pkts < budget; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4c7bd8e9dfc4..0cbb5793b5b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -6,19 +6,37 @@ #include "ice_txrx.h" #include "ice.h" +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 4 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); #else +static inline bool +ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + u32 __always_unused budget, + int __always_unused napi_budget) +{ + return false; +} + static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, struct xsk_buff_pool __always_unused *pool, @@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, } static inline bool -ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, - int __always_unused budget) -{ - return false; -} - -static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 51a2dcaf553d..2a5782063f4c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -965,10 +965,6 @@ static int igb_set_ringparam(struct net_device *netdev, memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct igb_ring)); - /* Clear copied XDP RX-queue info */ - memset(&temp_ring[i].xdp_rxq, 0, - sizeof(temp_ring[i].xdp_rxq)); - temp_ring[i].count = new_rx_count; err = igb_setup_rx_resources(&temp_ring[i]); if (err) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index bfa321e4003f..34b33b21e0dc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4345,7 +4345,18 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct igb_adapter *adapter = netdev_priv(rx_ring->netdev); struct device *dev = rx_ring->dev; - int size; + int size, res; + + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, + rx_ring->queue_index, 0); + if (res < 0) { + dev_err(dev, "Failed to register xdp_rxq index %u\n", + rx_ring->queue_index); + return res; + } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -4368,14 +4379,10 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) rx_ring->xdp_prog = adapter->xdp_prog; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - return 0; err: + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b965fb809d84..74b2c590ed5d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -505,6 +505,9 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) u8 index = rx_ring->queue_index; int size, desc_len, res; + /* XDP RX-queue info */ + if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, rx_ring->q_vector->napi.napi_id); if (res < 0) { |
