* [PATCH] net/intel: improve Rx descriptor ring size checks @ 2025-12-15 17:35 Bruce Richardson 2025-12-15 17:54 ` Morten Brørup 2025-12-15 18:43 ` [PATCH v2] " Bruce Richardson 0 siblings, 2 replies; 10+ messages in thread From: Bruce Richardson @ 2025-12-15 17:35 UTC (permalink / raw) To: dev Cc: Bruce Richardson, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu The default Rx ring size checks did not account for the fact that the port would not work correctly if the Rx ring size was only twice the free threshold size or less, so add in a new check for this. This would generally only apply in cases where very small rings sizes are being used, for example, with default Rx free thresh of 32, only ring size of 64 would cause issues. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> --- drivers/net/intel/cpfl/cpfl_rxtx.c | 7 +++++++ drivers/net/intel/i40e/i40e_rxtx.c | 7 +++++++ drivers/net/intel/iavf/iavf_rxtx.c | 20 +++++--------------- drivers/net/intel/ice/ice_rxtx.c | 7 +++++++ drivers/net/intel/idpf/idpf_rxtx.c | 7 +++++++ drivers/net/intel/ixgbe/ixgbe_rxtx.c | 7 +++++++ 6 files changed, 40 insertions(+), 15 deletions(-) diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c index 453ec975d5..1479f55d23 100644 --- a/drivers/net/intel/cpfl/cpfl_rxtx.c +++ b/drivers/net/intel/cpfl/cpfl_rxtx.c @@ -362,6 +362,13 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_rx_thresh_check(nb_desc, rx_free_thresh) != 0) return -EINVAL; + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_free_thresh); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx] != NULL) { cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c index 
255414dd03..f4a7222bc1 100644 --- a/drivers/net/intel/i40e/i40e_rxtx.c +++ b/drivers/net/intel/i40e/i40e_rxtx.c @@ -2113,6 +2113,13 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_DRV_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_conf->rx_free_thresh); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx]) { i40e_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c index d8662fd815..f4be309ef8 100644 --- a/drivers/net/intel/iavf/iavf_rxtx.c +++ b/drivers/net/intel/iavf/iavf_rxtx.c @@ -146,20 +146,6 @@ iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) return 0; } -static inline int -check_rx_thresh(uint16_t nb_desc, uint16_t thresh) -{ - /* The following constraints must be satisfied: - * thresh < rxq->nb_rx_desc - */ - if (thresh >= nb_desc) { - PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be less than %u", - thresh, nb_desc); - return -EINVAL; - } - return 0; -} - static inline int check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh, uint16_t tx_free_thresh) @@ -589,8 +575,12 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, rx_free_thresh = (rx_conf->rx_free_thresh == 0) ? 
IAVF_DEFAULT_RX_FREE_THRESH : rx_conf->rx_free_thresh; - if (check_rx_thresh(nb_desc, rx_free_thresh) != 0) + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_free_thresh); return -EINVAL; + } /* Free memory if needed */ if (dev->data->rx_queues[queue_idx]) { diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c index 74db0fbec9..42f2d5c590 100644 --- a/drivers/net/intel/ice/ice_rxtx.c +++ b/drivers/net/intel/ice/ice_rxtx.c @@ -1295,6 +1295,13 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_conf->rx_free_thresh); + return -EINVAL; + } + offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; if (mp) diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c index 4796d8b862..136882e084 100644 --- a/drivers/net/intel/idpf/idpf_rxtx.c +++ b/drivers/net/intel/idpf/idpf_rxtx.c @@ -244,6 +244,13 @@ idpf_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_rx_thresh_check(nb_desc, rx_free_thresh) != 0) return -EINVAL; + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_free_thresh); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx] != NULL) { idpf_qc_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c index a7583c178a..4d2033114e 100644 --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c @@ -3206,6 +3206,13 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, 
return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, 2 * rx_conf->rx_free_thresh); + return -EINVAL; + } + /* Free memory prior to re-allocation if needed... */ if (dev->data->rx_queues[queue_idx] != NULL) { ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]); -- 2.51.0 ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-15 17:35 [PATCH] net/intel: improve Rx descriptor ring size checks Bruce Richardson @ 2025-12-15 17:54 ` Morten Brørup 2025-12-15 17:58 ` Bruce Richardson 2025-12-15 18:43 ` [PATCH v2] " Bruce Richardson 1 sibling, 1 reply; 10+ messages in thread From: Morten Brørup @ 2025-12-15 17:54 UTC (permalink / raw) To: Bruce Richardson, dev Cc: Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > Sent: Monday, 15 December 2025 18.36 > > The default Rx ring size checks did not account for the fact that the > port would not work correctly if the Rx ring size was only twice the > free threshold size or less, so add in a new check for this. This would > generally only apply in cases where very small rings sizes are being > used, for example, with default Rx free thresh of 32, only ring size of > 64 would cause issues. > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > --- Does dev_info.rx_desc_lim.nb_min returned by rte_eth_dev_info_get() need correction too? ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-15 17:54 ` Morten Brørup @ 2025-12-15 17:58 ` Bruce Richardson 2025-12-15 18:20 ` Bruce Richardson 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2025-12-15 17:58 UTC (permalink / raw) To: Morten Brørup Cc: dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > Sent: Monday, 15 December 2025 18.36 > > > > The default Rx ring size checks did not account for the fact that the > > port would not work correctly if the Rx ring size was only twice the > > free threshold size or less, so add in a new check for this. This would > > generally only apply in cases where very small rings sizes are being > > used, for example, with default Rx free thresh of 32, only ring size of > > 64 would cause issues. > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > --- > > Does dev_info.rx_desc_lim.nb_min returned by rte_eth_dev_info_get() need correction too? > The minimum number of descriptors stays the same, however, if choosing the minimum number of descriptors you may need to reduce the rx_free_thresh value. /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-15 17:58 ` Bruce Richardson @ 2025-12-15 18:20 ` Bruce Richardson 2025-12-15 18:53 ` Morten Brørup 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2025-12-15 18:20 UTC (permalink / raw) To: Morten Brørup Cc: dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson wrote: > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > Sent: Monday, 15 December 2025 18.36 > > > > > > The default Rx ring size checks did not account for the fact that the > > > port would not work correctly if the Rx ring size was only twice the > > > free threshold size or less, so add in a new check for this. This would > > > generally only apply in cases where very small rings sizes are being > > > used, for example, with default Rx free thresh of 32, only ring size of > > > 64 would cause issues. > > > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > > --- > > > > Does dev_info.rx_desc_lim.nb_min returned by rte_eth_dev_info_get() need correction too? > > > The minimum number of descriptors stays the same, however, if choosing the > minimum number of descriptors you may need to reduce the rx_free_thresh > value. > However, I think you raise a good point. I'll see about adding a specific error message in case the user is using the default threshold and setting the min ring size. /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-15 18:20 ` Bruce Richardson @ 2025-12-15 18:53 ` Morten Brørup 2025-12-16 8:48 ` Bruce Richardson 0 siblings, 1 reply; 10+ messages in thread From: Morten Brørup @ 2025-12-15 18:53 UTC (permalink / raw) To: Bruce Richardson Cc: dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > Sent: Monday, 15 December 2025 19.21 > > On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson wrote: > > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > Sent: Monday, 15 December 2025 18.36 > > > > > > > > The default Rx ring size checks did not account for the fact that > the > > > > port would not work correctly if the Rx ring size was only twice > the > > > > free threshold size or less, so add in a new check for this. This > would > > > > generally only apply in cases where very small rings sizes are > being > > > > used, for example, with default Rx free thresh of 32, only ring > size of > > > > 64 would cause issues. > > > > > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > > > --- > > > > > > Does dev_info.rx_desc_lim.nb_min returned by rte_eth_dev_info_get() > need correction too? > > > > > The minimum number of descriptors stays the same, however, if > choosing the > > minimum number of descriptors you may need to reduce the > rx_free_thresh > > value. > > > However, I think you raise a good point. I'll see about adding a > specific > error message in case the user is using the default threshold and > setting > the min ring size. The applications need some generic code sequence that always works, on all NICs. E.g. if an application uses rte_eth_dev_adjust_nb_rx_tx_desc() to move a requested crazy number of descriptors within bounds, and uses the default values for all other parameters, it should work. 
Nonetheless, more detailed error messages are always helpful. :-) -Morten ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-15 18:53 ` Morten Brørup @ 2025-12-16 8:48 ` Bruce Richardson 2025-12-16 9:25 ` Morten Brørup 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2025-12-16 8:48 UTC (permalink / raw) To: Morten Brørup Cc: dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu On Mon, Dec 15, 2025 at 07:53:27PM +0100, Morten Brørup wrote: > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > Sent: Monday, 15 December 2025 19.21 > > > > On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson wrote: > > > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > > Sent: Monday, 15 December 2025 18.36 > > > > > > > > > > The default Rx ring size checks did not account for the fact that > > the > > > > > port would not work correctly if the Rx ring size was only twice > > the > > > > > free threshold size or less, so add in a new check for this. This > > would > > > > > generally only apply in cases where very small rings sizes are > > being > > > > > used, for example, with default Rx free thresh of 32, only ring > > size of > > > > > 64 would cause issues. > > > > > > > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > > > > --- > > > > > > > > Does dev_info.rx_desc_lim.nb_min returned by rte_eth_dev_info_get() > > need correction too? > > > > > > > The minimum number of descriptors stays the same, however, if > > choosing the > > > minimum number of descriptors you may need to reduce the > > rx_free_thresh > > > value. > > > > > However, I think you raise a good point. I'll see about adding a > > specific > > error message in case the user is using the default threshold and > > setting > > the min ring size. > > The applications need some generic code sequence that always works, on all NICs. > > E.g. 
if an application uses rte_eth_dev_adjust_nb_rx_tx_desc() to move a requested crazy number of descriptors within bounds, and uses the default values for all other parameters, it should work. > This is surprisingly difficult to make work with the way things are set up right now. For example, if the user wants defaults for config settings and passes in NULL to the ethdev API, the ethdev library queries the defaults from the driver and fills those in before calling the relevant ring setup functions. Therefore, the driver level has no knowledge of whether the user explicitly requested a value which happens to match the default, or if the user just wants a working default value. Another option would be to set the default low enough that it would work with any ring size possible, but that would then cause a perf impact for apps which don't need such low values (as an extreme example, think of a theoretical driver which allows ring sizes as small as 16 or 8; a free threshold that works there is likely suboptimal for more normal ring sizes of e.g. 1k or 2k). > Nonetheless, more detailed error messages are always helpful. :-) > Yes, for now I think giving a meaningful error message in the few cases of really small ring sizes is the best approach I can take. Given the issue has been present for a long time without issue, and given that the error will always repeatedly occur at init time with the offending values, I believe it's sufficient. /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-16 8:48 ` Bruce Richardson @ 2025-12-16 9:25 ` Morten Brørup 2025-12-16 9:52 ` Bruce Richardson 0 siblings, 1 reply; 10+ messages in thread From: Morten Brørup @ 2025-12-16 9:25 UTC (permalink / raw) To: Bruce Richardson, thomas, andrew.rybchenko Cc: dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu +TO: Ethdev maintainers > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > Sent: Tuesday, 16 December 2025 09.48 > > On Mon, Dec 15, 2025 at 07:53:27PM +0100, Morten Brørup wrote: > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > Sent: Monday, 15 December 2025 19.21 > > > > > > On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson wrote: > > > > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > > > Sent: Monday, 15 December 2025 18.36 > > > > > > > > > > > > The default Rx ring size checks did not account for the fact > that > > > the > > > > > > port would not work correctly if the Rx ring size was only > twice > > > the > > > > > > free threshold size or less, so add in a new check for this. > This > > > would > > > > > > generally only apply in cases where very small rings sizes > are > > > being > > > > > > used, for example, with default Rx free thresh of 32, only > ring > > > size of > > > > > > 64 would cause issues. > > > > > > > > > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > > > > > --- > > > > > > > > > > Does dev_info.rx_desc_lim.nb_min returned by > rte_eth_dev_info_get() > > > need correction too? > > > > > > > > > The minimum number of descriptors stays the same, however, if > > > choosing the > > > > minimum number of descriptors you may need to reduce the > > > rx_free_thresh > > > > value. > > > > > > > However, I think you raise a good point. 
I'll see about adding a > > > specific > > > error message in case the user is using the default threshold and > > > setting > > > the min ring size. > > > > The applications need some generic code sequence that always works, > on all NICs. > > > > E.g. if an application uses rte_eth_dev_adjust_nb_rx_tx_desc() to > move a requested crazy number of descriptors within bounds, and uses > the default values for all other parameters, it should work. > > > > This is surprisingly difficult to make working with the way things are > set > up right now. For example, if the user wants defaults for config > settings > and passes in NULL to the ethdev API, the ethdev library queries the > defaults from the driver and fills those in before calling the relevant > ring setup functions. Therefore, the driver level has no knowledge of > whether the user explicitly requested a value which happens to match > the > default, or if the user just wants a working default value. > > Another option would be to set the default low enough that it would > work > with any ring size possible, but that would then cause a perf impact > for > apps which don't need such low values (as an extreme example, think on > a > theoretical driver which allows ring sizes of as small as 16 or 8, a > free > threshold to work there is likely suboptimal for more normal ring sizes > of > e.g. 1k or 2k). Small descriptor rings are not theoretical. Our application configures very small descriptor rings on unused ports, to be able to process background traffic (e.g. slow protocols) on those ports, but still conserve memory. E.g. the igb driver reports dev_info.rx_desc_lim.nb_min = 32, but multiple times this value is required with default thresholds. The i40e driver reports dev_info.rx_desc_lim.nb_min = 64, and IIRC more is required with default thresholds. I agree that defaults should remain optimized for performance (maximum packets per second). 
The problem is rte_eth_dev_adjust_nb_rx_tx_desc() not having sufficient information about all the thresholds to correctly calculate its output values. I'll file a bug report. Updating the drivers to report dev_info.rx/tx_desc_lim.nb_min and dev_info.rx/tx_desc_lim.nb_align that work with default thresholds could be a workaround. > > > Nonetheless, more detailed error messages are always helpful. :-) > > > Yes, for now I think giving a meaningful error message in the few cases > of > really small ring sizes is the best approach I can take. Given the > issue > has been present for a long time without issue, and given that the > error > will always repeatedly occur at init time with the offending values, I > believe its sufficient. > > /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-16 9:25 ` Morten Brørup @ 2025-12-16 9:52 ` Bruce Richardson 2025-12-16 10:48 ` Morten Brørup 0 siblings, 1 reply; 10+ messages in thread From: Bruce Richardson @ 2025-12-16 9:52 UTC (permalink / raw) To: Morten Brørup Cc: thomas, andrew.rybchenko, dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu On Tue, Dec 16, 2025 at 10:25:41AM +0100, Morten Brørup wrote: > +TO: Ethdev maintainers > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > Sent: Tuesday, 16 December 2025 09.48 > > > > On Mon, Dec 15, 2025 at 07:53:27PM +0100, Morten Brørup wrote: > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > Sent: Monday, 15 December 2025 19.21 > > > > > > > > On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson wrote: > > > > > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup wrote: > > > > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > > > > Sent: Monday, 15 December 2025 18.36 > > > > > > > > > > > > > > The default Rx ring size checks did not account for the fact > > that > > > > the > > > > > > > port would not work correctly if the Rx ring size was only > > twice > > > > the > > > > > > > free threshold size or less, so add in a new check for this. > > This > > > > would > > > > > > > generally only apply in cases where very small rings sizes > > are > > > > being > > > > > > > used, for example, with default Rx free thresh of 32, only > > ring > > > > size of > > > > > > > 64 would cause issues. > > > > > > > > > > > > > > Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> > > > > > > > --- > > > > > > > > > > > > Does dev_info.rx_desc_lim.nb_min returned by > > rte_eth_dev_info_get() > > > > need correction too? 
> > > > > > > > > > > The minimum number of descriptors stays the same, however, if > > > > choosing the > > > > > minimum number of descriptors you may need to reduce the > > > > rx_free_thresh > > > > > value. > > > > > > > > > However, I think you raise a good point. I'll see about adding a > > > > specific > > > > error message in case the user is using the default threshold and > > > > setting > > > > the min ring size. > > > > > > The applications need some generic code sequence that always works, > > on all NICs. > > > > > > E.g. if an application uses rte_eth_dev_adjust_nb_rx_tx_desc() to > > move a requested crazy number of descriptors within bounds, and uses > > the default values for all other parameters, it should work. > > > > > > > This is surprisingly difficult to make working with the way things are > > set > > up right now. For example, if the user wants defaults for config > > settings > > and passes in NULL to the ethdev API, the ethdev library queries the > > defaults from the driver and fills those in before calling the relevant > > ring setup functions. Therefore, the driver level has no knowledge of > > whether the user explicitly requested a value which happens to match > > the > > default, or if the user just wants a working default value. > > > > Another option would be to set the default low enough that it would > > work > > with any ring size possible, but that would then cause a perf impact > > for > > apps which don't need such low values (as an extreme example, think on > > a > > theoretical driver which allows ring sizes of as small as 16 or 8, a > > free > > threshold to work there is likely suboptimal for more normal ring sizes > > of > > e.g. 1k or 2k). > > Small descriptor rings are not theoretical. > Our application configures very small descriptor rings on unused ports, to be able to process background traffic (e.g. slow protocols) on those ports, but still conserve memory. > > E.g. 
the igb driver reports dev_info.rx_desc_lim.nb_min = 32, but multiple times this value is required with default thresholds. > The i40e driver reports dev_info.rx_desc_lim.nb_min = 64, and IIRC more is required with default thresholds. > > I agree that defaults should remain optimized for performance (maximum packets per second). > > The problem is rte_eth_dev_adjust_nb_rx_tx_desc() not having sufficient information about all the thresholds to correctly calculate its output values. I'll file a bug report. > > Updating the drivers to report dev_info.rx/tx_desc_lim.nb_min and dev_info.rx/tx_desc_lim.nb_align that work with default thresholds could be a workaround. > I'm not sure I like that option. How would one then query the limits with non-default thresholds? Also, it doesn't inform the user as to which thresholds are causing what limits, vs limits that are hard ones from the HW. Other options may be greater use of e.g. 0 as a sentinel value for allowing the driver to pick, or changing things internally so that the source of the rx_conf is passed to the drivers. However, I actually feel that the best option if we want a "most usable" solution here, is to document that the free_thresh is a hint, and that it may be adjusted by the driver if necessary to accommodate a requested ring size. [We could log a warning on adjustment] /Bruce ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH] net/intel: improve Rx descriptor ring size checks 2025-12-16 9:52 ` Bruce Richardson @ 2025-12-16 10:48 ` Morten Brørup 0 siblings, 0 replies; 10+ messages in thread From: Morten Brørup @ 2025-12-16 10:48 UTC (permalink / raw) To: Bruce Richardson Cc: thomas, andrew.rybchenko, dev, Praveen Shetty, Vladimir Medvedkin, Anatoly Burakov, Jingjing Wu > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > Sent: Tuesday, 16 December 2025 10.52 > > On Tue, Dec 16, 2025 at 10:25:41AM +0100, Morten Brørup wrote: > > +TO: Ethdev maintainers > > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > Sent: Tuesday, 16 December 2025 09.48 > > > > > > On Mon, Dec 15, 2025 at 07:53:27PM +0100, Morten Brørup wrote: > > > > > From: Bruce Richardson [mailto:bruce.richardson@intel.com] > > > > > Sent: Monday, 15 December 2025 19.21 > > > > > > > > > > On Mon, Dec 15, 2025 at 05:58:33PM +0000, Bruce Richardson > wrote: > > > > > > On Mon, Dec 15, 2025 at 06:54:50PM +0100, Morten Brørup > wrote: > > > > > > > > From: Bruce Richardson > [mailto:bruce.richardson@intel.com] > > > > > > > > Sent: Monday, 15 December 2025 18.36 > > > > > > > > > > > > > > > > The default Rx ring size checks did not account for the > fact > > > that > > > > > the > > > > > > > > port would not work correctly if the Rx ring size was > only > > > twice > > > > > the > > > > > > > > free threshold size or less, so add in a new check for > this. > > > This > > > > > would > > > > > > > > generally only apply in cases where very small rings > sizes > > > are > > > > > being > > > > > > > > used, for example, with default Rx free thresh of 32, > only > > > ring > > > > > size of > > > > > > > > 64 would cause issues. > > > > > > > > > > > > > > > > Signed-off-by: Bruce Richardson > <bruce.richardson@intel.com> > > > > > > > > --- > > > > > > > > > > > > > > Does dev_info.rx_desc_lim.nb_min returned by > > > rte_eth_dev_info_get() > > > > > need correction too? 
> > > > > > > > > > > > > The minimum number of descriptors stays the same, however, if > > > > > choosing the > > > > > > minimum number of descriptors you may need to reduce the > > > > > rx_free_thresh > > > > > > value. > > > > > > > > > > > However, I think you raise a good point. I'll see about adding > a > > > > > specific > > > > > error message in case the user is using the default threshold > and > > > > > setting > > > > > the min ring size. > > > > > > > > The applications need some generic code sequence that always > works, > > > on all NICs. > > > > > > > > E.g. if an application uses rte_eth_dev_adjust_nb_rx_tx_desc() to > > > move a requested crazy number of descriptors within bounds, and > uses > > > the default values for all other parameters, it should work. > > > > > > > > > > This is surprisingly difficult to make working with the way things > are > > > set > > > up right now. For example, if the user wants defaults for config > > > settings > > > and passes in NULL to the ethdev API, the ethdev library queries > the > > > defaults from the driver and fills those in before calling the > relevant > > > ring setup functions. Therefore, the driver level has no knowledge > of > > > whether the user explicitly requested a value which happens to > match > > > the > > > default, or if the user just wants a working default value. > > > > > > Another option would be to set the default low enough that it would > > > work > > > with any ring size possible, but that would then cause a perf > impact > > > for > > > apps which don't need such low values (as an extreme example, think > on > > > a > > > theoretical driver which allows ring sizes of as small as 16 or 8, > a > > > free > > > threshold to work there is likely suboptimal for more normal ring > sizes > > > of > > > e.g. 1k or 2k). > > > > Small descriptor rings are not theoretical. 
> > Our application configures very small descriptor rings on unused > ports, to be able to process background traffic (e.g. slow protocols) > on those ports, but still conserve memory. > > > > E.g. the igb driver reports dev_info.rx_desc_lim.nb_min = 32, but > multiple times this value is required with default thresholds. > > The i40e driver reports dev_info.rx_desc_lim.nb_min = 64, and IIRC > more is required with default thresholds. > > > > I agree that defaults should remain optimized for performance > (maximum packets per second). > > > > The problem is rte_eth_dev_adjust_nb_rx_tx_desc() not having > sufficient information about all the thresholds to correctly calculate > its output values. I'll file a bug report. > > > > Updating the drivers to report dev_info.rx/tx_desc_lim.nb_min and > dev_info.rx/tx_desc_lim.nb_align that work with default thresholds > could be a workaround. > > > > I'm not sure I like that option. How would one then query the limits > with > non-default thresholds? Also, it doesn't inform user as to which > thresholds > are causing what limits, vs limits that are hard ones from the HW. > > Other options may be greater use of e.g. 0 as a sentinal value for > allowing > the driver to pick, or changing things internally so that the source of > the > rx_conf is passed to the drivers. However, I actually feel that the > best > option if we want a "most usable" solution here, is to document that > the > free_thresh is a hint, and that it may be adjusted by the driver if > necessary to accomodate a requested ring size. [We could log a warning > on > adjustment] +1 to your "most usable" suggestion. (As a workaround for rte_eth_dev_adjust_nb_rx_tx_desc() not having sufficient information.) The Tx side is worse. It has requirements related to both rs_thresh and free_thresh. I suppose they could be treated as hints too. ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] net/intel: improve Rx descriptor ring size checks 2025-12-15 17:35 [PATCH] net/intel: improve Rx descriptor ring size checks Bruce Richardson 2025-12-15 17:54 ` Morten Brørup @ 2025-12-15 18:43 ` Bruce Richardson 1 sibling, 0 replies; 10+ messages in thread From: Bruce Richardson @ 2025-12-15 18:43 UTC (permalink / raw) To: dev; +Cc: Bruce Richardson The default Rx ring size checks did not account for the fact that the port would not work correctly if the Rx ring size was only twice the free threshold size or less, so add in a new check for this. This would generally only apply in cases where very small rings sizes are being used, for example, with default Rx free thresh of 32, only ring size of 64 would cause issues. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> --- v2: add in extra error message for the small queue size case, to help make users aware of what setting they need to use to fix things. --- drivers/net/intel/cpfl/cpfl_rxtx.c | 10 ++++++++++ drivers/net/intel/i40e/i40e_rxtx.c | 10 ++++++++++ drivers/net/intel/iavf/iavf_rxtx.c | 23 ++++++++--------------- drivers/net/intel/ice/ice_rxtx.c | 10 ++++++++++ drivers/net/intel/idpf/idpf_rxtx.c | 10 ++++++++++ drivers/net/intel/ixgbe/ixgbe_rxtx.c | 10 ++++++++++ 6 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/net/intel/cpfl/cpfl_rxtx.c b/drivers/net/intel/cpfl/cpfl_rxtx.c index 453ec975d5..d42b1142c2 100644 --- a/drivers/net/intel/cpfl/cpfl_rxtx.c +++ b/drivers/net/intel/cpfl/cpfl_rxtx.c @@ -362,6 +362,16 @@ cpfl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_rx_thresh_check(nb_desc, rx_free_thresh) != 0) return -EINVAL; + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_free_thresh); + if (rx_free_thresh == CPFL_DEFAULT_RX_FREE_THRESH) + PMD_INIT_LOG(ERR, "To use ring sizes of %u or smaller, reduce 
rx_free_thresh", + CPFL_DEFAULT_RX_FREE_THRESH * 2); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx] != NULL) { cpfl_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/i40e/i40e_rxtx.c b/drivers/net/intel/i40e/i40e_rxtx.c index 255414dd03..430f367eab 100644 --- a/drivers/net/intel/i40e/i40e_rxtx.c +++ b/drivers/net/intel/i40e/i40e_rxtx.c @@ -2113,6 +2113,16 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_DRV_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_conf->rx_free_thresh); + if (nb_desc == I40E_MIN_RING_DESC) + PMD_DRV_LOG(ERR, "To use the minimum ring size (%u), reduce rx_free_thresh to a lower value (recommended %u)", + I40E_MIN_RING_DESC, I40E_MIN_RING_DESC / 4); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx]) { i40e_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/iavf/iavf_rxtx.c b/drivers/net/intel/iavf/iavf_rxtx.c index d8662fd815..a8d7c2fc1d 100644 --- a/drivers/net/intel/iavf/iavf_rxtx.c +++ b/drivers/net/intel/iavf/iavf_rxtx.c @@ -146,20 +146,6 @@ iavf_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) return 0; } -static inline int -check_rx_thresh(uint16_t nb_desc, uint16_t thresh) -{ - /* The following constraints must be satisfied: - * thresh < rxq->nb_rx_desc - */ - if (thresh >= nb_desc) { - PMD_INIT_LOG(ERR, "rx_free_thresh (%u) must be less than %u", - thresh, nb_desc); - return -EINVAL; - } - return 0; -} - static inline int check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh, uint16_t tx_free_thresh) @@ -589,8 +575,15 @@ iavf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, rx_free_thresh = (rx_conf->rx_free_thresh == 0) ? 
IAVF_DEFAULT_RX_FREE_THRESH : rx_conf->rx_free_thresh; - if (check_rx_thresh(nb_desc, rx_free_thresh) != 0) + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_free_thresh); + if (nb_desc == IAVF_MIN_RING_DESC) + PMD_INIT_LOG(ERR, "To use the minimum ring size (%u), reduce rx_free_thresh to a lower value (recommended %u)", + IAVF_MIN_RING_DESC, IAVF_MIN_RING_DESC / 4); return -EINVAL; + } /* Free memory if needed */ if (dev->data->rx_queues[queue_idx]) { diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c index 74db0fbec9..510eb1d316 100644 --- a/drivers/net/intel/ice/ice_rxtx.c +++ b/drivers/net/intel/ice/ice_rxtx.c @@ -1295,6 +1295,16 @@ ice_rx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_conf->rx_free_thresh); + if (nb_desc == ICE_MIN_RING_DESC) + PMD_INIT_LOG(ERR, "To use the minimum ring size (%u), reduce rx_free_thresh to a lower value (recommended %u)", + ICE_MIN_RING_DESC, ICE_MIN_RING_DESC / 4); + return -EINVAL; + } + offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads; if (mp) diff --git a/drivers/net/intel/idpf/idpf_rxtx.c b/drivers/net/intel/idpf/idpf_rxtx.c index 4796d8b862..fefe7d89ce 100644 --- a/drivers/net/intel/idpf/idpf_rxtx.c +++ b/drivers/net/intel/idpf/idpf_rxtx.c @@ -244,6 +244,16 @@ idpf_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, if (idpf_qc_rx_thresh_check(nb_desc, rx_free_thresh) != 0) return -EINVAL; + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_free_thresh); + if (rx_free_thresh == IDPF_DEFAULT_RX_FREE_THRESH) + 
PMD_INIT_LOG(ERR, "To use ring sizes of %u or smaller, reduce rx_free_thresh", + IDPF_DEFAULT_RX_FREE_THRESH * 2); + return -EINVAL; + } + /* Free memory if needed */ if (dev->data->rx_queues[queue_idx] != NULL) { idpf_qc_rx_queue_release(dev->data->rx_queues[queue_idx]); diff --git a/drivers/net/intel/ixgbe/ixgbe_rxtx.c b/drivers/net/intel/ixgbe/ixgbe_rxtx.c index a7583c178a..9f36179c21 100644 --- a/drivers/net/intel/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/intel/ixgbe/ixgbe_rxtx.c @@ -3206,6 +3206,16 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } + /* Check that ring size is > 2 * rx_free_thresh */ + if (nb_desc <= 2 * rx_conf->rx_free_thresh) { + PMD_INIT_LOG(ERR, "rx ring size (%u) must be > 2 * rx_free_thresh (%u)", + nb_desc, rx_conf->rx_free_thresh); + if (nb_desc == IXGBE_MIN_RING_DESC) + PMD_INIT_LOG(ERR, "To use the minimum ring size (%u), reduce rx_free_thresh to a lower value (recommended %u)", + IXGBE_MIN_RING_DESC, IXGBE_MIN_RING_DESC / 4); + return -EINVAL; + } + /* Free memory prior to re-allocation if needed... */ if (dev->data->rx_queues[queue_idx] != NULL) { ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]); -- 2.51.0 ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-12-16 10:48 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-12-15 17:35 [PATCH] net/intel: improve Rx descriptor ring size checks Bruce Richardson 2025-12-15 17:54 ` Morten Brørup 2025-12-15 17:58 ` Bruce Richardson 2025-12-15 18:20 ` Bruce Richardson 2025-12-15 18:53 ` Morten Brørup 2025-12-16 8:48 ` Bruce Richardson 2025-12-16 9:25 ` Morten Brørup 2025-12-16 9:52 ` Bruce Richardson 2025-12-16 10:48 ` Morten Brørup 2025-12-15 18:43 ` [PATCH v2] " Bruce Richardson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).