* [dpdk-dev] [PATCH 1/3] common/qat: remove tail write coalescing feature
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
@ 2019-09-06 15:44 ` Fiona Trahe
2019-09-06 15:44 ` [dpdk-dev] [PATCH 2/3] common/qat: move max inflights param into qp Fiona Trahe
` (5 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 15:44 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
The feature Coalescing Tail Writes on Enqueue is removed
as it is not thread-safe and a dual-thread feature will be added shortly.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
drivers/common/qat/qat_qp.c | 16 +++-------------
drivers/common/qat/qat_qp.h | 6 ------
2 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 03f11f8..01ddce0 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -538,7 +538,6 @@ static inline uint32_t adf_modulo(uint32_t data, uint32_t modulo_mask)
txq_write_tail(struct qat_qp *qp, struct qat_queue *q) {
WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
q->hw_queue_number, q->tail);
- q->nb_pending_requests = 0;
q->csr_tail = q->tail;
}
@@ -622,25 +621,20 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
kick_tail:
queue->tail = tail;
tmp_qp->stats.enqueued_count += nb_ops_sent;
- queue->nb_pending_requests += nb_ops_sent;
- if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH ||
- queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) {
- txq_write_tail(tmp_qp, queue);
- }
+ txq_write_tail(tmp_qp, queue);
return nb_ops_sent;
}
uint16_t
qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
{
- struct qat_queue *rx_queue, *tx_queue;
+ struct qat_queue *rx_queue;
struct qat_qp *tmp_qp = (struct qat_qp *)qp;
uint32_t head;
uint32_t resp_counter = 0;
uint8_t *resp_msg;
rx_queue = &(tmp_qp->rx_q);
- tx_queue = &(tmp_qp->tx_q);
head = rx_queue->head;
resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
@@ -677,11 +671,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
QAT_CSR_HEAD_WRITE_THRESH)
rxq_free_desc(tmp_qp, rx_queue);
}
- /* also check if tail needs to be advanced */
- if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH &&
- tx_queue->tail != tx_queue->csr_tail) {
- txq_write_tail(tmp_qp, tx_queue);
- }
+
return resp_counter;
}
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 980c2ba..9212ca4 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -11,10 +11,6 @@
#define QAT_CSR_HEAD_WRITE_THRESH 32U
/* number of requests to accumulate before writing head CSR */
-#define QAT_CSR_TAIL_WRITE_THRESH 32U
-/* number of requests to accumulate before writing tail CSR */
-#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U
-/* number of inflights below which no tail write coalescing should occur */
typedef int (*build_request_t)(void *op,
uint8_t *req, void *op_cookie,
@@ -64,8 +60,6 @@ struct qat_queue {
uint32_t csr_tail; /* last written tail value */
uint16_t nb_processed_responses;
/* number of responses processed since last CSR head write */
- uint16_t nb_pending_requests;
- /* number of requests pending since last CSR tail write */
};
struct qat_qp {
--
1.7.0.7
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH 2/3] common/qat: move max inflights param into qp
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
2019-09-06 15:44 ` [dpdk-dev] [PATCH 1/3] common/qat: remove tail write coalescing feature Fiona Trahe
@ 2019-09-06 15:44 ` Fiona Trahe
2019-09-06 15:44 ` [dpdk-dev] [PATCH 3/3] common/qat: add dual thread support Fiona Trahe
` (4 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 15:44 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
The max_inflights parameter is moved from qat_queue to qat_qp as it's
a more appropriate location.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
drivers/common/qat/qat_qp.c | 23 ++++++++++++-----------
drivers/common/qat/qat_qp.h | 2 +-
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 01ddce0..8e4c74a 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -239,6 +239,15 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
goto create_err;
}
+ qp->max_inflights = ADF_MAX_INFLIGHTS(qp->tx_q.queue_size,
+ ADF_BYTES_TO_MSG_SIZE(qp->tx_q.msg_size));
+
+ if (qp->max_inflights < 2) {
+ QAT_LOG(ERR, "Invalid num inflights");
+ qat_queue_delete(&(qp->tx_q));
+ goto create_err;
+ }
+
if (qat_queue_create(qat_dev, &(qp->rx_q), qat_qp_conf,
ADF_RING_DIR_RX) != 0) {
QAT_LOG(ERR, "Rx queue create failed "
@@ -416,15 +425,7 @@ static void qat_queue_delete(struct qat_queue *queue)
goto queue_create_err;
}
- queue->max_inflights = ADF_MAX_INFLIGHTS(queue->queue_size,
- ADF_BYTES_TO_MSG_SIZE(desc_size));
queue->modulo_mask = (1 << ADF_RING_SIZE_MODULO(queue->queue_size)) - 1;
-
- if (queue->max_inflights < 2) {
- QAT_LOG(ERR, "Invalid num inflights");
- ret = -EINVAL;
- goto queue_create_err;
- }
queue->head = 0;
queue->tail = 0;
queue->msg_size = desc_size;
@@ -443,11 +444,11 @@ static void qat_queue_delete(struct qat_queue *queue)
queue->hw_queue_number, queue_base);
QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u,"
- " nb msgs %u, msg_size %u, max_inflights %u modulo mask %u",
+ " nb msgs %u, msg_size %u, modulo mask %u",
queue->memz_name,
queue->queue_size, queue_size_bytes,
qp_conf->nb_descriptors, desc_size,
- queue->max_inflights, queue->modulo_mask);
+ queue->modulo_mask);
return 0;
@@ -590,7 +591,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
/* Find how many can actually fit on the ring */
tmp_qp->inflights16 += nb_ops;
- overflow = tmp_qp->inflights16 - queue->max_inflights;
+ overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
if (overflow > 0) {
tmp_qp->inflights16 -= overflow;
nb_ops_possible = nb_ops - overflow;
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 9212ca4..5066f06 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -51,7 +51,6 @@ struct qat_queue {
uint32_t tail; /* Shadow copy of the tail */
uint32_t modulo_mask;
uint32_t msg_size;
- uint16_t max_inflights;
uint32_t queue_size;
uint8_t hw_bundle_number;
uint8_t hw_queue_number;
@@ -76,6 +75,7 @@ struct qat_qp {
enum qat_service_type service_type;
struct qat_pci_device *qat_dev;
/**< qat device this qp is on */
+ uint16_t max_inflights;
} __rte_cache_aligned;
extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE];
--
1.7.0.7
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH 3/3] common/qat: add dual thread support
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
2019-09-06 15:44 ` [dpdk-dev] [PATCH 1/3] common/qat: remove tail write coalescing feature Fiona Trahe
2019-09-06 15:44 ` [dpdk-dev] [PATCH 2/3] common/qat: move max inflights param into qp Fiona Trahe
@ 2019-09-06 15:44 ` Fiona Trahe
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD Fiona Trahe
` (3 subsequent siblings)
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 15:44 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
Remove the limitation whereby enqueue and dequeue must be
done in same thread.
The inflight calculation is reworked to be thread-safe for 2
threads - note this is not general multi-thread support, i.e
all enqueues to a qp must still be done in one thread and
all dequeues must be done in one thread, but enqueues and
dequeues may be in separate threads.
Documentation updated.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
doc/guides/compressdevs/qat_comp.rst | 5 +++-
doc/guides/cryptodevs/qat.rst | 11 +++++++-
drivers/common/qat/qat_qp.c | 40 ++++++++++++++++++++-------------
drivers/common/qat/qat_qp.h | 3 +-
4 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst
index 6f583a4..d06b08a 100644
--- a/doc/guides/compressdevs/qat_comp.rst
+++ b/doc/guides/compressdevs/qat_comp.rst
@@ -33,7 +33,10 @@ Limitations
-----------
* Compressdev level 0, no compression, is not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
* No BSD support as BSD QAT kernel driver not available.
* When using Deflate dynamic huffman encoding for compression, the input size (op.src.length)
must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file,
diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index e905f6d..7c81b9b 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -81,7 +81,11 @@ Limitations
* No BSD support as BSD QAT kernel driver not available.
* ZUC EEA3/EIA3 is not supported by dh895xcc devices
* Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
+
Extra notes on KASUMI F9
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -122,7 +126,10 @@ Limitations
~~~~~~~~~~~
* Big integers longer than 4096 bits are not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* Queue-pairs are thread-safe on Intel CPUs but Queues are not (that is, within a single
+ queue-pair all enqueues to the TX queue must be done from one thread and all dequeues
+ from the RX queue must be done from one thread, but enqueues and dequeues may be done
+ in different threads.)
.. _building_qat:
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 8e4c74a..30cdc61 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -230,7 +230,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
}
qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
- qp->inflights16 = 0;
+ qp->enqueued = qp->dequeued = 0;
if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
ADF_RING_DIR_TX) != 0) {
@@ -321,7 +321,7 @@ int qat_qp_release(struct qat_qp **qp_addr)
qp->qat_dev->qat_dev_id);
/* Don't free memory if there are still responses to be processed */
- if (qp->inflights16 == 0) {
+ if ((qp->enqueued - qp->dequeued) == 0) {
qat_queue_delete(&(qp->tx_q));
qat_queue_delete(&(qp->rx_q));
} else {
@@ -579,7 +579,6 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
uint16_t nb_ops_possible = nb_ops;
register uint8_t *base_addr;
register uint32_t tail;
- int overflow;
if (unlikely(nb_ops == 0))
return 0;
@@ -590,13 +589,25 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
tail = queue->tail;
/* Find how many can actually fit on the ring */
- tmp_qp->inflights16 += nb_ops;
- overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
- if (overflow > 0) {
- tmp_qp->inflights16 -= overflow;
- nb_ops_possible = nb_ops - overflow;
- if (nb_ops_possible == 0)
- return 0;
+ {
+ /* dequeued can only be written by one thread, but it may not
+ * be this thread. As it's 4-byte aligned it will be read
+ * atomically here by any Intel CPU.
+ * enqueued can wrap before dequeued, but cannot
+ * lap it as var size of enq/deq (uint32_t) > var size of
+ * max_inflights (uint16_t). In reality inflights is never
+ * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
+ * On wrapping, the calculation still returns the correct
+ * positive value as all three vars are unsigned.
+ */
+ uint32_t inflights =
+ tmp_qp->enqueued - tmp_qp->dequeued;
+
+ if ((inflights + nb_ops) > tmp_qp->max_inflights) {
+ nb_ops_possible = tmp_qp->max_inflights - inflights;
+ if (nb_ops_possible == 0)
+ return 0;
+ }
}
while (nb_ops_sent != nb_ops_possible) {
@@ -605,11 +616,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
tmp_qp->qat_dev_gen);
if (ret != 0) {
tmp_qp->stats.enqueue_err_count++;
- /*
- * This message cannot be enqueued,
- * decrease number of ops that wasn't sent
- */
- tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent;
+ /* This message cannot be enqueued */
if (nb_ops_sent == 0)
return 0;
goto kick_tail;
@@ -621,6 +628,7 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
}
kick_tail:
queue->tail = tail;
+ tmp_qp->enqueued += nb_ops_sent;
tmp_qp->stats.enqueued_count += nb_ops_sent;
txq_write_tail(tmp_qp, queue);
return nb_ops_sent;
@@ -664,9 +672,9 @@ void rxq_free_desc(struct qat_qp *qp, struct qat_queue *q)
}
if (resp_counter > 0) {
rx_queue->head = head;
+ tmp_qp->dequeued += resp_counter;
tmp_qp->stats.dequeued_count += resp_counter;
rx_queue->nb_processed_responses += resp_counter;
- tmp_qp->inflights16 -= resp_counter;
if (rx_queue->nb_processed_responses >
QAT_CSR_HEAD_WRITE_THRESH)
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 5066f06..8b9ab79 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -63,7 +63,6 @@ struct qat_queue {
struct qat_qp {
void *mmap_bar_addr;
- uint16_t inflights16;
struct qat_queue tx_q;
struct qat_queue rx_q;
struct qat_common_stats stats;
@@ -75,6 +74,8 @@ struct qat_qp {
enum qat_service_type service_type;
struct qat_pci_device *qat_dev;
/**< qat device this qp is on */
+ uint32_t enqueued;
+ uint32_t dequeued __rte_aligned(4);
uint16_t max_inflights;
} __rte_cache_aligned;
--
1.7.0.7
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
` (2 preceding siblings ...)
2019-09-06 15:44 ` [dpdk-dev] [PATCH 3/3] common/qat: add dual thread support Fiona Trahe
@ 2019-09-06 16:12 ` Fiona Trahe
2019-10-04 10:48 ` Akhil Goyal
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 1/3] common/qat: remove tail write coalescing feature Fiona Trahe
` (2 subsequent siblings)
6 siblings, 1 reply; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 16:12 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
Remove the limitation whereby enqueue and dequeue must be
done in same thread.
The inflight calculation is reworked to be thread-safe for 2
threads - note this is not general multi-thread support, i.e
all enqueues to a qp must still be done in one thread and
all dequeues must be done in one thread, but enqueues and
dequeues may be in separate threads.
As the tail-coalescing feature is not
threadsafe it is removed first.
v2 changes:
- clarified wording in docs
Fiona Trahe (3):
common/qat: remove tail write coalescing feature
common/qat: move max inflights param into qp
common/qat: add dual thread support
doc/guides/compressdevs/qat_comp.rst | 5 ++-
doc/guides/cryptodevs/qat.rst | 11 +++++-
drivers/common/qat/qat_qp.c | 77 ++++++++++++++++++------------------
drivers/common/qat/qat_qp.h | 11 ++----
4 files changed, 54 insertions(+), 50 deletions(-)
--
2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD Fiona Trahe
@ 2019-10-04 10:48 ` Akhil Goyal
2019-10-08 10:53 ` Trahe, Fiona
0 siblings, 1 reply; 10+ messages in thread
From: Akhil Goyal @ 2019-10-04 10:48 UTC (permalink / raw)
To: Fiona Trahe, dev; +Cc: arkadiuszx.kusztal
Hi Fiona,
This patchset need a rebase. Could you please send v3.
Thanks,
Akhil
> -----Original Message-----
> From: Fiona Trahe <fiona.trahe@intel.com>
> Sent: Friday, September 6, 2019 9:42 PM
> To: dev@dpdk.org
> Cc: Akhil Goyal <akhil.goyal@nxp.com>; arkadiuszx.kusztal@intel.com;
> fiona.trahe@intel.com
> Subject: [PATCH v2 0/3] Add dual threading in QAT PMD
>
> Remove the limitation whereby enqueue and dequeue must be
> done in same thread.
> The inflight calculation is reworked to be thread-safe for 2
> threads - note this is not general multi-thread support, i.e
> all enqueues to a qp must still be done in one thread and
> all dequeues must be done in one thread, but enqueues and
> dequeues may be in separate threads.
> As the tail-coalescing feature is not
> threadsafe it is removed first.
>
> v2 changes:
> - clarified wording in docs
>
> Fiona Trahe (3):
> common/qat: remove tail write coalescing feature
> common/qat: move max inflights param into qp
> common/qat: add dual thread support
>
> doc/guides/compressdevs/qat_comp.rst | 5 ++-
> doc/guides/cryptodevs/qat.rst | 11 +++++-
> drivers/common/qat/qat_qp.c | 77 ++++++++++++++++++------------------
> drivers/common/qat/qat_qp.h | 11 ++----
> 4 files changed, 54 insertions(+), 50 deletions(-)
>
> --
> 2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD
2019-10-04 10:48 ` Akhil Goyal
@ 2019-10-08 10:53 ` Trahe, Fiona
0 siblings, 0 replies; 10+ messages in thread
From: Trahe, Fiona @ 2019-10-08 10:53 UTC (permalink / raw)
To: Akhil Goyal, dev; +Cc: Kusztal, ArkadiuszX, Trahe, Fiona
Hi Akhil,
We're going to defer this patchset this to 20.02.
We want to add another patch to it to mitigate some performance impacts it causes.#
I'm not sure of the process for this - do I just mark it in patchwork as deferred?
Or is there a tag I can put in an email, like Deferred-by?
Fiona
> -----Original Message-----
> From: Akhil Goyal [mailto:akhil.goyal@nxp.com]
> Sent: Friday, October 4, 2019 11:48 AM
> To: Trahe, Fiona <fiona.trahe@intel.com>; dev@dpdk.org
> Cc: Kusztal, ArkadiuszX <arkadiuszx.kusztal@intel.com>
> Subject: RE: [PATCH v2 0/3] Add dual threading in QAT PMD
>
> Hi Fiona,
>
> This patchset need a rebase. Could you please send v3.
>
> Thanks,
> Akhil
>
> > -----Original Message-----
> > From: Fiona Trahe <fiona.trahe@intel.com>
> > Sent: Friday, September 6, 2019 9:42 PM
> > To: dev@dpdk.org
> > Cc: Akhil Goyal <akhil.goyal@nxp.com>; arkadiuszx.kusztal@intel.com;
> > fiona.trahe@intel.com
> > Subject: [PATCH v2 0/3] Add dual threading in QAT PMD
> >
> > Remove the limitation whereby enqueue and dequeue must be
> > done in same thread.
> > The inflight calculation is reworked to be thread-safe for 2
> > threads - note this is not general multi-thread support, i.e
> > all enqueues to a qp must still be done in one thread and
> > all dequeues must be done in one thread, but enqueues and
> > dequeues may be in separate threads.
> > As the tail-coalescing feature is not
> > threadsafe it is removed first.
> >
> > v2 changes:
> > - clarified wording in docs
> >
> > Fiona Trahe (3):
> > common/qat: remove tail write coalescing feature
> > common/qat: move max inflights param into qp
> > common/qat: add dual thread support
> >
> > doc/guides/compressdevs/qat_comp.rst | 5 ++-
> > doc/guides/cryptodevs/qat.rst | 11 +++++-
> > drivers/common/qat/qat_qp.c | 77 ++++++++++++++++++------------------
> > drivers/common/qat/qat_qp.h | 11 ++----
> > 4 files changed, 54 insertions(+), 50 deletions(-)
> >
> > --
> > 2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2 1/3] common/qat: remove tail write coalescing feature
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
` (3 preceding siblings ...)
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 0/3] Add dual threading in QAT PMD Fiona Trahe
@ 2019-09-06 16:12 ` Fiona Trahe
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 2/3] common/qat: move max inflights param into qp Fiona Trahe
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 3/3] common/qat: add dual thread support Fiona Trahe
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 16:12 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
The feature Coalescing Tail Writes on Enqueue is removed
as it is not thread-safe and a dual-thread feature will be added shortly.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
drivers/common/qat/qat_qp.c | 16 +++-------------
drivers/common/qat/qat_qp.h | 6 ------
2 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 03f11f869..01ddce008 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -538,7 +538,6 @@ static inline void
txq_write_tail(struct qat_qp *qp, struct qat_queue *q) {
WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number,
q->hw_queue_number, q->tail);
- q->nb_pending_requests = 0;
q->csr_tail = q->tail;
}
@@ -622,25 +621,20 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
kick_tail:
queue->tail = tail;
tmp_qp->stats.enqueued_count += nb_ops_sent;
- queue->nb_pending_requests += nb_ops_sent;
- if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH ||
- queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) {
- txq_write_tail(tmp_qp, queue);
- }
+ txq_write_tail(tmp_qp, queue);
return nb_ops_sent;
}
uint16_t
qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
{
- struct qat_queue *rx_queue, *tx_queue;
+ struct qat_queue *rx_queue;
struct qat_qp *tmp_qp = (struct qat_qp *)qp;
uint32_t head;
uint32_t resp_counter = 0;
uint8_t *resp_msg;
rx_queue = &(tmp_qp->rx_q);
- tx_queue = &(tmp_qp->tx_q);
head = rx_queue->head;
resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
@@ -677,11 +671,7 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
QAT_CSR_HEAD_WRITE_THRESH)
rxq_free_desc(tmp_qp, rx_queue);
}
- /* also check if tail needs to be advanced */
- if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH &&
- tx_queue->tail != tx_queue->csr_tail) {
- txq_write_tail(tmp_qp, tx_queue);
- }
+
return resp_counter;
}
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 980c2ba32..9212ca457 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -11,10 +11,6 @@ struct qat_pci_device;
#define QAT_CSR_HEAD_WRITE_THRESH 32U
/* number of requests to accumulate before writing head CSR */
-#define QAT_CSR_TAIL_WRITE_THRESH 32U
-/* number of requests to accumulate before writing tail CSR */
-#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U
-/* number of inflights below which no tail write coalescing should occur */
typedef int (*build_request_t)(void *op,
uint8_t *req, void *op_cookie,
@@ -64,8 +60,6 @@ struct qat_queue {
uint32_t csr_tail; /* last written tail value */
uint16_t nb_processed_responses;
/* number of responses processed since last CSR head write */
- uint16_t nb_pending_requests;
- /* number of requests pending since last CSR tail write */
};
struct qat_qp {
--
2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2 2/3] common/qat: move max inflights param into qp
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
` (4 preceding siblings ...)
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 1/3] common/qat: remove tail write coalescing feature Fiona Trahe
@ 2019-09-06 16:12 ` Fiona Trahe
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 3/3] common/qat: add dual thread support Fiona Trahe
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 16:12 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
The max_inflights parameter is moved from qat_queue to qat_qp as it's
a more appropriate location.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
drivers/common/qat/qat_qp.c | 23 ++++++++++++-----------
drivers/common/qat/qat_qp.h | 2 +-
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 01ddce008..8e4c74a02 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -239,6 +239,15 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
goto create_err;
}
+ qp->max_inflights = ADF_MAX_INFLIGHTS(qp->tx_q.queue_size,
+ ADF_BYTES_TO_MSG_SIZE(qp->tx_q.msg_size));
+
+ if (qp->max_inflights < 2) {
+ QAT_LOG(ERR, "Invalid num inflights");
+ qat_queue_delete(&(qp->tx_q));
+ goto create_err;
+ }
+
if (qat_queue_create(qat_dev, &(qp->rx_q), qat_qp_conf,
ADF_RING_DIR_RX) != 0) {
QAT_LOG(ERR, "Rx queue create failed "
@@ -416,15 +425,7 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
goto queue_create_err;
}
- queue->max_inflights = ADF_MAX_INFLIGHTS(queue->queue_size,
- ADF_BYTES_TO_MSG_SIZE(desc_size));
queue->modulo_mask = (1 << ADF_RING_SIZE_MODULO(queue->queue_size)) - 1;
-
- if (queue->max_inflights < 2) {
- QAT_LOG(ERR, "Invalid num inflights");
- ret = -EINVAL;
- goto queue_create_err;
- }
queue->head = 0;
queue->tail = 0;
queue->msg_size = desc_size;
@@ -443,11 +444,11 @@ qat_queue_create(struct qat_pci_device *qat_dev, struct qat_queue *queue,
queue->hw_queue_number, queue_base);
QAT_LOG(DEBUG, "RING: Name:%s, size in CSR: %u, in bytes %u,"
- " nb msgs %u, msg_size %u, max_inflights %u modulo mask %u",
+ " nb msgs %u, msg_size %u, modulo mask %u",
queue->memz_name,
queue->queue_size, queue_size_bytes,
qp_conf->nb_descriptors, desc_size,
- queue->max_inflights, queue->modulo_mask);
+ queue->modulo_mask);
return 0;
@@ -590,7 +591,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
/* Find how many can actually fit on the ring */
tmp_qp->inflights16 += nb_ops;
- overflow = tmp_qp->inflights16 - queue->max_inflights;
+ overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
if (overflow > 0) {
tmp_qp->inflights16 -= overflow;
nb_ops_possible = nb_ops - overflow;
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 9212ca457..5066f06f0 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -51,7 +51,6 @@ struct qat_queue {
uint32_t tail; /* Shadow copy of the tail */
uint32_t modulo_mask;
uint32_t msg_size;
- uint16_t max_inflights;
uint32_t queue_size;
uint8_t hw_bundle_number;
uint8_t hw_queue_number;
@@ -76,6 +75,7 @@ struct qat_qp {
enum qat_service_type service_type;
struct qat_pci_device *qat_dev;
/**< qat device this qp is on */
+ uint16_t max_inflights;
} __rte_cache_aligned;
extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE];
--
2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2 3/3] common/qat: add dual thread support
2019-09-06 15:44 [dpdk-dev] [PATCH 0/3] Add dual threading in QAT PMD Fiona Trahe
` (5 preceding siblings ...)
2019-09-06 16:12 ` [dpdk-dev] [PATCH v2 2/3] common/qat: move max inflights param into qp Fiona Trahe
@ 2019-09-06 16:12 ` Fiona Trahe
6 siblings, 0 replies; 10+ messages in thread
From: Fiona Trahe @ 2019-09-06 16:12 UTC (permalink / raw)
To: dev; +Cc: akhil.goyal, arkadiuszx.kusztal, fiona.trahe
Remove the limitation whereby enqueue and dequeue must be
done in same thread.
The inflight calculation is reworked to be thread-safe for 2
threads - note this is not general multi-thread support, i.e
all enqueues to a qp must still be done in one thread and
all dequeues must be done in one thread, but enqueues and
dequeues may be in separate threads.
Documentation updated.
Signed-off-by: Fiona Trahe <fiona.trahe@intel.com>
---
doc/guides/compressdevs/qat_comp.rst | 5 ++++-
doc/guides/cryptodevs/qat.rst | 11 ++++++++--
drivers/common/qat/qat_qp.c | 40 +++++++++++++++++++++---------------
drivers/common/qat/qat_qp.h | 3 ++-
4 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst
index 6f583a460..669e288e3 100644
--- a/doc/guides/compressdevs/qat_comp.rst
+++ b/doc/guides/compressdevs/qat_comp.rst
@@ -33,7 +33,10 @@ Limitations
-----------
* Compressdev level 0, no compression, is not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* QAT queue-pairs are thread-safe when process runs on an Intel CPU but QAT queues are not
+ (that is, within a single queue-pair all enqueues to the TX queue must be done from one
+ thread and all dequeues from the RX queue must be done from one thread, but enqueues
+ and dequeues may be done in different threads.)
* No BSD support as BSD QAT kernel driver not available.
* When using Deflate dynamic huffman encoding for compression, the input size (op.src.length)
must be < CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE from the config file,
diff --git a/doc/guides/cryptodevs/qat.rst b/doc/guides/cryptodevs/qat.rst
index e905f6d00..750a03e6c 100644
--- a/doc/guides/cryptodevs/qat.rst
+++ b/doc/guides/cryptodevs/qat.rst
@@ -81,7 +81,11 @@ Limitations
* No BSD support as BSD QAT kernel driver not available.
* ZUC EEA3/EIA3 is not supported by dh895xcc devices
* Maximum additional authenticated data (AAD) for GCM is 240 bytes long and must be passed to the device in a buffer rounded up to the nearest block-size multiple (x16) and padded with zeros.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* QAT queue-pairs are thread-safe when process runs on an Intel CPU but QAT queues are not
+ (that is, within a single queue-pair all enqueues to the TX queue must be done from one
+ thread and all dequeues from the RX queue must be done from one thread, but enqueues
+ and dequeues may be done in different threads.)
+
Extra notes on KASUMI F9
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -122,7 +126,10 @@ Limitations
~~~~~~~~~~~
* Big integers longer than 4096 bits are not supported.
-* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores is not supported).
+* QAT queue-pairs are thread-safe when process runs on an Intel CPU but QAT queues are not
+ (that is, within a single queue-pair all enqueues to the TX queue must be done from one
+ thread and all dequeues from the RX queue must be done from one thread, but enqueues
+ and dequeues may be done in different threads.)
.. _building_qat:
diff --git a/drivers/common/qat/qat_qp.c b/drivers/common/qat/qat_qp.c
index 8e4c74a02..30cdc618d 100644
--- a/drivers/common/qat/qat_qp.c
+++ b/drivers/common/qat/qat_qp.c
@@ -230,7 +230,7 @@ int qat_qp_setup(struct qat_pci_device *qat_dev,
}
qp->mmap_bar_addr = pci_dev->mem_resource[0].addr;
- qp->inflights16 = 0;
+ qp->enqueued = qp->dequeued = 0;
if (qat_queue_create(qat_dev, &(qp->tx_q), qat_qp_conf,
ADF_RING_DIR_TX) != 0) {
@@ -321,7 +321,7 @@ int qat_qp_release(struct qat_qp **qp_addr)
qp->qat_dev->qat_dev_id);
/* Don't free memory if there are still responses to be processed */
- if (qp->inflights16 == 0) {
+ if ((qp->enqueued - qp->dequeued) == 0) {
qat_queue_delete(&(qp->tx_q));
qat_queue_delete(&(qp->rx_q));
} else {
@@ -579,7 +579,6 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
uint16_t nb_ops_possible = nb_ops;
register uint8_t *base_addr;
register uint32_t tail;
- int overflow;
if (unlikely(nb_ops == 0))
return 0;
@@ -590,13 +589,25 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
tail = queue->tail;
/* Find how many can actually fit on the ring */
- tmp_qp->inflights16 += nb_ops;
- overflow = tmp_qp->inflights16 - tmp_qp->max_inflights;
- if (overflow > 0) {
- tmp_qp->inflights16 -= overflow;
- nb_ops_possible = nb_ops - overflow;
- if (nb_ops_possible == 0)
- return 0;
+ {
+ /* dequeued can only be written by one thread, but it may not
+ * be this thread. As it's 4-byte aligned it will be read
+ * atomically here by any Intel CPU.
+ * enqueued can wrap before dequeued, but cannot
+ * lap it as var size of enq/deq (uint32_t) > var size of
+ * max_inflights (uint16_t). In reality inflights is never
+ * even as big as max uint16_t, as it's <= ADF_MAX_DESC.
+ * On wrapping, the calculation still returns the correct
+ * positive value as all three vars are unsigned.
+ */
+ uint32_t inflights =
+ tmp_qp->enqueued - tmp_qp->dequeued;
+
+ if ((inflights + nb_ops) > tmp_qp->max_inflights) {
+ nb_ops_possible = tmp_qp->max_inflights - inflights;
+ if (nb_ops_possible == 0)
+ return 0;
+ }
}
while (nb_ops_sent != nb_ops_possible) {
@@ -605,11 +616,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
tmp_qp->qat_dev_gen);
if (ret != 0) {
tmp_qp->stats.enqueue_err_count++;
- /*
- * This message cannot be enqueued,
- * decrease number of ops that wasn't sent
- */
- tmp_qp->inflights16 -= nb_ops_possible - nb_ops_sent;
+ /* This message cannot be enqueued */
if (nb_ops_sent == 0)
return 0;
goto kick_tail;
@@ -621,6 +628,7 @@ qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops)
}
kick_tail:
queue->tail = tail;
+ tmp_qp->enqueued += nb_ops_sent;
tmp_qp->stats.enqueued_count += nb_ops_sent;
txq_write_tail(tmp_qp, queue);
return nb_ops_sent;
@@ -664,9 +672,9 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
}
if (resp_counter > 0) {
rx_queue->head = head;
+ tmp_qp->dequeued += resp_counter;
tmp_qp->stats.dequeued_count += resp_counter;
rx_queue->nb_processed_responses += resp_counter;
- tmp_qp->inflights16 -= resp_counter;
if (rx_queue->nb_processed_responses >
QAT_CSR_HEAD_WRITE_THRESH)
diff --git a/drivers/common/qat/qat_qp.h b/drivers/common/qat/qat_qp.h
index 5066f06f0..8b9ab79ff 100644
--- a/drivers/common/qat/qat_qp.h
+++ b/drivers/common/qat/qat_qp.h
@@ -63,7 +63,6 @@ struct qat_queue {
struct qat_qp {
void *mmap_bar_addr;
- uint16_t inflights16;
struct qat_queue tx_q;
struct qat_queue rx_q;
struct qat_common_stats stats;
@@ -75,6 +74,8 @@ struct qat_qp {
enum qat_service_type service_type;
struct qat_pci_device *qat_dev;
/**< qat device this qp is on */
+ uint32_t enqueued;
+ uint32_t dequeued __rte_aligned(4);
uint16_t max_inflights;
} __rte_cache_aligned;
--
2.13.6
^ permalink raw reply [flat|nested] 10+ messages in thread