From 184e56e77c06a7eef68a021e9d4b11a11a8ab096 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Mar 2025 11:57:32 +0800 Subject: crypto: iaa - Move compression CRC into request object Rather than passing around a CRC between the functions, embed it into the acomp_request context. Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 09d9589f2d68..4240a2e3d375 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1020,8 +1020,7 @@ static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc); + dma_addr_t dst_addr, unsigned int *dlen); static void iaa_desc_complete(struct idxd_desc *idxd_desc, enum idxd_complete_type comp_type, @@ -1087,10 +1086,10 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, } if (ctx->compress && compression_ctx->verify_compress) { + u32 *compression_crc = acomp_request_ctx(ctx->req); dma_addr_t src_addr, dst_addr; - u32 compression_crc; - compression_crc = idxd_desc->iax_completion->crc; + *compression_crc = idxd_desc->iax_completion->crc; ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr); if (ret) { @@ -1100,8 +1099,7 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, } ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr, - ctx->req->slen, dst_addr, &ctx->req->dlen, - compression_crc); + ctx->req->slen, dst_addr, &ctx->req->dlen); if (ret) { dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret); err = -EIO; @@ -1130,11 +1128,11 @@ out: static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 *compression_crc) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *compression_crc = acomp_request_ctx(req); struct iaa_device *iaa_device; struct idxd_desc *idxd_desc; struct iax_hw_desc *desc; @@ -1282,11 +1280,11 @@ out: static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - u32 compression_crc) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *compression_crc = acomp_request_ctx(req); struct iaa_device *iaa_device; struct idxd_desc *idxd_desc; struct iax_hw_desc *desc; @@ -1346,10 +1344,10 @@ static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, goto err; } - if (compression_crc != idxd_desc->iax_completion->crc) { + if (*compression_crc != idxd_desc->iax_completion->crc) { ret = -EINVAL; dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" - " comp=0x%x, decomp=0x%x\n", compression_crc, + " comp=0x%x, decomp=0x%x\n", *compression_crc, idxd_desc->iax_completion->crc); print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, idxd_desc->iax_completion, 64, 0); @@ -1496,7 +1494,6 @@ static int iaa_comp_acompress(struct acomp_req *req) dma_addr_t src_addr, dst_addr; int nr_sgs, cpu, ret = 0; struct iaa_wq *iaa_wq; - u32 compression_crc; struct idxd_wq *wq; struct device *dev; @@ -1557,7 +1554,7 @@ static int iaa_comp_acompress(struct acomp_req *req) req->dst, req->dlen, sg_dma_len(req->dst)); ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, - &req->dlen, &compression_crc); + &req->dlen); if (ret == -EINPROGRESS) return ret; @@ -1569,7 +1566,7 @@ static int iaa_comp_acompress(struct acomp_req *req) } ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, compression_crc); + dst_addr, &req->dlen); if (ret) dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret); @@ -1694,6 +1691,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .init = iaa_comp_init_fixed, .compress = iaa_comp_acompress, .decompress = iaa_comp_adecompress, + .reqsize = sizeof(u32), .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", -- cgit v1.2.3 From cc98d8ce934b99789d30421957fd6a20fffb1c22 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Mar 2025 12:04:18 +0800 Subject: crypto: iaa - Do not clobber req->base.data The req->base.data field is for the user and must not be touched by the driver, unless you save it first. The iaa driver doesn't seem to be using the req->base.data value so just remove the assignment. Fixes: 09646c98d0bf ("crypto: iaa - Add irq support for the crypto async interface") Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 4240a2e3d375..773aab8a8830 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1185,8 +1185,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: compression mode %s," " desc->src1_addr %llx, desc->src1_size %d," @@ -1423,8 +1422,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, " src_addr %llx, dst_addr %llx\n", __func__, active_compression_mode->name, src_addr, dst_addr); - } else if (ctx->async_mode && !disable_async) - req->base.data = idxd_desc; + } dev_dbg(dev, "%s: decompression mode %s," " desc->src1_addr %llx, desc->src1_size %d," -- cgit v1.2.3 From 39ccd0e54f164ea3c40327d9163205a8449047c2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 24 Mar 2025 12:08:52 +0800 Subject: crypto: iaa - Remove unused disable_async argument from iaa_decompress Remove the disable_async field left over after the NULL dst removal. Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 773aab8a8830..aa63df16f20b 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1366,8 +1366,7 @@ err: static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, struct idxd_wq *wq, dma_addr_t src_addr, unsigned int slen, - dma_addr_t dst_addr, unsigned int *dlen, - bool disable_async) + dma_addr_t dst_addr, unsigned int *dlen) { struct iaa_device_compression_mode *active_compression_mode; struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1409,7 +1408,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, desc->src1_size = slen; desc->completion_addr = idxd_desc->compl_dma; - if (ctx->use_irq && !disable_async) { + if (ctx->use_irq) { desc->flags |= IDXD_OP_FLAG_RCI; idxd_desc->crypto.req = req; @@ -1442,7 +1441,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, update_total_decomp_calls(); update_wq_decomp_calls(wq); - if (ctx->async_mode && !disable_async) { + if (ctx->async_mode) { ret = -EINPROGRESS; dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); goto out; @@ -1470,7 +1469,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, *dlen = req->dlen; - if (!ctx->async_mode || disable_async) + if (!ctx->async_mode) idxd_free_desc(wq, idxd_desc); /* Update stats */ @@ -1650,7 +1649,7 @@ static int iaa_comp_adecompress(struct acomp_req *req) req->dst, req->dlen, sg_dma_len(req->dst)); ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, - dst_addr, &req->dlen, false); + dst_addr, &req->dlen); if (ret == -EINPROGRESS) return ret; -- cgit v1.2.3 From d0a5c9d079decad95a77638c19bc5b3a6a0ffe21 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:02:58 +0800 Subject: crypto: iaa - Switch to ACOMP_FBREQ_ON_STACK Rather than copying the request by hand, use the ACOMP_FBREQ_ON_STACK helper to do it. Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index aa63df16f20b..b4f15e738cee 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -999,12 +999,9 @@ out: static int deflate_generic_decompress(struct acomp_req *req) { - ACOMP_REQUEST_ON_STACK(fbreq, crypto_acomp_reqtfm(req)); + ACOMP_FBREQ_ON_STACK(fbreq, req); int ret; - acomp_request_set_callback(fbreq, 0, NULL, NULL); - acomp_request_set_params(fbreq, req->src, req->dst, req->slen, - req->dlen); ret = crypto_acomp_decompress(fbreq); req->dlen = fbreq->dlen; -- cgit v1.2.3 From 47b5b6f9eb736b1868b0f9c1a1575b5922451cc6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Apr 2025 18:21:06 +0800 Subject: crypto: iaa - Use cra_reqsize for acomp Use the common reqsize field for acomp algorithms. Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index b4f15e738cee..be3b899c6977 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -1685,12 +1685,12 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .init = iaa_comp_init_fixed, .compress = iaa_comp_acompress, .decompress = iaa_comp_adecompress, - .reqsize = sizeof(u32), .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", .cra_flags = CRYPTO_ALG_ASYNC, .cra_ctxsize = sizeof(struct iaa_compression_ctx), + .cra_reqsize = sizeof(u32), .cra_module = THIS_MODULE, .cra_priority = IAA_ALG_PRIORITY, } -- cgit v1.2.3 From db4978d2f0559288a1595f82bda419fad216beed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 25 Apr 2025 23:11:31 -0700 Subject: crypto: iaa - Adjust workqueue allocation type In preparation for making the kmalloc family of allocators type aware, we need to make sure that the returned type from the allocation matches the type of the variable being assigned. (Before, the allocator would always return "void *", which can be implicitly cast to any pointer type.) The assigned type is "struct idxd_wq **", but the returned type will be "struct wq **". These are the same size allocation (pointer sized), but the types don't match. Adjust the allocation type to match the assignment. Signed-off-by: Kees Cook Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index be3b899c6977..e9c762124f11 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -725,7 +725,7 @@ static int alloc_wq_table(int max_wqs) for (cpu = 0; cpu < nr_cpus; cpu++) { entry = per_cpu_ptr(wq_table, cpu); - entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL); + entry->wqs = kcalloc(max_wqs, sizeof(*entry->wqs), GFP_KERNEL); if (!entry->wqs) { free_wq_table(); return -ENOMEM; -- cgit v1.2.3 From 714ca27e9bf4608fcb1f627cd5599441f448771e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 8 May 2025 15:59:50 -0400 Subject: crypto: iaa - Optimize rebalance_wq_table() The function opencodes for_each_cpu() by using a plain for-loop. The loop calls cpumask_weight() inside the conditional section. Because cpumask_weight() is O(1), the overall complexity of the function is O(node * node_cpus^2). Also, cpumask_nth() internally calls hweight(), which, if not hardware accelerated, is slower than cpumask_next() in for_each_cpu(). If switched to the dedicated for_each_cpu(), the rebalance_wq_table() can drop calling cpumask_weight(), together with some housekeeping code. This makes the overall complexity O(node * node_cpus), or simply speaking O(nr_cpu_ids). While there, fix opencoded for_each_possible_cpu() too. Signed-off-by: Yury Norov Signed-off-by: Herbert Xu --- drivers/crypto/intel/iaa/iaa_crypto_main.c | 35 ++++++++++++------------------ 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'drivers/crypto/intel/iaa/iaa_crypto_main.c') diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index e9c762124f11..23f585219fb4 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -894,7 +894,7 @@ out: static void rebalance_wq_table(void) { const struct cpumask *node_cpus; - int node, cpu, iaa = -1; + int node_cpu, node, cpu, iaa = 0; if (nr_iaa == 0) return; @@ -905,36 +905,29 @@ static void rebalance_wq_table(void) clear_wq_table(); if (nr_iaa == 1) { - for (cpu = 0; cpu < nr_cpus; cpu++) { - if (WARN_ON(wq_table_add_wqs(0, cpu))) { - pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); - return; - } + for_each_possible_cpu(cpu) { + if (WARN_ON(wq_table_add_wqs(0, cpu))) + goto err; } return; } for_each_node_with_cpus(node) { + cpu = 0; node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { - int node_cpu = cpumask_nth(cpu, node_cpus); - - if (WARN_ON(node_cpu >= nr_cpu_ids)) { - pr_debug("node_cpu %d doesn't exist!\n", node_cpu); - return; - } - - if ((cpu % cpus_per_iaa) == 0) - iaa++; - - if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { - pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); - return; - } + for_each_cpu(node_cpu, node_cpus) { + iaa = cpu / cpus_per_iaa; + if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) + goto err; + cpu++; } } + + return; +err: + pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); } static inline int check_completion(struct device *dev, -- cgit v1.2.3