From f074f7b103a915edb1edf833f96a902adeb374cf Mon Sep 17 00:00:00 2001 From: Horia Geantă Date: Thu, 27 Aug 2015 18:38:36 +0300 Subject: crypto: tcrypt - avoid mapping from module image addresses The output buffer in test_ahash_speed will point to an address located within the tcrypt module image. This causes problems when trying to DMA map the buffer. For e.g. on ARM-based LS1021A, a page fault occurs within the DMA API when trying to access the struct page returned by virt_to_page(output): insmod tcrypt.ko mode=403 testing speed of async sha1 (sha1-caam) test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): Unable to handle kernel paging request at virtual address f07e9080 pgd = e58d0e00 [f07e9080] *pgd=80000080007003, *pmd=00000000 Internal error: Oops: 206 [#1] SMP THUMB2 Modules linked in: tcrypt(+) CPU: 1 PID: 1119 Comm: insmod Not tainted 4.2.0-rc1-256134-gbf433416e675 #1 Hardware name: Freescale LS1021A task: ea063900 ti: e5a34000 task.ti: e5a34000 PC is at dma_cache_maint_page+0x38/0xd0 LR is at __dma_page_cpu_to_dev+0x15/0x64 pc : [<800155a0>] lr : [<8001564d>] psr: 000f0033 sp : e5a35ca0 ip : 8063df00 fp : f07e9080 r10: 00000cd0 r9 : 8063df00 r8 : 805a2f04 r7 : 0017f804 r6 : 00000002 r5 : ee7f9000 r4 : 00000014 r3 : 80612d40 r2 : 01ff0080 r1 : 00000380 r0 : ee7f9000 Flags: nzcv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment user Control: 70c5387d Table: e58d0e00 DAC: 9b7ede70 Process insmod (pid: 1119, stack limit = 0xe5a34210) Stack: (0xe5a35ca0 to 0xe5a36000) [...] 
[<800155a0>] (dma_cache_maint_page) from [<8001564d>] (__dma_page_cpu_to_dev+0x15/0x64) [<8001564d>] (__dma_page_cpu_to_dev) from [<800156eb>] (arm_dma_map_page+0x1f/0x44) [<800156eb>] (arm_dma_map_page) from [<802935e3>] (ahash_digest+0x35f/0x510) [<802935e3>] (ahash_digest) from [<7f800d03>] (test_ahash_speed.constprop.6+0x24a/0x4e4 [tcrypt]) [<7f800d03>] (test_ahash_speed.constprop.6 [tcrypt]) from [<7f802fd5>] (do_test+0x1898/0x2058 [tcrypt]) [<7f802fd5>] (do_test [tcrypt]) from [<7f80802f>] (tcrypt_mod_init+0x2e/0x63 [tcrypt]) [<7f80802f>] (tcrypt_mod_init [tcrypt]) from [<80009517>] (do_one_initcall+0xb3/0x134) [<80009517>] (do_one_initcall) from [<80351ec7>] (do_init_module+0x3b/0x13c) [<80351ec7>] (do_init_module) from [<8005cc3f>] (load_module+0x97b/0x9dc) [<8005cc3f>] (load_module) from [<8005cd8d>] (SyS_finit_module+0x35/0x3e) [<8005cd8d>] (SyS_finit_module) from [<8000d101>] (ret_fast_syscall+0x1/0x4c) Code: 1aba 0152 eb00 0b02 (5882) 0f92 addr2line -f -i -e vmlinux 800155a0 page_zonenum include/linux/mm.h:728 page_zone include/linux/mm.h:881 dma_cache_maint_page arch/arm/mm/dma-mapping.c:822 Signed-off-by: Horia Geantă 
Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 2b00b617daab..46a4a757d478 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -48,6 +48,8 @@ #define ENCRYPT 1 #define DECRYPT 0 +#define MAX_DIGEST_SIZE 64 + /* * return a string with the driver name */ @@ -950,7 +952,7 @@ static void test_ahash_speed(const char *algo, unsigned int secs, struct tcrypt_result tresult; struct ahash_request *req; struct crypto_ahash *tfm; - static char output[1024]; + char *output; int i, ret; tfm = crypto_alloc_ahash(algo, 0, 0); @@ -963,9 +965,9 @@ static void test_ahash_speed(const char *algo, unsigned int secs, printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo, get_driver_name(crypto_ahash, tfm)); - if (crypto_ahash_digestsize(tfm) > sizeof(output)) { - pr_err("digestsize(%u) > outputbuffer(%zu)\n", - crypto_ahash_digestsize(tfm), sizeof(output)); + if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) { + pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm), + MAX_DIGEST_SIZE); goto out; } @@ -980,6 +982,10 @@ static void test_ahash_speed(const char *algo, unsigned int secs, ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, tcrypt_complete, &tresult); + output = kmalloc(MAX_DIGEST_SIZE, GFP_KERNEL); + if (!output) + goto out_nomem; + for (i = 0; speed[i].blen != 0; i++) { if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { pr_err("template (%u) too big for tvmem (%lu)\n", @@ -1006,6 +1012,9 @@ static void test_ahash_speed(const char *algo, unsigned int secs, } } + kfree(output); + +out_nomem: ahash_request_free(req); out: -- cgit v1.2.3 From 3dc1597b6b0a8d17fc56d93b5e9cf859fe681727 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 28 Aug 2015 18:43:24 +0200 Subject: crypto: amcc - Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module 
autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Herbert Xu --- drivers/crypto/amcc/crypto4xx_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 192a8fa325c1..7e9e461a9cbc 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -1284,6 +1284,7 @@ static const struct of_device_id crypto4xx_match[] = { { .compatible = "amcc,ppc4xx-crypto",}, { }, }; +MODULE_DEVICE_TABLE(of, crypto4xx_match); static struct platform_driver crypto4xx_driver = { .driver = { -- cgit v1.2.3 From c3abc0f3b606a984f864452292c423f01e57c656 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 28 Aug 2015 18:44:03 +0200 Subject: crypto: picoxcell - Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index da36de26a4dc..615da961c4d8 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1591,6 +1591,7 @@ static const struct of_device_id spacc_of_id_table[] = { { .compatible = "picochip,spacc-l2" }, {} }; +MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ static bool spacc_is_compatible(struct platform_device *pdev, -- cgit v1.2.3 From 53ed2d4e56c0e4749e3481475d3520275d9e67a5 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 28 Aug 2015 18:44:49 +0200 Subject: crypto: ux500 - Fix module autoload for OF platform drivers These platform drivers have a OF device ID table but the OF module alias information is not created so module autoloading won't work. 
Signed-off-by: Luis de Bethencourt Signed-off-by: Herbert Xu --- drivers/crypto/ux500/cryp/cryp_core.c | 1 + drivers/crypto/ux500/hash/hash_core.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index fded0a5cfcd7..00f97d9750b0 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1777,6 +1777,7 @@ static const struct of_device_id ux500_cryp_match[] = { { .compatible = "stericsson,ux500-cryp" }, { }, }; +MODULE_DEVICE_TABLE(of, ux500_cryp_match); static struct platform_driver cryp_driver = { .probe = ux500_cryp_probe, diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 5f5f360628fc..e02bd000e750 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1958,6 +1958,7 @@ static const struct of_device_id ux500_hash_match[] = { { .compatible = "stericsson,ux500-hash" }, { }, }; +MODULE_DEVICE_TABLE(of, ux500_hash_match); static struct platform_driver hash_driver = { .probe = ux500_hash_probe, -- cgit v1.2.3 From a2712e6c75f1e95b7c31830c19191e8fa367c15a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 2 Sep 2015 12:05:18 -0300 Subject: crypto: mxs-dcp - Allow MXS_DCP to be used on MX6SL MX6SL has the same DCP crypto block as in MX23/MX28, so allow it to be built for ARCH_MXC. 
Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index d234719065a5..e7f24a88fae9 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -429,7 +429,7 @@ endif config CRYPTO_DEV_MXS_DCP tristate "Support for Freescale MXS DCP" - depends on ARCH_MXS + depends on (ARCH_MXS || ARCH_MXC) select CRYPTO_CBC select CRYPTO_ECB select CRYPTO_AES -- cgit v1.2.3 From 04b7d704c6160fa4e8eeffc8540575fb15142f7f Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Thu, 10 Sep 2015 11:32:26 +0530 Subject: hwrng: octeon - Use devm_hwrng_register Use resource managed function devm_hwrng_register instead of hwrng_register to make the error-path simpler. Also, remove octeon_rng_remove as it is now redundant. Signed-off-by: Vaishali Thakkar Signed-off-by: Herbert Xu --- drivers/char/hw_random/octeon-rng.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c index 6234a4a19b56..8c78aa090492 100644 --- a/drivers/char/hw_random/octeon-rng.c +++ b/drivers/char/hw_random/octeon-rng.c @@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev) rng->ops = ops; platform_set_drvdata(pdev, &rng->ops); - ret = hwrng_register(&rng->ops); + ret = devm_hwrng_register(&pdev->dev, &rng->ops); if (ret) return -ENOENT; @@ -105,21 +105,11 @@ static int octeon_rng_probe(struct platform_device *pdev) return 0; } -static int octeon_rng_remove(struct platform_device *pdev) -{ - struct hwrng *rng = platform_get_drvdata(pdev); - - hwrng_unregister(rng); - - return 0; -} - static struct platform_driver octeon_rng_driver = { .driver = { .name = "octeon_rng", }, .probe = octeon_rng_probe, - .remove = octeon_rng_remove, }; module_platform_driver(octeon_rng_driver); -- cgit v1.2.3 From 6c5de9871a4d43bba179c6a862eeb53b5232f996 Mon Sep 17 
00:00:00 2001 From: John Griffin Date: Fri, 11 Sep 2015 12:26:00 -0700 Subject: crypto: qat - don't check for iommu In some cases we don't want iommu to be enabled but still we want to enable VFs. Signed-off-by: John Griffin Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_sriov.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 2f77a4a8cecb..1117a8b58280 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -244,11 +244,8 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) return -EFAULT; } - if (!iommu_present(&pci_bus_type)) { - dev_err(&pdev->dev, - "IOMMU must be enabled for SR-IOV to work\n"); - return -EINVAL; - } + if (!iommu_present(&pci_bus_type)) + dev_warn(&pdev->dev, "IOMMU should be enabled for SR-IOV to work correctly\n"); if (accel_dev->pf.vf_info) { dev_info(&pdev->dev, "Already enabled for this device\n"); -- cgit v1.2.3 From 1bf2138e9806ea436933d5a803547833f9524c8f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 12 Sep 2015 20:19:50 -0300 Subject: hwrng: mxc-rnga - Remove unneeded goto label We can simplify the code by returning the error code immediately instead of jumping to a goto label. 
Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/char/hw_random/mxc-rnga.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 6cbb72ec6013..8803126c3330 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -160,13 +160,12 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) mxc_rng->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(mxc_rng->clk)) { dev_err(&pdev->dev, "Could not get rng_clk!\n"); - err = PTR_ERR(mxc_rng->clk); - goto out; + return PTR_ERR(mxc_rng->clk); } err = clk_prepare_enable(mxc_rng->clk); if (err) - goto out; + return err; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mxc_rng->mem = devm_ioremap_resource(&pdev->dev, res); @@ -187,8 +186,6 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) err_ioremap: clk_disable_unprepare(mxc_rng->clk); - -out: return err; } -- cgit v1.2.3 From c09e2cc69441ef5d99d31aa83f7d2b6239e5b9a7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 12 Sep 2015 20:19:51 -0300 Subject: hwrng: mxc-rnga - Remove uneeded initialization There is no need to pre-initialize variable 'err' as this initial value will be overwritten later on. 
Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/char/hw_random/mxc-rnga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index 8803126c3330..c7380b80c344 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -141,7 +141,7 @@ static void mxc_rnga_cleanup(struct hwrng *rng) static int __init mxc_rnga_probe(struct platform_device *pdev) { - int err = -ENODEV; + int err; struct resource *res; struct mxc_rng *mxc_rng; -- cgit v1.2.3 From eeb322540987d42a480cf8516fb88cda46347882 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 12 Sep 2015 20:19:52 -0300 Subject: hwrng: mxc-rnga - Remove unnecessary dev_info message There is no need to print a message simply saying that a kernel driver has been registered, so remove it. Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/char/hw_random/mxc-rnga.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index c7380b80c344..ed2e3ef9f347 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -180,8 +180,6 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) goto err_ioremap; } - dev_info(&pdev->dev, "MXC RNGA Registered.\n"); - return 0; err_ioremap: -- cgit v1.2.3 From 05db0ad8656376e341db6120758d04c247c9cf2f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 12 Sep 2015 20:19:53 -0300 Subject: hwrng: mxc-rnga - Use the preferred format for kzalloc According to Documentation/CodingStyle: "The preferred form for passing a size of a struct is the following: p = kmalloc(sizeof(*p), ...);" ,so do as suggested. 
Signed-off-by: Fabio Estevam Signed-off-by: Herbert Xu --- drivers/char/hw_random/mxc-rnga.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c index ed2e3ef9f347..467362262651 100644 --- a/drivers/char/hw_random/mxc-rnga.c +++ b/drivers/char/hw_random/mxc-rnga.c @@ -145,8 +145,7 @@ static int __init mxc_rnga_probe(struct platform_device *pdev) struct resource *res; struct mxc_rng *mxc_rng; - mxc_rng = devm_kzalloc(&pdev->dev, sizeof(struct mxc_rng), - GFP_KERNEL); + mxc_rng = devm_kzalloc(&pdev->dev, sizeof(*mxc_rng), GFP_KERNEL); if (!mxc_rng) return -ENOMEM; -- cgit v1.2.3 From 5f8741d1bfa42b7e5443f2d2035de37bd5b73978 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 15 Sep 2015 18:41:33 +0200 Subject: hwrng: Fix module autoload for OF platform drivers This platform drivers have a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Herbert Xu --- drivers/char/hw_random/pasemi-rng.c | 1 + drivers/char/hw_random/ppc4xx-rng.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c index 51cb1d5cc489..699b7259f5d7 100644 --- a/drivers/char/hw_random/pasemi-rng.c +++ b/drivers/char/hw_random/pasemi-rng.c @@ -138,6 +138,7 @@ static const struct of_device_id rng_match[] = { { .compatible = "pasemi,pwrficient-rng", }, { }, }; +MODULE_DEVICE_TABLE(of, rng_match); static struct platform_driver rng_driver = { .driver = { diff --git a/drivers/char/hw_random/ppc4xx-rng.c b/drivers/char/hw_random/ppc4xx-rng.c index b2cfda0fa93e..c0db4387d2e2 100644 --- a/drivers/char/hw_random/ppc4xx-rng.c +++ b/drivers/char/hw_random/ppc4xx-rng.c @@ -129,6 +129,7 @@ static const struct of_device_id ppc4xx_rng_match[] = { { .compatible = "amcc,ppc440epx-rng", }, {}, }; +MODULE_DEVICE_TABLE(of, ppc4xx_rng_match); static struct 
platform_driver ppc4xx_rng_driver = { .driver = { -- cgit v1.2.3 From 859e58055a36e7a4948df614e3c5d872ef153f36 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 15 Sep 2015 13:54:53 -0300 Subject: crypto: caam - Remove unused JUMP_TYPE_MASK definition Commit a1efb01feca597b ("jump_label, locking/static_keys: Rename JUMP_LABEL_TYPE_* and related helpers to the static_key* pattern") introduced the definition of JUMP_TYPE_MASK in include/linux/jump_label.h causing the following name collision: In file included from drivers/crypto/caam/desc_constr.h:7:0, from drivers/crypto/caam/ctrl.c:15: drivers/crypto/caam/desc.h:1495:0: warning: "JUMP_TYPE_MASK" redefined #define JUMP_TYPE_MASK (0x03 << JUMP_TYPE_SHIFT) ^ In file included from include/linux/module.h:19:0, from drivers/crypto/caam/compat.h:9, from drivers/crypto/caam/ctrl.c:11: include/linux/jump_label.h:131:0: note: this is the location of the previous definition #define JUMP_TYPE_MASK 1UL As JUMP_TYPE_MASK definition in desc.h is never used, we can safely remove it to avoid the name collision. Reported-by: Olof's autobuilder Signed-off-by: Fabio Estevam Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/desc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 983d663ef671..1e93c6af2275 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -1492,7 +1492,6 @@ struct sec4_sg_entry { #define JUMP_JSL (1 << JUMP_JSL_SHIFT) #define JUMP_TYPE_SHIFT 22 -#define JUMP_TYPE_MASK (0x03 << JUMP_TYPE_SHIFT) #define JUMP_TYPE_LOCAL (0x00 << JUMP_TYPE_SHIFT) #define JUMP_TYPE_NONLOCAL (0x01 << JUMP_TYPE_SHIFT) #define JUMP_TYPE_HALT (0x02 << JUMP_TYPE_SHIFT) -- cgit v1.2.3 From 3cc43a0a5cea4fe2a2107e77500d6032b9bcafde Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 16 Sep 2015 05:33:06 -0700 Subject: crypto: qat - Add load balancing across devices Load balancing of crypto instances only used a single device. 
There was no problem with that on PF, but since there is only one or two instance per VF we need to loadbalance across devices. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_crypto.c | 61 ++++++++++++++++-------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 07c2f9f9d1fc..25db27c7bebb 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -60,8 +60,8 @@ static struct service_hndl qat_crypto; void qat_crypto_put_instance(struct qat_crypto_instance *inst) { - if (atomic_sub_return(1, &inst->refctr) == 0) - adf_dev_put(inst->accel_dev); + atomic_dec(&inst->refctr); + adf_dev_put(inst->accel_dev); } static int qat_crypto_free_instances(struct adf_accel_dev *accel_dev) @@ -97,19 +97,26 @@ static int qat_crypto_free_instances(struct adf_accel_dev *accel_dev) struct qat_crypto_instance *qat_crypto_get_instance_node(int node) { struct adf_accel_dev *accel_dev = NULL; - struct qat_crypto_instance *inst_best = NULL; + struct qat_crypto_instance *inst = NULL; struct list_head *itr; unsigned long best = ~0; list_for_each(itr, adf_devmgr_get_head()) { - accel_dev = list_entry(itr, struct adf_accel_dev, list); - - if ((node == dev_to_node(&GET_DEV(accel_dev)) || - dev_to_node(&GET_DEV(accel_dev)) < 0) && - adf_dev_started(accel_dev) && - !list_empty(&accel_dev->crypto_list)) - break; - accel_dev = NULL; + struct adf_accel_dev *tmp_dev; + unsigned long ctr; + + tmp_dev = list_entry(itr, struct adf_accel_dev, list); + + if ((node == dev_to_node(&GET_DEV(tmp_dev)) || + dev_to_node(&GET_DEV(tmp_dev)) < 0) && + adf_dev_started(tmp_dev) && + !list_empty(&tmp_dev->crypto_list)) { + ctr = atomic_read(&tmp_dev->ref_count); + if (best > ctr) { + accel_dev = tmp_dev; + best = ctr; + } + } } if (!accel_dev) { pr_err("QAT: Could not find a device on node %d\n", node); @@ -118,28 
+125,26 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) if (!accel_dev || !adf_dev_started(accel_dev)) return NULL; + best = ~0; list_for_each(itr, &accel_dev->crypto_list) { - struct qat_crypto_instance *inst; - unsigned long cur; - - inst = list_entry(itr, struct qat_crypto_instance, list); - cur = atomic_read(&inst->refctr); - if (best > cur) { - inst_best = inst; - best = cur; + struct qat_crypto_instance *tmp_inst; + unsigned long ctr; + + tmp_inst = list_entry(itr, struct qat_crypto_instance, list); + ctr = atomic_read(&tmp_inst->refctr); + if (best > ctr) { + inst = tmp_inst; + best = ctr; } } - if (inst_best) { - if (atomic_add_return(1, &inst_best->refctr) == 1) { - if (adf_dev_get(accel_dev)) { - atomic_dec(&inst_best->refctr); - dev_err(&GET_DEV(accel_dev), - "Could not increment dev refctr\n"); - return NULL; - } + if (inst) { + if (adf_dev_get(accel_dev)) { + dev_err(&GET_DEV(accel_dev), "Could not increment dev refctr\n"); + return NULL; } + atomic_inc(&inst->refctr); } - return inst_best; + return inst; } static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) -- cgit v1.2.3 From 4dd17c9c8a30c8d8cd1c9d4b94f08aca4b038d3e Mon Sep 17 00:00:00 2001 From: sudip Date: Thu, 17 Sep 2015 13:12:51 +0530 Subject: crypto: asymmetric_keys - remove always false comparison hour, min and sec are unsigned int and they can never be less than zero. 
Signed-off-by: Sudip Mukherjee Signed-off-by: Herbert Xu --- crypto/asymmetric_keys/x509_cert_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index af71878dc15b..3000ea3b6687 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -546,9 +546,9 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, if (year < 1970 || mon < 1 || mon > 12 || day < 1 || day > mon_len || - hour < 0 || hour > 23 || - min < 0 || min > 59 || - sec < 0 || sec > 59) + hour > 23 || + min > 59 || + sec > 59) goto invalid_time; *_t = mktime64(year, mon, day, hour, min, sec); -- cgit v1.2.3 From e9e23158f2f5a0992f27d367aa1f11ba12b0b54c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:51 +0100 Subject: hwrng: doc - Fix device node name reference /dev/hw_random => /dev/hwrng In April 2009, commit d405640 ("Driver Core: misc: add node name support for misc devices.") inadvertently changed the device node name from /dev/hw_random to /dev/hwrng. Since 6 years has passed since the change it seems unpractical to change it back, as this node name is probably considered ABI by now. So instead, we'll just change the documentation to match the current situation. NB: It looks like rng-tools have already been updated. Signed-off-by: Lee Jones Acked-by: Kieran Bingham Signed-off-by: Herbert Xu --- Documentation/hw_random.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt index 026e237bbc87..fce1634907d0 100644 --- a/Documentation/hw_random.txt +++ b/Documentation/hw_random.txt @@ -3,7 +3,7 @@ Introduction: The hw_random framework is software that makes use of a special hardware feature on your CPU or motherboard, a Random Number Generator (RNG). 
The software has two parts: - a core providing the /dev/hw_random character device and its + a core providing the /dev/hwrng character device and its sysfs support, plus a hardware-specific driver that plugs into that core. @@ -14,7 +14,7 @@ Introduction: http://sourceforge.net/projects/gkernel/ - Those tools use /dev/hw_random to fill the kernel entropy pool, + Those tools use /dev/hwrng to fill the kernel entropy pool, which is used internally and exported by the /dev/urandom and /dev/random special files. @@ -32,13 +32,13 @@ Theory of operation: The rng-tools package uses such tests in "rngd", and lets you run them by hand with a "rngtest" utility. - /dev/hw_random is char device major 10, minor 183. + /dev/hwrng is char device major 10, minor 183. CLASS DEVICE. There is a /sys/class/misc/hw_random node with two unique attributes, "rng_available" and "rng_current". The "rng_available" attribute lists the hardware-specific drivers available, while "rng_current" lists the one which is currently - connected to /dev/hw_random. If your system has more than one + connected to /dev/hwrng. If your system has more than one RNG available, you may change the one used by writing a name from the list in "rng_available" into "rng_current". -- cgit v1.2.3 From cb2c316b22d89072643d2685ef0eb883cae8a9de Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:52 +0100 Subject: hwrng: Kconfig - Fix device node name reference /dev/hw_random => /dev/hwrng In April 2009, commit d405640 ("Driver Core: misc: add node name support for misc devices.") inadvertently changed the device node name from /dev/hw_random to /dev/hwrng. Since 6 years has passed since the change it seems unpractical to change it back, as this node name is probably considered ABI by now. So instead, we'll just change the Kconfig help to match the current situation. NB: It looks like rng-tools have already been updated. 
Signed-off-by: Lee Jones Acked-by: Kieran Bingham Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index f48cf11c655e..8998108f9721 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -10,7 +10,7 @@ menuconfig HW_RANDOM To compile this driver as a module, choose M here: the module will be called rng-core. This provides a device - that's usually called /dev/hw_random, and which exposes one + that's usually called /dev/hwrng, and which exposes one of possibly several hardware random number generators. These hardware random number generators do not feed directly -- cgit v1.2.3 From d9a53b01404fa602117220e98bb6c9b2b63f1e7f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:53 +0100 Subject: hwrng: core - Simplify RNG switching from sysfs If we attempt to use sysfs to change the current RNG in the usual way i.e. issuing something like: `echo 8a8a000.rng > /sys/devices/virtual/misc/hw_random/rng_current` ... it will fail because the code doesn't currently take the '\n' into consideration. Well, now it does. 
Signed-off-by: Lee Jones Acked-by: Peter Korsgaard Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5643b65cee20..6f497aa1b276 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -323,7 +323,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev, return -ERESTARTSYS; err = -ENODEV; list_for_each_entry(rng, &rng_list, list) { - if (strcmp(rng->name, buf) == 0) { + if (sysfs_streq(rng->name, buf)) { err = 0; if (rng != current_rng) err = set_current_rng(rng); -- cgit v1.2.3 From b4a5407ea82869bf947c8a1176984f80c30e25a6 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:54 +0100 Subject: hwrng: st - Provide DT bindings for ST's Random Number Generator Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/st,rng.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 Documentation/devicetree/bindings/rng/st,rng.txt diff --git a/Documentation/devicetree/bindings/rng/st,rng.txt b/Documentation/devicetree/bindings/rng/st,rng.txt new file mode 100644 index 000000000000..dbc64e61d8d9 --- /dev/null +++ b/Documentation/devicetree/bindings/rng/st,rng.txt @@ -0,0 +1,15 @@ +STMicroelectronics HW Random Number Generator +---------------------------------------------- + +Required parameters: +compatible : Should be "st,rng" +reg : Base address and size of IP's register map. 
+clocks : Phandle to device's clock (See: ../clocks/clock-bindings.txt) + +Example: + +rng@0xfee80000 { + compatible = "st,rng"; + reg = <0xfee80000 0x1000>; + clocks = <&clk_sysin>; +} -- cgit v1.2.3 From 4a4da53c408c9e1e545b60d2b07635d08a949c99 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:55 +0100 Subject: hwrng: st - Add support for ST's HW Random Number Generator Signed-off-by: Pankaj Dev Signed-off-by: Lee Jones Acked-by: Kieran Bingham Signed-off-by: Herbert Xu --- drivers/char/hw_random/Kconfig | 10 +++ drivers/char/hw_random/Makefile | 1 + drivers/char/hw_random/st-rng.c | 144 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 drivers/char/hw_random/st-rng.c diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 8998108f9721..ba5406b9a072 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -346,6 +346,16 @@ config HW_RANDOM_MSM If unsure, say Y. +config HW_RANDOM_ST + tristate "ST Microelectronics HW Random Number Generator support" + depends on HW_RANDOM && ARCH_STI + ---help--- + This driver provides kernel-side support for the Random Number + Generator hardware found on STi series of SoCs. + + To compile this driver as a module, choose M here: the + module will be called st-rng. 
+ config HW_RANDOM_XGENE tristate "APM X-Gene True Random Number Generator (TRNG) support" depends on HW_RANDOM && ARCH_XGENE diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile index 055bb01510ad..8bcfb45af2ec 100644 --- a/drivers/char/hw_random/Makefile +++ b/drivers/char/hw_random/Makefile @@ -30,4 +30,5 @@ obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o +obj-$(CONFIG_HW_RANDOM_ST) += st-rng.o obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c new file mode 100644 index 000000000000..8c8a435176e8 --- /dev/null +++ b/drivers/char/hw_random/st-rng.c @@ -0,0 +1,144 @@ +/* + * ST Random Number Generator Driver ST's Platforms + * + * Author: Pankaj Dev: + * Lee Jones + * + * Copyright (C) 2015 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define ST_RNG_STATUS_REG 0x20 +#define ST_RNG_DATA_REG 0x24 + +/* Registers fields */ +#define ST_RNG_STATUS_BAD_SEQUENCE BIT(0) +#define ST_RNG_STATUS_BAD_ALTERNANCE BIT(1) +#define ST_RNG_STATUS_FIFO_FULL BIT(5) + +#define ST_RNG_FIFO_SIZE 8 +#define ST_RNG_SAMPLE_SIZE 2 /* 2 Byte (16bit) samples */ + +/* Samples are available every 0.667us, which we round to 1us */ +#define ST_RNG_FILL_FIFO_TIMEOUT (1 * (ST_RNG_FIFO_SIZE / ST_RNG_SAMPLE_SIZE)) + +struct st_rng_data { + void __iomem *base; + struct clk *clk; + struct hwrng ops; +}; + +static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct st_rng_data *ddata = (struct st_rng_data *)rng->priv; + u32 status; + int i; + + if (max < sizeof(u16)) + return -EINVAL; + + /* Wait until FIFO is full - max 4uS*/ + for (i = 0; i < ST_RNG_FILL_FIFO_TIMEOUT; i++) { + status = readl_relaxed(ddata->base + ST_RNG_STATUS_REG); + if (status & ST_RNG_STATUS_FIFO_FULL) + break; + udelay(1); + } + + if (i == ST_RNG_FILL_FIFO_TIMEOUT) + return 0; + + for (i = 0; i < ST_RNG_FIFO_SIZE && i < max; i += 2) + *(u16 *)(data + i) = + readl_relaxed(ddata->base + ST_RNG_DATA_REG); + + return i; /* No of bytes read */ +} + +static int st_rng_probe(struct platform_device *pdev) +{ + struct st_rng_data *ddata; + struct resource *res; + struct clk *clk; + void __iomem *base; + int ret; + + ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL); + if (!ddata) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = clk_prepare_enable(clk); + if (ret) + return ret; + + ddata->ops.priv = (unsigned long)ddata; + ddata->ops.read = st_rng_read; + ddata->ops.name = pdev->name; + ddata->base = base; + 
ddata->clk = clk; + + dev_set_drvdata(&pdev->dev, ddata); + + ret = hwrng_register(&ddata->ops); + if (ret) { + dev_err(&pdev->dev, "Failed to register HW RNG\n"); + return ret; + } + + dev_info(&pdev->dev, "Successfully registered HW RNG\n"); + + return 0; +} + +static int st_rng_remove(struct platform_device *pdev) +{ + struct st_rng_data *ddata = dev_get_drvdata(&pdev->dev); + + hwrng_unregister(&ddata->ops); + + clk_disable_unprepare(ddata->clk); + + return 0; +} + +static const struct of_device_id st_rng_match[] = { + { .compatible = "st,rng" }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_rng_match); + +static struct platform_driver st_rng_driver = { + .driver = { + .name = "st-hwrandom", + .of_match_table = of_match_ptr(st_rng_match), + }, + .probe = st_rng_probe, + .remove = st_rng_remove +}; + +module_platform_driver(st_rng_driver); + +MODULE_AUTHOR("Pankaj Dev "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From ba25d8b4011bc496afff65b2f28136aa141d7d6b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:56 +0100 Subject: ARM: STi: STiH407: Enable the 2 HW Random Number Generators for STiH4{07, 10} Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- arch/arm/boot/dts/stih407-family.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ae0527754000..0c24fcb03577 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -610,5 +610,19 @@ clocks = <&clk_sysin>; st,pwm-num-chan = <4>; }; + + rng10: rng@08a89000 { + compatible = "st,rng"; + reg = <0x08a89000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; + + rng11: rng@08a8a000 { + compatible = "st,rng"; + reg = <0x08a8a000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; }; }; -- cgit v1.2.3 From b8e31bf3083106e8530d6d5786e23b365b49b11b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:57 +0100 Subject: MAINTAINERS: Add 
ST's Random Number Generator to the ST entry Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 274f85405584..698c21182c94 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1521,6 +1521,7 @@ W: http://www.stlinux.com S: Maintained F: arch/arm/mach-sti/ F: arch/arm/boot/dts/sti* +F: drivers/char/hw_random/st-rng.c F: drivers/clocksource/arm_global_timer.c F: drivers/clocksource/clksrc_st_lpc.c F: drivers/i2c/busses/i2c-st.c -- cgit v1.2.3 From c356a7e975a25e8867961c1b7a4a965d506f0a04 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:26:59 -0700 Subject: crypto: x86/sha - Intel SHA Extensions optimized SHA1 transform function This patch includes the Intel SHA Extensions optimized implementation of SHA-1 update function. This function has been tested on Broxton platform and measured a speed up of 3.6x over the SSSE3 implementation for 4K blocks. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ni_asm.S | 302 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 arch/x86/crypto/sha1_ni_asm.S diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S new file mode 100644 index 000000000000..874a651b9e7d --- /dev/null +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -0,0 +1,302 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define RSPSAVE %rax + +/* gcc conversion */ +#define FRAME_SIZE 32 /* space for 2x16 bytes */ + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. 
+ * + * void sha1_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks) + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ +.text +.align 32 +ENTRY(sha1_ni_transform) + mov %rsp, RSPSAVE + sub $FRAME_SIZE, %rsp + and $~0xF, %rsp + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* load initial hash values */ + pinsrd $3, 1*16(DIGEST_PTR), E0 + movdqu 0*16(DIGEST_PTR), ABCD + pand UPPER_WORD_MASK(%rip), E0 + pshufd $0x1B, ABCD, ABCD + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa E0, (0*16)(%rsp) + movdqa ABCD, (1*16)(%rsp) + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + 
sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + pxor MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte (0*16)(%rsp), E0 + paddd (1*16)(%rsp), ABCD + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd 
$0x1B, ABCD, ABCD + movdqu ABCD, 0*16(DIGEST_PTR) + pextrd $3, E0, 1*16(DIGEST_PTR) + +.Ldone_hash: + mov RSPSAVE, %rsp + + ret +ENDPROC(sha1_ni_transform) + +.data + +.align 64 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f +UPPER_WORD_MASK: + .octa 0xFFFFFFFF000000000000000000000000 -- cgit v1.2.3 From 600a2334e83d22e5c3f7ff2581f545bfc354d206 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:13 -0700 Subject: crypto: x86/sha - Intel SHA Extensions optimized SHA256 transform function This patch includes the Intel SHA Extensions optimized implementation of SHA-256 update function. This function has been tested on Broxton platform and measured a speed up of 3.6x over the SSSE3 implementation for 4K blocks. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 353 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 arch/x86/crypto/sha256_ni_asm.S diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S new file mode 100644 index 000000000000..748cdf21a938 --- /dev/null +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -0,0 +1,353 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define SHA256CONSTANTS %rax + +#define MSG %xmm0 +#define STATE0 %xmm1 +#define STATE1 %xmm2 +#define MSGTMP0 %xmm3 +#define MSGTMP1 %xmm4 +#define MSGTMP2 %xmm5 +#define MSGTMP3 %xmm6 +#define MSGTMP4 %xmm7 + +#define SHUF_MASK %xmm8 + +#define ABEF_SAVE %xmm9 +#define CDGH_SAVE %xmm10 + +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. 
+ * + * void sha256_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks); + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ + +.text +.align 32 +ENTRY(sha256_ni_transform) + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* + * load initial hash values + * Need to reorder these appropriately + * DCBA, HGFE -> ABEF, CDGH + */ + movdqu 0*16(DIGEST_PTR), STATE0 + movdqu 1*16(DIGEST_PTR), STATE1 + + pshufd $0xB1, STATE0, STATE0 /* CDAB */ + pshufd $0x1B, STATE1, STATE1 /* EFGH */ + movdqa STATE0, MSGTMP4 + palignr $8, STATE1, STATE0 /* ABEF */ + pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + lea K256(%rip), SHA256CONSTANTS + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa STATE0, ABEF_SAVE + movdqa STATE1, CDGH_SAVE + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP0 + paddd 0*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP1 + paddd 1*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP2 + paddd 2*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP3 + paddd 3*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, 
MSGTMP2 + + /* Rounds 16-19 */ + movdqa MSGTMP0, MSG + paddd 4*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 20-23 */ + movdqa MSGTMP1, MSG + paddd 5*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 24-27 */ + movdqa MSGTMP2, MSG + paddd 6*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 28-31 */ + movdqa MSGTMP3, MSG + paddd 7*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 32-35 */ + movdqa MSGTMP0, MSG + paddd 8*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 36-39 */ + movdqa MSGTMP1, MSG + paddd 9*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 40-43 */ + movdqa MSGTMP2, MSG + paddd 10*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, 
MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 44-47 */ + movdqa MSGTMP3, MSG + paddd 11*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 48-51 */ + movdqa MSGTMP0, MSG + paddd 12*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 52-55 */ + movdqa MSGTMP1, MSG + paddd 13*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 56-59 */ + movdqa MSGTMP2, MSG + paddd 14*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 60-63 */ + movdqa MSGTMP3, MSG + paddd 15*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Add current hash values with previously saved */ + paddd ABEF_SAVE, STATE0 + paddd CDGH_SAVE, STATE1 + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, STATE0, STATE0 /* FEBA */ + pshufd $0xB1, STATE1, STATE1 /* DCHG */ + movdqa STATE0, MSGTMP4 + pblendw $0xF0, STATE1, STATE0 /* DCBA */ + palignr $8, MSGTMP4, STATE1 /* HGFE */ + + movdqu STATE0, 0*16(DIGEST_PTR) + movdqu 
STATE1, 1*16(DIGEST_PTR) + +.Ldone_hash: + + ret +ENDPROC(sha256_ni_transform) + +.data +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 -- cgit v1.2.3 From 95fca7df0b4964fbe3fe159e3d6e681e6b5b7a53 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:20 -0700 Subject: crypto: x86/sha - glue code for Intel SHA extensions optimized SHA1 & SHA256 This patch adds the glue code to detect and utilize the Intel SHA extensions optimized SHA1 and SHA256 update transforms when available. This code has been tested on Broxton for functionality. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_glue.c | 12 +++++++++++- arch/x86/crypto/sha256_ssse3_glue.c | 38 ++++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7c48e8b20848..98be8cc17ca2 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -44,6 +44,10 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data, asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, unsigned int rounds); #endif +#ifdef CONFIG_AS_SHA1_NI +asmlinkage void sha1_ni_transform(u32 *digest, const char *data, + unsigned int rounds); +#endif static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); @@ -166,12 +170,18 @@ static int __init sha1_ssse3_mod_init(void) #endif } #endif +#ifdef CONFIG_AS_SHA1_NI + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { + sha1_transform_asm = sha1_ni_transform; + algo_name = "SHA-NI"; + } +#endif if (sha1_transform_asm) { pr_info("Using %s optimized SHA-1 implementation\n", algo_name); return crypto_register_shash(&alg); } - pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor AVX2 nor SSSE3/SHA-NI is available/usable.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f8097fc0d1d1..9c7b22c489f6 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -50,6 +50,10 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data, asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, u64 rounds); #endif +#ifdef CONFIG_AS_SHA256_NI +asmlinkage void sha256_ni_transform(u32 *digest, const char *data, + u64 rounds); /*unsigned int rounds);*/ +#endif static void (*sha256_transform_asm)(u32 *, const char *, u64); @@ -142,36 +146,40 @@ static bool __init avx_usable(void) static int __init 
sha256_ssse3_mod_init(void) { + char *algo; + /* test for SSSE3 first */ - if (cpu_has_ssse3) + if (cpu_has_ssse3) { sha256_transform_asm = sha256_transform_ssse3; + algo = "SSSE3"; + } #ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) { + sha256_transform_asm = sha256_transform_avx; + algo = "AVX"; #ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) + if (boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) { sha256_transform_asm = sha256_transform_rorx; - else + algo = "AVX2"; + } +#endif + } #endif - sha256_transform_asm = sha256_transform_avx; +#ifdef CONFIG_AS_SHA256_NI + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { + sha256_transform_asm = sha256_ni_transform; + algo = "SHA-256-NI"; } #endif if (sha256_transform_asm) { -#ifdef CONFIG_AS_AVX - if (sha256_transform_asm == sha256_transform_avx) - pr_info("Using AVX optimized SHA-256 implementation\n"); -#ifdef CONFIG_AS_AVX2 - else if (sha256_transform_asm == sha256_transform_rorx) - pr_info("Using AVX2 optimized SHA-256 implementation\n"); -#endif - else -#endif - pr_info("Using SSSE3 optimized SHA-256 implementation\n"); + pr_info("Using %s optimized SHA-256 implementation\n", algo); return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor SSSE3/SHA-NI is available/usable.\n"); return -ENODEV; } -- cgit v1.2.3 From e38b6b7fcfd11fb83dcac54a33cbca3739c45a09 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:26 -0700 Subject: crypto: x86/sha - Add build support for Intel SHA Extensions optimized SHA1 and SHA256 This patch provides the configuration and build support to include and build the optimized SHA1 and SHA256 update transforms for the kernel's crypto library. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/Makefile | 6 ++++-- arch/x86/crypto/Makefile | 8 ++++++++ crypto/Kconfig | 10 ++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 747860c696e1..a8009c77918a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -165,9 +165,11 @@ asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) +sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) +sha256_ni_instr :=$(call as-in