From 7fcb5e66e8dbda6e9164d59aa4c01a612df4e236 Mon Sep 17 00:00:00 2001 From: Kapil Gupta Date: Thu, 12 Mar 2026 12:00:05 +0530 Subject: [PATCH 1/2] ci(esp_wifi): Update supplicant's crypto Unit test cases --- .../test_apps/main/CMakeLists.txt | 2 +- .../test_apps/main/test_crypto.c | 414 +++++++++++++++++- .../wpa_supplicant/test_apps/main/test_sae.c | 36 +- .../test_apps/main/test_wpa_supplicant_main.c | 2 +- 4 files changed, 449 insertions(+), 5 deletions(-) diff --git a/components/wpa_supplicant/test_apps/main/CMakeLists.txt b/components/wpa_supplicant/test_apps/main/CMakeLists.txt index 5f2fc90cc2..8d910c610e 100644 --- a/components/wpa_supplicant/test_apps/main/CMakeLists.txt +++ b/components/wpa_supplicant/test_apps/main/CMakeLists.txt @@ -10,7 +10,7 @@ idf_component_register(SRCS "test_wpa_supplicant_main.c" "test_wifi_external_bss.c" PRIV_INCLUDE_DIRS "." - PRIV_REQUIRES wpa_supplicant mbedtls esp_wifi esp_event unity esp_psram + PRIV_REQUIRES wpa_supplicant mbedtls esp_wifi esp_event unity esp_psram esp_timer WHOLE_ARCHIVE) idf_component_get_property(esp_supplicant_dir wpa_supplicant COMPONENT_DIR) diff --git a/components/wpa_supplicant/test_apps/main/test_crypto.c b/components/wpa_supplicant/test_apps/main/test_crypto.c index cd925acad0..1e8bfe628a 100644 --- a/components/wpa_supplicant/test_apps/main/test_crypto.c +++ b/components/wpa_supplicant/test_apps/main/test_crypto.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2025 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2026 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -8,6 +8,7 @@ #include #include #include +#include #include "unity.h" #include #include "utils/common.h" @@ -19,12 +20,14 @@ #include "mbedtls/pk.h" #include "test_utils.h" #include "test_wpa_supplicant_common.h" +#include "esp_log.h" +#include #include "psa/crypto.h" #include "mbedtls/psa_util.h" #include "esp_heap_caps.h" #include "crypto/sha384.h" -#include "esp_log.h" +#include "esp_timer.h" typedef struct crypto_bignum crypto_bignum; @@ -33,6 +36,93 @@ typedef struct { psa_key_id_t key_id; } crypto_ec_key_wrapper_test_t; +static const uint8_t test_secp256r1_prime[32] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static int test_mbedtls_rng(void *ctx, unsigned char *buf, size_t len) +{ + (void) ctx; + return os_get_random(buf, len) == 0 ? 0 : MBEDTLS_ERR_ECP_RANDOM_FAILED; +} + +static void test_load_valid_p256_scalar(const mbedtls_ecp_group *grp, + const uint8_t *seed, size_t seed_len, + mbedtls_mpi *scalar) +{ + mbedtls_mpi range; + + mbedtls_mpi_init(&range); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_sub_int(&range, &grp->N, 1)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_read_binary(scalar, seed, seed_len)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_mod_mpi(scalar, scalar, &range)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_add_int(scalar, scalar, 1)); + mbedtls_mpi_free(&range); +} + +static void test_make_p256_affine_point(mbedtls_ecp_group *grp, + unsigned int multiplier, + mbedtls_ecp_point *point) +{ + mbedtls_mpi k; + + mbedtls_mpi_init(&k); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_lset(&k, multiplier)); + TEST_ASSERT_EQUAL(0, mbedtls_ecp_mul(grp, point, &k, &grp->G, + test_mbedtls_rng, NULL)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_cmp_int(&point->MBEDTLS_PRIVATE(Z), 1)); + mbedtls_mpi_free(&k); +} + +static int test_legendre_reference(const mbedtls_mpi *a, const mbedtls_mpi *p) +{ + mbedtls_mpi a_mod, exp, res, one, pm1; + int legendre = -2; + + mbedtls_mpi_init(&a_mod); + mbedtls_mpi_init(&exp); + mbedtls_mpi_init(&res); + mbedtls_mpi_init(&one); + mbedtls_mpi_init(&pm1); + + TEST_ASSERT_EQUAL(0, mbedtls_mpi_mod_mpi(&a_mod, a, p)); + if (mbedtls_mpi_cmp_int(&a_mod, 0) == 0) { + legendre = 0; + goto cleanup; + } + + TEST_ASSERT_EQUAL(0, mbedtls_mpi_copy(&exp, p)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_sub_int(&exp, &exp, 1)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_shift_r(&exp, 1)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_exp_mod(&res, &a_mod, &exp, p, NULL)); + + TEST_ASSERT_EQUAL(0, mbedtls_mpi_lset(&one, 1)); + if (mbedtls_mpi_cmp_mpi(&res, &one) == 0) { + legendre = 1; + goto cleanup; + } + + TEST_ASSERT_EQUAL(0, mbedtls_mpi_copy(&pm1, p)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_sub_int(&pm1, &pm1, 1)); + if (mbedtls_mpi_cmp_mpi(&res, &pm1) == 0) { + legendre = -1; + goto cleanup; + } + + TEST_FAIL_MESSAGE("Unexpected Legendre reference result"); + +cleanup: + mbedtls_mpi_free(&a_mod); + mbedtls_mpi_free(&exp); + mbedtls_mpi_free(&res); + mbedtls_mpi_free(&one); + mbedtls_mpi_free(&pm1); + return legendre; +} + TEST_CASE("Test crypto lib bignum apis", "[wpa_crypto]") { set_leak_threshold(300); @@ -216,6 +306,38 @@ TEST_CASE("Test crypto lib bignum apis", "[wpa_crypto]") } + { /** BN mul mod on secp256r1 prime */ + uint8_t val[32]; + uint8_t one[32] = {0}; + crypto_bignum *bn1, *bn2, *bn3, *mulmod; + + one[0] = 1; + os_memcpy(val, test_secp256r1_prime, sizeof(val)); + val[31]--; + + mulmod = crypto_bignum_init(); + TEST_ASSERT_NOT_NULL(mulmod); + + bn1 = crypto_bignum_init_set(val, sizeof(val)); + TEST_ASSERT_NOT_NULL(bn1); + + bn2 = crypto_bignum_init_set(val, sizeof(val)); + TEST_ASSERT_NOT_NULL(bn2); + + bn3 = crypto_bignum_init_set(test_secp256r1_prime, + sizeof(test_secp256r1_prime)); + TEST_ASSERT_NOT_NULL(bn3); + + TEST_ASSERT(crypto_bignum_mulmod(bn1, bn2, bn3, mulmod) == 0); + TEST_ASSERT(crypto_bignum_to_bin(mulmod, val, sizeof(val), 0) == 1); + TEST_ASSERT_EQUAL_UINT8_ARRAY(one, val, 1); + + crypto_bignum_deinit(bn1, 1); + crypto_bignum_deinit(bn2, 1); + crypto_bignum_deinit(bn3, 1); + crypto_bignum_deinit(mulmod, 1); + } + { /** BN exp mod*/ uint8_t buf1[32], buf2[32], buf3[32], buf4[32], buf5[32]; @@ -286,6 +408,134 @@ TEST_CASE("Test crypto lib bignum apis", "[wpa_crypto]") crypto_bignum_deinit(bn2, 1); } + + { /** BN Legendre symbol test on secp256r1 prime */ + uint8_t val[32] = {0}; + crypto_bignum *bn_val, *bn_p; + + bn_p = crypto_bignum_init_set(test_secp256r1_prime, + sizeof(test_secp256r1_prime)); + TEST_ASSERT_NOT_NULL(bn_p); + + val[31] = 1; + bn_val = crypto_bignum_init_set(val, sizeof(val)); + TEST_ASSERT_NOT_NULL(bn_val); + TEST_ASSERT(crypto_bignum_legendre(bn_val, bn_p) == 1); + crypto_bignum_deinit(bn_val, 1); + + os_memset(val, 0, sizeof(val)); + bn_val = crypto_bignum_init_set(val, sizeof(val)); + TEST_ASSERT_NOT_NULL(bn_val); + TEST_ASSERT(crypto_bignum_legendre(bn_val, bn_p) == 0); + crypto_bignum_deinit(bn_val, 1); + + os_memcpy(val, test_secp256r1_prime, sizeof(val)); + val[31]--; + bn_val = crypto_bignum_init_set(val, sizeof(val)); + TEST_ASSERT_NOT_NULL(bn_val); + TEST_ASSERT(crypto_bignum_legendre(bn_val, bn_p) == -1); + crypto_bignum_deinit(bn_val, 1); + + crypto_bignum_deinit(bn_p, 1); + } +} + +TEST_CASE("Test secp256r1 fast bignum paths against mbedtls reference", "[wpa_crypto]") +{ + static const uint8_t a_cases[][32] = { + { + 0xa5, 0xa5, 0xa5, 0xa5, 0x5a, 0x5a, 0x5a, 0x5a, + 0x11, 0x22, 0x33, 0x44, 0x88, 0x77, 0x66, 0x55, + 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef + }, + { + 0xff, 0xee, 0xdd, 0xcc, 0xbb, 0xaa, 0x99, 0x88, + 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11, 0x00, + 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, + 0xca, 0xfe, 0xba, 0xbe, 0x13, 0x57, 0x9b, 0xdf + }, + { + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, + 0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78, + 0x87, 0x96, 0xa5, 0xb4, 0xc3, 0xd2, 0xe1, 0xf0 + } + }; + static const uint8_t b_cases[][32] = { + { + 0x13, 0x57, 0x9b, 0xdf, 0xca, 0xfe, 0xba, 0xbe, + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, + 0x55, 0xaa, 0x55, 0xaa, 0x12, 0x34, 0x56, 0x78 + }, + { + 0x24, 0x68, 0xac, 0xe0, 0x0f, 0x1e, 0x2d, 0x3c, + 0x4b, 0x5a, 0x69, 0x78, 0x87, 0x96, 0xa5, 0xb4, + 0xc3, 0xd2, 0xe1, 0xf0, 0xff, 0x00, 0xee, 0x11, + 0xdd, 0x22, 0xcc, 0x33, 0xbb, 0x44, 0xaa, 0x55 + }, + { + 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x08, + 0xf1, 0xe2, 0xd3, 0xc4, 0xb5, 0xa6, 0x97, 0x88, + 0x79, 0x6a, 0x5b, 0x4c, 0x3d, 0x2e, 0x1f, 0x10, + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf1 + } + }; + mbedtls_mpi ref_a, ref_b, ref_p, ref_mul; + struct crypto_bignum *bn_a, *bn_b, *bn_p, *bn_mul; + int i; + uint8_t got[32], expected[32]; + + set_leak_threshold(620); + + mbedtls_mpi_init(&ref_a); + mbedtls_mpi_init(&ref_b); + mbedtls_mpi_init(&ref_p); + mbedtls_mpi_init(&ref_mul); + + TEST_ASSERT_EQUAL(0, mbedtls_mpi_read_binary(&ref_p, + test_secp256r1_prime, + sizeof(test_secp256r1_prime))); + bn_p = crypto_bignum_init_set(test_secp256r1_prime, + sizeof(test_secp256r1_prime)); + TEST_ASSERT_NOT_NULL(bn_p); + bn_mul = crypto_bignum_init(); + TEST_ASSERT_NOT_NULL(bn_mul); + + for (i = 0; i < ARRAY_SIZE(a_cases); i++) { + TEST_ASSERT_EQUAL(0, mbedtls_mpi_read_binary(&ref_a, + a_cases[i], + sizeof(a_cases[i]))); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_read_binary(&ref_b, + b_cases[i], + sizeof(b_cases[i]))); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_mul_mpi(&ref_mul, &ref_a, &ref_b)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_mod_mpi(&ref_mul, &ref_mul, &ref_p)); + TEST_ASSERT_EQUAL(0, mbedtls_mpi_write_binary(&ref_mul, expected, + sizeof(expected))); + + bn_a = crypto_bignum_init_set(a_cases[i], sizeof(a_cases[i])); + TEST_ASSERT_NOT_NULL(bn_a); + bn_b = crypto_bignum_init_set(b_cases[i], sizeof(b_cases[i])); + TEST_ASSERT_NOT_NULL(bn_b); + + TEST_ASSERT_EQUAL(0, crypto_bignum_mulmod(bn_a, bn_b, bn_p, bn_mul)); + TEST_ASSERT_EQUAL(32, crypto_bignum_to_bin(bn_mul, got, sizeof(got), 0)); + TEST_ASSERT_EQUAL_UINT8_ARRAY(expected, got, sizeof(got)); + TEST_ASSERT_EQUAL(test_legendre_reference(&ref_a, &ref_p), + crypto_bignum_legendre(bn_a, bn_p)); + + crypto_bignum_deinit(bn_a, 1); + crypto_bignum_deinit(bn_b, 1); + } + + crypto_bignum_deinit(bn_p, 1); + crypto_bignum_deinit(bn_mul, 1); + mbedtls_mpi_free(&ref_a); + mbedtls_mpi_free(&ref_b); + mbedtls_mpi_free(&ref_p); + mbedtls_mpi_free(&ref_mul); } /* @@ -550,6 +800,80 @@ TEST_CASE("Test crypto lib ECC apis", "[wpa_crypto]") } +TEST_CASE("Test secp256r1 point multiply against mbedtls reference", "[wpa_crypto]") +{ + static const uint8_t scalar_seeds[][32] = { + { + 0xff, 0xff, 0xff, 0xff, 0xde, 0xad, 0xbe, 0xef, + 0xca, 0xfe, 0xba, 0xbe, 0x88, 0x99, 0xaa, 0xbb, + 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe, + 0x13, 0x57, 0x9b, 0xdf, 0x24, 0x68, 0xac, 0xe0 + }, + { + 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0x5a, + 0x5a, 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, 0x5a, 0xa5, + 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x78, + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 + }, + { + 0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78, + 0x87, 0x96, 0xa5, 0xb4, 0xc3, 0xd2, 0xe1, 0xf0, + 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, + 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f + } + }; + const unsigned int point_multipliers[] = { 7, 13 }; + struct crypto_ec *e; + struct crypto_ec_point *p = NULL; + struct crypto_ec_point *res = NULL; + mbedtls_ecp_point ref; + int i, j; + + set_leak_threshold(620); + + e = crypto_ec_init(19); + TEST_ASSERT_NOT_NULL(e); + + p = crypto_ec_point_init(e); + TEST_ASSERT_NOT_NULL(p); + res = crypto_ec_point_init(e); + TEST_ASSERT_NOT_NULL(res); + mbedtls_ecp_point_init(&ref); + + for (i = 0; i < ARRAY_SIZE(point_multipliers); i++) { + test_make_p256_affine_point((mbedtls_ecp_group *) e, + point_multipliers[i], + (mbedtls_ecp_point *) p); + + for (j = 0; j < ARRAY_SIZE(scalar_seeds); j++) { + mbedtls_mpi scalar; + + mbedtls_mpi_init(&scalar); + test_load_valid_p256_scalar((const mbedtls_ecp_group *) e, + scalar_seeds[j], + sizeof(scalar_seeds[j]), + &scalar); + + TEST_ASSERT_EQUAL(0, crypto_ec_point_mul(e, p, + (struct crypto_bignum *) &scalar, + res)); + TEST_ASSERT_EQUAL(0, mbedtls_ecp_mul((mbedtls_ecp_group *) e, + &ref, &scalar, + (const mbedtls_ecp_point *) p, + test_mbedtls_rng, NULL)); + TEST_ASSERT_EQUAL(0, crypto_ec_point_cmp(e, res, + (const struct crypto_ec_point *) &ref)); + + mbedtls_mpi_free(&scalar); + } + } + + mbedtls_ecp_point_free(&ref); + crypto_ec_point_deinit(p, 1); + crypto_ec_point_deinit(res, 1); + crypto_ec_deinit(e); +} + TEST_CASE("Test crypto lib aes apis", "[wpa_crypto]") { set_leak_threshold(1); @@ -1485,3 +1809,89 @@ TEST_CASE("Test crypto_ecdh_set_peerkey with X-only coordinate (OWE case)", "[wp crypto_ecdh_deinit(peer_ecdh); } } + +TEST_CASE("crypto_ec_point_mul Performance", "[wpa_crypto][performance]") +{ + + /* Test vectors for SECP256R1 - same as mbedtls test */ + static const uint8_t test_point_x[] = { + 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, + 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, + 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, + 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96 + }; + + static const uint8_t test_point_y[] = { + 0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, + 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, + 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, + 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5 + }; + + static const uint8_t test_scalar[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05 /* scalar = 5 */ + }; + + struct crypto_ec *e; + struct crypto_ec_point *P, *R; + struct crypto_bignum *scalar; + int ret; + const int iterations = 10; + int64_t total_time = 0; + int64_t min_time = INT64_MAX; + int64_t max_time = 0; + uint8_t point_bin[64]; /* 32 bytes X + 32 bytes Y */ + + /* Initialize EC context for SECP256R1 (group 19) */ + e = crypto_ec_init(19); + TEST_ASSERT_NOT_NULL(e); + + /* Initialize result point */ + R = crypto_ec_point_init(e); + TEST_ASSERT_NOT_NULL(R); + + /* Initialize scalar */ + scalar = crypto_bignum_init_set(test_scalar, sizeof(test_scalar)); + TEST_ASSERT_NOT_NULL(scalar); + + /* Set point P from test vector */ + memcpy(point_bin, test_point_x, 32); + memcpy(point_bin + 32, test_point_y, 32); + P = crypto_ec_point_from_bin(e, point_bin); + TEST_ASSERT_NOT_NULL(P); + + /* Warm-up run */ + ret = crypto_ec_point_mul(e, P, scalar, R); + TEST_ASSERT_EQUAL(0, ret); + + /* Benchmark iterations */ + for (int i = 0; i < iterations; i++) { + int64_t start = esp_timer_get_time(); + ret = crypto_ec_point_mul(e, P, scalar, R); + int64_t elapsed = esp_timer_get_time() - start; + TEST_ASSERT_EQUAL(0, ret); + + total_time += elapsed; + if (elapsed < min_time) { + min_time = elapsed; + } + if (elapsed > max_time) { + max_time = elapsed; + } + } + + int64_t avg_time = total_time / iterations; + + ESP_LOGI("WPA_CRYPTO", + "crypto_ec_point_mul timing(us): loops=%d total=%" PRId64 " avg=%" PRId64 " (%.2f ms) min=%" PRId64 " max=%" PRId64, + iterations, total_time, avg_time, avg_time / 1000.0, min_time, max_time); + + /* Cleanup */ + crypto_ec_point_deinit(P, 1); + crypto_ec_point_deinit(R, 1); + crypto_bignum_deinit(scalar, 1); + crypto_ec_deinit(e); +} diff --git a/components/wpa_supplicant/test_apps/main/test_sae.c b/components/wpa_supplicant/test_apps/main/test_sae.c index bb1e36728d..770d7c8be6 100644 --- a/components/wpa_supplicant/test_apps/main/test_sae.c +++ b/components/wpa_supplicant/test_apps/main/test_sae.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2026 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -20,9 +20,33 @@ #include "utils/wpabuf.h" #include "test_utils.h" #include "test_wpa_supplicant_common.h" +#include "esp_timer.h" typedef struct crypto_bignum crypto_bignum; +static int sae_commit_parse_limit_us(void) +{ +#if CONFIG_IDF_TARGET_ESP32 + return 400000; +#elif CONFIG_IDF_TARGET_ESP32S3 + return 300000; +#elif CONFIG_IDF_TARGET_ESP32S2 + return 380000; +#elif CONFIG_IDF_TARGET_ESP32C3 + return 340000; +#elif CONFIG_IDF_TARGET_ESP32C5 + return 130000; +#elif CONFIG_IDF_TARGET_ESP32C6 + return 180000; +#elif CONFIG_IDF_TARGET_ESP32C61 + return 200000; +#elif CONFIG_IDF_TARGET_ESP32C2 + return 230000; +#else + return 230000; +#endif +} + static struct wpabuf *wpabuf_alloc2(size_t len) { struct wpabuf *buf = (struct wpabuf *)os_zalloc(sizeof(struct wpabuf) + len); @@ -233,8 +257,12 @@ TEST_CASE("Test SAE functionality with ECC group", "[wpa3_sae]") u8 pwd[] = "ESP32-WPA3"; struct wpabuf *buf; int default_groups[] = { IANA_SECP256R1, 0 }; + int64_t start_us; + int64_t total_us; + int limit_us = sae_commit_parse_limit_us(); memset(&sae, 0, sizeof(sae)); + start_us = esp_timer_get_time(); TEST_ASSERT(sae_set_group(&sae, IANA_SECP256R1) == 0); @@ -253,6 +281,12 @@ TEST_CASE("Test SAE functionality with ECC group", "[wpa3_sae]") wpabuf_free2(buf); sae_clear_temp_data(&sae); sae_clear_data(&sae); + total_us = esp_timer_get_time() - start_us; + + ESP_LOGI("SAE Test", + "Commit/parse timing(us): total=%lld limit=%d", + (long long) total_us, limit_us); + TEST_ASSERT_MESSAGE(total_us <= limit_us, "SAE commit/parse timing regression"); } ESP_LOGI("SAE Test", "=========== Complete ============"); diff --git a/components/wpa_supplicant/test_apps/main/test_wpa_supplicant_main.c b/components/wpa_supplicant/test_apps/main/test_wpa_supplicant_main.c index 55bdc1b70b..001f012e80 100644 --- a/components/wpa_supplicant/test_apps/main/test_wpa_supplicant_main.c +++ b/components/wpa_supplicant/test_apps/main/test_wpa_supplicant_main.c @@ -32,7 +32,7 @@ static void check_leak(size_t before_free, size_t after_free, const char *type) { ssize_t delta = after_free - before_free; printf("MALLOC_CAP_%s: Before %u bytes free, After %u bytes free (delta %d, threshold %d)\n", type, before_free, after_free, delta, leak_threshold); - TEST_ASSERT_MESSAGE(delta > leak_threshold, "memory leak"); + TEST_ASSERT_MESSAGE(delta >= leak_threshold, "memory leak"); } #if SOC_SHA_SUPPORT_SHA512 From b5d26380b95a93ec0bd6d97c08f54d0aaddea4f9 Mon Sep 17 00:00:00 2001 From: Kapil Gupta Date: Thu, 12 Mar 2026 11:59:07 +0530 Subject: [PATCH 2/2] fix(esp_wifi): Optimize crypto operations for SAE - Montgomery multiplication fast path for P-256 mulmod - Jacobi symbol for legendre (replacing exp_mod) - Software Jacobian point multiplication for MPI-only chips - ECC hardware acceleration for supported chips --- .../src/crypto/crypto_mbedtls-bignum.c | 418 ++++++++++++++- .../src/crypto/crypto_mbedtls-ec.c | 488 +++++++++++++++++- .../esp_supplicant/src/crypto/p256_common.h | 236 +++++++++ 3 files changed, 1099 insertions(+), 43 deletions(-) create mode 100644 components/wpa_supplicant/esp_supplicant/src/crypto/p256_common.h diff --git a/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-bignum.c b/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-bignum.c index 040013d65e..a8b44dbef3 100644 --- a/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-bignum.c +++ b/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-bignum.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2025 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2026 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -8,6 +8,7 @@ #include "esp_system.h" #include "mbedtls/bignum.h" #include "mbedtls/esp_mbedtls_random.h" +#include "soc/soc_caps.h" #endif #include "utils/includes.h" @@ -17,6 +18,340 @@ #include "sha256.h" #include "mbedtls/pk.h" #include "mbedtls/psa_util.h" +#include "p256_common.h" + +static int mpi_is_secp256r1_prime(const mbedtls_mpi *p) +{ + u8 p_be[P256_LEN_BYTES]; + + if (!p || mbedtls_mpi_size(p) != P256_LEN_BYTES) { + return 0; + } + + if (mbedtls_mpi_write_binary(p, p_be, sizeof(p_be)) != 0) { + return 0; + } + + return os_memcmp(p_be, p256_p_be, sizeof(p_be)) == 0; +} + +static int p256_words_is_one(const u32 *a) +{ + size_t i; + + if (a[0] != 1) { + return 0; + } + + for (i = 1; i < P256_WORDS; i++) { + if (a[i] != 0) { + return 0; + } + } + + return 1; +} + +static int p256_words_cmp(const u32 *a, const u32 *b) +{ + int i; + + for (i = P256_WORDS - 1; i >= 0; i--) { + if (a[i] < b[i]) { + return -1; + } + if (a[i] > b[i]) { + return 1; + } + } + + return 0; +} + +static size_t p256_words_ctz(const u32 *a) +{ + size_t i; + + for (i = 0; i < P256_WORDS; i++) { + if (a[i] != 0) { + return i * 32 + __builtin_ctz(a[i]); + } + } + + return P256_WORDS * 32; +} + +static void p256_words_rshift(u32 *a, size_t count) +{ + size_t word_shift = count / 32; + size_t bit_shift = count % 32; + size_t i; + + if (word_shift >= P256_WORDS) { + os_memset(a, 0, sizeof(u32) * P256_WORDS); + return; + } + + if (word_shift > 0) { + for (i = 0; i + word_shift < P256_WORDS; i++) { + a[i] = a[i + word_shift]; + } + for (; i < P256_WORDS; i++) { + a[i] = 0; + } + } + + if (bit_shift > 0) { + for (i = 0; i < P256_WORDS - 1; i++) { + a[i] = (a[i] >> bit_shift) | + (a[i + 1] << (32 - bit_shift)); + } + a[P256_WORDS - 1] >>= bit_shift; + } +} + +static void p256_words_lshift(const u32 *in, size_t count, u32 *out) +{ + size_t word_shift = count / 32; + size_t bit_shift = count % 32; + size_t i; + + os_memset(out, 0, sizeof(u32) * P256_WORDS); + + if (word_shift >= P256_WORDS) { + return; + } + + for (i = 0; i < P256_WORDS; i++) { + u64 val; + size_t dst; + + if (in[i] == 0) { + continue; + } + + dst = i + word_shift; + if (dst >= P256_WORDS) { + break; + } + + val = (u64) in[i] << bit_shift; + out[dst] |= (u32) val; + if (bit_shift > 0 && dst + 1 < P256_WORDS) { + out[dst + 1] |= (u32)(val >> 32); + } + } +} + +static void p256_words_sub(u32 *a, const u32 *b) +{ + size_t i; + u64 borrow = 0; + + for (i = 0; i < P256_WORDS; i++) { + u64 ai = a[i]; + u64 bi = b[i]; + u64 res = ai - bi - borrow; + + a[i] = (u32) res; + borrow = (ai < bi + borrow) ? 1 : 0; + } +} + +static void p256_words_swap(u32 *a, u32 *b) +{ + u32 tmp[P256_WORDS]; + + os_memcpy(tmp, a, sizeof(tmp)); + os_memcpy(a, b, sizeof(tmp)); + os_memcpy(b, tmp, sizeof(tmp)); +} + +static void p256_words_mod(u32 *a, const u32 *n) +{ + u32 tmp[P256_WORDS]; + + while (p256_words_cmp(a, n) >= 0) { + size_t a_bits = p256_words_bitlen(a); + size_t n_bits = p256_words_bitlen(n); + size_t shift = a_bits - n_bits; + + p256_words_lshift(n, shift, tmp); + if (p256_words_cmp(a, tmp) < 0) { + shift--; + p256_words_lshift(n, shift, tmp); + } + p256_words_sub(a, tmp); + } +} + +static int crypto_bignum_mulmod_secp256r1(const mbedtls_mpi *a, + const mbedtls_mpi *b, + const mbedtls_mpi *mod, + mbedtls_mpi *out) +{ + u32 a_words[P256_WORDS]; + u32 b_words[P256_WORDS]; + u32 b_mont[P256_WORDS]; + u32 result[P256_WORDS]; + + if (!mpi_is_secp256r1_prime(mod) || + p256_words_from_mpi_reduced(a, a_words) != 0 || + p256_words_from_mpi_reduced(b, b_words) != 0) { + return -2; + } + + p256_mont_mul(b_mont, b_words, p256_r2_le); + p256_mont_mul(result, a_words, b_mont); + + return p256_words_to_mpi(result, out) == 0 ? 0 : -1; +} + +static int crypto_bignum_legendre_secp256r1(const mbedtls_mpi *a, + const mbedtls_mpi *p) +{ + u32 A[P256_WORDS]; + u32 N[P256_WORDS]; + unsigned int n_mod8; + unsigned int a_mod4; + unsigned int n_mod4; + size_t two_power; + int sign = 1; + + if (!mpi_is_secp256r1_prime(p) || + p256_words_from_mpi_reduced(a, A) != 0) { + return -2; + } + + os_memcpy(N, p256_p_le, sizeof(N)); + + if (p256_words_is_zero(A)) { + return 0; + } + + while (!p256_words_is_zero(A)) { + if (p256_words_is_one(A)) { + return sign; + } + + n_mod8 = N[0] & 0x7; + two_power = p256_words_ctz(A); + if (two_power > 0) { + p256_words_rshift(A, two_power); + if ((n_mod8 == 3 || n_mod8 == 5) && (two_power & 1U)) { + sign = -sign; + } + } + + p256_words_swap(A, N); + + a_mod4 = A[0] & 0x3; + n_mod4 = N[0] & 0x3; + if (a_mod4 == 3 && n_mod4 == 3) { + sign = -sign; + } + + if (p256_words_cmp(A, N) >= 0) { + p256_words_mod(A, N); + } + + if (p256_words_is_one(N)) { + return sign; + } + } + + return p256_words_is_one(N) ? sign : 0; +} + +/* + * Compute Legendre symbol through Jacobi algorithm for odd prime modulus. + * This avoids an expensive modular exponentiation in the SAE hot loop. + */ +static int crypto_bignum_legendre_jacobi(const mbedtls_mpi *a, + const mbedtls_mpi *p) +{ + mbedtls_mpi A, N; + unsigned int n_mod8, a_mod4, n_mod4; + size_t two_power; + int ret = 0; + int sign = 1; + int res = -2; + + if (mbedtls_mpi_cmp_int(p, 3) < 0 || mbedtls_mpi_get_bit(p, 0) == 0) { + return -2; + } + + mbedtls_mpi_init(&A); + mbedtls_mpi_init(&N); + + MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&N, p)); + if (mbedtls_mpi_cmp_int(a, 0) >= 0 && mbedtls_mpi_cmp_mpi(a, &N) < 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&A, a)); + } else { + MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&A, a, &N)); + } + + if (mbedtls_mpi_cmp_int(&A, 0) == 0) { + res = 0; + goto cleanup; + } + + while (mbedtls_mpi_cmp_int(&A, 0) != 0) { + if (mbedtls_mpi_cmp_int(&A, 1) == 0) { + res = sign; + goto cleanup; + } + + n_mod8 = (mbedtls_mpi_get_bit(&N, 0) << 0) | + (mbedtls_mpi_get_bit(&N, 1) << 1) | + (mbedtls_mpi_get_bit(&N, 2) << 2); + two_power = mbedtls_mpi_lsb(&A); + if (two_power > 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&A, two_power)); + if ((n_mod8 == 3 || n_mod8 == 5) && (two_power & 1U)) { + sign = -sign; + } + } + + mbedtls_mpi_swap(&A, &N); + + a_mod4 = (mbedtls_mpi_get_bit(&A, 0) << 0) | + (mbedtls_mpi_get_bit(&A, 1) << 1); + n_mod4 = (mbedtls_mpi_get_bit(&N, 0) << 0) | + (mbedtls_mpi_get_bit(&N, 1) << 1); + if (a_mod4 == 3 && n_mod4 == 3) { + sign = -sign; + } + + if (mbedtls_mpi_cmp_mpi(&A, &N) >= 0) { + size_t a_bits = mbedtls_mpi_bitlen(&A); + size_t n_bits = mbedtls_mpi_bitlen(&N); + if (a_bits <= n_bits + 4) { + while (mbedtls_mpi_cmp_mpi(&A, &N) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&A, &A, &N)); + } + } else { + MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&A, &A, &N)); + } + } + + if (mbedtls_mpi_cmp_int(&N, 1) == 0) { + res = sign; + goto cleanup; + } + } + + res = (mbedtls_mpi_cmp_int(&N, 1) == 0) ? sign : 0; + +cleanup: + mbedtls_mpi_free(&A); + mbedtls_mpi_free(&N); + + if (ret != 0) { + return -2; + } + return res; +} struct crypto_bignum *crypto_bignum_init(void) { @@ -122,7 +457,43 @@ int crypto_bignum_exptmod(const struct crypto_bignum *a, const struct crypto_bignum *c, struct crypto_bignum *d) { - return mbedtls_mpi_exp_mod((mbedtls_mpi *) d, (const mbedtls_mpi *) a, (const mbedtls_mpi *) b, (const mbedtls_mpi *) c, NULL) ? -1 : 0; + int ret; + + /* Fast path for small public exponents frequently used in SAE math. */ + if (mbedtls_mpi_cmp_int((const mbedtls_mpi *) b, 0) >= 0 && + mbedtls_mpi_cmp_int((const mbedtls_mpi *) b, 3) <= 0) { + if (mbedtls_mpi_cmp_int((const mbedtls_mpi *) b, 0) == 0) { + ret = mbedtls_mpi_lset((mbedtls_mpi *) d, 1) || + mbedtls_mpi_mod_mpi((mbedtls_mpi *) d, (mbedtls_mpi *) d, + (const mbedtls_mpi *) c); + return ret ? -1 : 0; + } + + if (mbedtls_mpi_cmp_int((const mbedtls_mpi *) b, 1) == 0) { + ret = mbedtls_mpi_copy((mbedtls_mpi *) d, (const mbedtls_mpi *) a) || + mbedtls_mpi_mod_mpi((mbedtls_mpi *) d, (mbedtls_mpi *) d, + (const mbedtls_mpi *) c); + return ret ? -1 : 0; + } + + if (mbedtls_mpi_cmp_int((const mbedtls_mpi *) b, 2) == 0) { + return crypto_bignum_mulmod(a, a, c, d); + } else { + mbedtls_mpi tmp; + mbedtls_mpi_init(&tmp); + ret = crypto_bignum_mulmod(a, a, c, (struct crypto_bignum *) &tmp); + if (ret == 0) { + ret = crypto_bignum_mulmod((struct crypto_bignum *) &tmp, + a, c, d); + } + mbedtls_mpi_free(&tmp); + return ret ? -1 : 0; + } + } + + return mbedtls_mpi_exp_mod((mbedtls_mpi *) d, (const mbedtls_mpi *) a, + (const mbedtls_mpi *) b, + (const mbedtls_mpi *) c, NULL) ? -1 : 0; } @@ -155,6 +526,13 @@ int crypto_bignum_mulmod(const struct crypto_bignum *a, const struct crypto_bignum *c, struct crypto_bignum *d) { + int fast_ret = crypto_bignum_mulmod_secp256r1((const mbedtls_mpi *) a, + (const mbedtls_mpi *) b, + (const mbedtls_mpi *) c, + (mbedtls_mpi *) d); + if (fast_ret != -2) { + return fast_ret; + } return mbedtls_mpi_mul_mpi((mbedtls_mpi *)d, (const mbedtls_mpi *)a, (const mbedtls_mpi *)b) || mbedtls_mpi_mod_mpi((mbedtls_mpi *)d, (mbedtls_mpi *)d, (const mbedtls_mpi *)c) ? -1 : 0; } @@ -163,17 +541,7 @@ int crypto_bignum_sqrmod(const struct crypto_bignum *a, const struct crypto_bignum *b, struct crypto_bignum *c) { - int res; - struct crypto_bignum *tmp = crypto_bignum_init(); - if (!tmp) { - return -1; - } - - res = mbedtls_mpi_copy((mbedtls_mpi *) tmp, (const mbedtls_mpi *) a); - res = crypto_bignum_mulmod(a, tmp, b, c); - - crypto_bignum_deinit(tmp, 0); - return res ? -1 : 0; + return crypto_bignum_mulmod(a, a, b, c); } int crypto_bignum_rshift(const struct crypto_bignum *a, int n, @@ -222,8 +590,8 @@ int crypto_bignum_rand(struct crypto_bignum *r, const struct crypto_bignum *m) mbedtls_psa_get_random, MBEDTLS_PSA_RANDOM_STATE) != 0) ? -1 : 0); } -int crypto_bignum_legendre(const struct crypto_bignum *a, - const struct crypto_bignum *p) +static int mbedtls_bignum_legendre(const struct crypto_bignum *a, + const struct crypto_bignum *p) { mbedtls_mpi exp, tmp; int res = -2, ret; @@ -255,6 +623,26 @@ cleanup: return res; } +int crypto_bignum_legendre(const struct crypto_bignum *a, + const struct crypto_bignum *p) +{ + int jacobi_res; + + jacobi_res = crypto_bignum_legendre_secp256r1((const mbedtls_mpi *) a, + (const mbedtls_mpi *) p); + if (jacobi_res != -2) { + return jacobi_res; + } + + jacobi_res = crypto_bignum_legendre_jacobi((const mbedtls_mpi *) a, + (const mbedtls_mpi *) p); + if (jacobi_res != -2) { + return jacobi_res; + } + + return mbedtls_bignum_legendre(a, p); +} + int crypto_bignum_addmod(const struct crypto_bignum *a, const struct crypto_bignum *b, const struct crypto_bignum *c, diff --git a/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-ec.c b/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-ec.c index 271654c8a3..9e4706f007 100644 --- a/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-ec.c +++ b/components/wpa_supplicant/esp_supplicant/src/crypto/crypto_mbedtls-ec.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2015-2025 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2015-2026 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -8,8 +8,12 @@ #ifdef ESP_PLATFORM #include "esp_system.h" +#include "esp_random.h" #include "mbedtls/bignum.h" #include "mbedtls/esp_mbedtls_random.h" +#if CONFIG_MBEDTLS_HARDWARE_ECC +#include "ecc_impl.h" +#endif #endif #include "utils/includes.h" @@ -23,7 +27,9 @@ #include "mbedtls/asn1write.h" #include "mbedtls/error.h" #include "mbedtls/oid.h" +#include "mbedtls/platform_util.h" #include +#include "p256_common.h" #include "psa/crypto.h" #include "psa/crypto_sizes.h" #include "esp_heap_caps.h" @@ -39,6 +45,70 @@ #define ESP_SUP_MAX_ECC_KEY_SIZE 256 +#if CONFIG_MBEDTLS_HARDWARE_ECC +static bool crypto_ec_point_mul_curve_supported(const mbedtls_ecp_group *grp) +{ + switch (grp->id) { + case MBEDTLS_ECP_DP_SECP192R1: + case MBEDTLS_ECP_DP_SECP256R1: +#if SOC_ECC_SUPPORT_CURVE_P384 + case MBEDTLS_ECP_DP_SECP384R1: +#endif + return true; + default: + return false; + } +} + +static int crypto_ec_point_mul_ecc_hw(const mbedtls_ecp_group *grp, + const mbedtls_ecp_point *p, + const mbedtls_mpi *k, + mbedtls_ecp_point *res) +{ + int ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + ecc_point_t p_hw = { 0 }; + ecc_point_t r_hw = { 0 }; + unsigned char scalar_le[MAX_SIZE] = { 0 }; + size_t curve_len = grp->pbits / 8; + + if (!crypto_ec_point_mul_curve_supported(grp)) { + return MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + } + + if (curve_len != P192_LEN && curve_len != P256_LEN +#if SOC_ECC_SUPPORT_CURVE_P384 + && curve_len != P384_LEN +#endif + ) { + return MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + } + + /* Preserve mbedTLS input validation semantics for this fast path. */ + MBEDTLS_MPI_CHK(mbedtls_ecp_check_privkey(grp, k)); + MBEDTLS_MPI_CHK(mbedtls_ecp_check_pubkey(grp, p)); + + p_hw.len = curve_len; + MBEDTLS_MPI_CHK(mbedtls_mpi_write_binary_le(&p->MBEDTLS_PRIVATE(X), p_hw.x, MAX_SIZE)); + MBEDTLS_MPI_CHK(mbedtls_mpi_write_binary_le(&p->MBEDTLS_PRIVATE(Y), p_hw.y, MAX_SIZE)); + MBEDTLS_MPI_CHK(mbedtls_mpi_write_binary_le(k, scalar_le, MAX_SIZE)); + + if (esp_ecc_point_multiply(&p_hw, scalar_le, &r_hw, false) != 0) { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + MBEDTLS_MPI_CHK(mbedtls_mpi_read_binary_le(&res->MBEDTLS_PRIVATE(X), r_hw.x, curve_len)); + MBEDTLS_MPI_CHK(mbedtls_mpi_read_binary_le(&res->MBEDTLS_PRIVATE(Y), r_hw.y, curve_len)); + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&res->MBEDTLS_PRIVATE(Z), 1)); + +cleanup: + mbedtls_platform_zeroize(scalar_le, sizeof(scalar_le)); + mbedtls_platform_zeroize(&p_hw, sizeof(p_hw)); + mbedtls_platform_zeroize(&r_hw, sizeof(r_hw)); + return ret; +} +#endif + #ifdef CONFIG_ECC // Wrapper structure for EC keys that includes PSA key ID, curve info, and group @@ -287,6 +357,306 @@ cleanup: return NULL; } +#if CONFIG_MBEDTLS_HARDWARE_MPI && !CONFIG_MBEDTLS_HARDWARE_ECC +typedef struct { + u32 X[P256_WORDS]; + u32 Y[P256_WORDS]; + u32 Z[P256_WORDS]; +} p256_fast_jac_point; + +static u32 p256_fast_words_add_carry(u32 *z, const u32 *x, const u32 *y) +{ + size_t i; + u64 carry = 0; + + for (i = 0; i < P256_WORDS; i++) { + u64 sum = (u64) x[i] + y[i] + carry; + + z[i] = (u32) sum; + carry = sum >> 32; + } + + return (u32) carry; +} + +static void p256_fast_words_add_mod(u32 *z, const u32 *x, const u32 *y) +{ + u32 reduced[P256_WORDS]; + u32 carry = p256_fast_words_add_carry(z, x, y); + u32 borrow = p256_words_sub_borrow(reduced, z, p256_p_le); + u32 use_sub = carry | (1U - borrow); + + p256_words_cmov(z, reduced, use_sub); +} + +static void p256_fast_words_sub_mod(u32 *z, const u32 *x, const u32 *y) +{ + u32 tmp[P256_WORDS]; + + if (p256_words_sub_borrow(z, x, y) != 0) { + (void) p256_fast_words_add_carry(tmp, z, p256_p_le); + os_memcpy(z, tmp, sizeof(tmp)); + } +} + +static void p256_fast_to_mont(u32 out[P256_WORDS], + const u32 in[P256_WORDS]) +{ + p256_mont_mul(out, in, p256_r2_le); +} + +static void p256_fast_from_mont(u32 out[P256_WORDS], + const u32 in[P256_WORDS]) +{ + static const u32 one[P256_WORDS] = {1}; + + p256_mont_mul(out, in, one); +} + +static void p256_fast_sqr(u32 out[P256_WORDS], const u32 in[P256_WORDS]) +{ + p256_mont_mul(out, in, in); +} + +static void p256_fast_mul(u32 out[P256_WORDS], + const u32 a[P256_WORDS], + const u32 b[P256_WORDS]) +{ + p256_mont_mul(out, a, b); +} + +static void p256_fast_dbl(u32 out[P256_WORDS], const u32 in[P256_WORDS]) +{ + p256_fast_words_add_mod(out, in, in); +} + +static void p256_fast_triple(u32 out[P256_WORDS], const u32 in[P256_WORDS]) +{ + u32 tmp[P256_WORDS]; + + p256_fast_dbl(tmp, in); + p256_fast_words_add_mod(out, tmp, in); +} + +static void p256_fast_eight(u32 out[P256_WORDS], const u32 in[P256_WORDS]) +{ + u32 tmp[P256_WORDS]; + + p256_fast_dbl(tmp, in); + p256_fast_dbl(tmp, tmp); + p256_fast_dbl(out, tmp); +} + +static void p256_fast_point_set_zero(p256_fast_jac_point *p) +{ + os_memset(p, 0, sizeof(*p)); +} + +static void p256_fast_point_from_affine(p256_fast_jac_point *p, + const u32 x[P256_WORDS], + const u32 y[P256_WORDS], + const u32 one_mont[P256_WORDS]) +{ + os_memcpy(p->X, x, sizeof(p->X)); + os_memcpy(p->Y, y, sizeof(p->Y)); + os_memcpy(p->Z, one_mont, sizeof(p->Z)); +} + +static void p256_fast_point_double(p256_fast_jac_point *r) +{ + u32 z2[P256_WORDS], y2[P256_WORDS], y4[P256_WORDS]; + u32 s[P256_WORDS], m[P256_WORDS], x3[P256_WORDS]; + u32 y3[P256_WORDS], z3[P256_WORDS], tmp1[P256_WORDS]; + u32 tmp2[P256_WORDS]; + + if (p256_words_is_zero(r->Z) || p256_words_is_zero(r->Y)) { + p256_fast_point_set_zero(r); + return; + } + + p256_fast_sqr(z2, r->Z); + p256_fast_sqr(y2, r->Y); + p256_fast_sqr(y4, y2); + + p256_fast_mul(s, r->X, y2); + p256_fast_dbl(s, s); + p256_fast_dbl(s, s); + + p256_fast_words_add_mod(tmp1, r->X, z2); + p256_fast_words_sub_mod(tmp2, r->X, z2); + p256_fast_mul(m, tmp1, tmp2); + p256_fast_triple(m, m); + + p256_fast_sqr(x3, m); + p256_fast_words_sub_mod(x3, x3, s); + p256_fast_words_sub_mod(x3, x3, s); + + p256_fast_words_sub_mod(tmp1, s, x3); + p256_fast_mul(y3, m, tmp1); + p256_fast_eight(tmp2, y4); + p256_fast_words_sub_mod(y3, y3, tmp2); + + p256_fast_mul(z3, r->Y, r->Z); + p256_fast_dbl(z3, z3); + + os_memcpy(r->X, x3, sizeof(r->X)); + os_memcpy(r->Y, y3, sizeof(r->Y)); + os_memcpy(r->Z, z3, sizeof(r->Z)); +} + +static void p256_fast_point_add_mixed(p256_fast_jac_point *r, + const u32 qx[P256_WORDS], + const u32 qy[P256_WORDS], + const u32 one_mont[P256_WORDS]) +{ + u32 z1z1[P256_WORDS], z1z1z1[P256_WORDS]; + u32 u2[P256_WORDS], s2[P256_WORDS], h[P256_WORDS]; + u32 rr[P256_WORDS], hh[P256_WORDS], hhh[P256_WORDS]; + u32 v[P256_WORDS], x3[P256_WORDS], y3[P256_WORDS]; + u32 z3[P256_WORDS], tmp[P256_WORDS], y1[P256_WORDS]; + + if (p256_words_is_zero(r->Z)) { + p256_fast_point_from_affine(r, qx, qy, one_mont); + return; + } + + os_memcpy(y1, r->Y, sizeof(y1)); + + p256_fast_sqr(z1z1, r->Z); + p256_fast_mul(z1z1z1, z1z1, r->Z); + p256_fast_mul(u2, qx, z1z1); + p256_fast_mul(s2, qy, z1z1z1); + p256_fast_words_sub_mod(h, u2, r->X); + p256_fast_words_sub_mod(rr, s2, y1); + + if (p256_words_is_zero(h)) { + if (p256_words_is_zero(rr)) { + p256_fast_point_double(r); + } else { + p256_fast_point_set_zero(r); + } + return; + } + + p256_fast_sqr(hh, h); + p256_fast_mul(hhh, hh, h); + p256_fast_mul(v, r->X, hh); + + p256_fast_sqr(x3, rr); + p256_fast_words_sub_mod(x3, x3, hhh); + p256_fast_words_sub_mod(x3, x3, v); + p256_fast_words_sub_mod(x3, x3, v); + + p256_fast_words_sub_mod(tmp, v, x3); + p256_fast_mul(y3, rr, tmp); + p256_fast_mul(tmp, y1, hhh); + p256_fast_words_sub_mod(y3, y3, tmp); + + p256_fast_mul(z3, r->Z, h); + + os_memcpy(r->X, x3, sizeof(r->X)); + os_memcpy(r->Y, y3, sizeof(r->Y)); + os_memcpy(r->Z, z3, sizeof(r->Z)); +} + +static int p256_fast_point_normalize(const mbedtls_ecp_group *grp, + const p256_fast_jac_point *p, + mbedtls_ecp_point *res) +{ + mbedtls_mpi z_mpi, inv_mpi; + u32 z_std[P256_WORDS], inv_std[P256_WORDS], inv_mont[P256_WORDS]; + u32 x_std[P256_WORDS], y_std[P256_WORDS], tmp[P256_WORDS]; + int ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + + if (p256_words_is_zero(p->Z)) { + return mbedtls_ecp_set_zero(res); + } + + mbedtls_mpi_init(&z_mpi); + mbedtls_mpi_init(&inv_mpi); + + p256_fast_from_mont(z_std, p->Z); + MBEDTLS_MPI_CHK(p256_words_to_mpi(z_std, &z_mpi)); + MBEDTLS_MPI_CHK(mbedtls_mpi_inv_mod(&inv_mpi, &z_mpi, &grp->P)); + if (p256_words_from_mpi_reduced(&inv_mpi, inv_std) != 0) { + ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + goto cleanup; + } + + p256_fast_to_mont(inv_mont, inv_std); + p256_fast_mul(y_std, p->Y, inv_mont); + p256_fast_sqr(tmp, inv_mont); + p256_fast_mul(x_std, p->X, tmp); + p256_fast_mul(y_std, y_std, tmp); + p256_fast_from_mont(x_std, x_std); + p256_fast_from_mont(y_std, y_std); + + MBEDTLS_MPI_CHK(p256_words_to_mpi(x_std, &res->MBEDTLS_PRIVATE(X))); + MBEDTLS_MPI_CHK(p256_words_to_mpi(y_std, &res->MBEDTLS_PRIVATE(Y))); + MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&res->MBEDTLS_PRIVATE(Z), 1)); + +cleanup: + mbedtls_mpi_free(&z_mpi); + mbedtls_mpi_free(&inv_mpi); + return ret; +} + +static int crypto_ec_point_mul_p256_jacobian_fast(const mbedtls_ecp_group *grp, + const mbedtls_ecp_point *p, + const mbedtls_mpi *k, + mbedtls_ecp_point *res) +{ + p256_fast_jac_point r; + u32 scalar[P256_WORDS], x_std[P256_WORDS], y_std[P256_WORDS]; + u32 x_mont[P256_WORDS], y_mont[P256_WORDS], one_mont[P256_WORDS]; + static const u32 one_std[P256_WORDS] = {1}; + int bit; + int ret = MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + + if (!grp || grp->id != MBEDTLS_ECP_DP_SECP256R1 || + mbedtls_mpi_cmp_int(&p->MBEDTLS_PRIVATE(Z), 1) != 0) { + return MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + } + + MBEDTLS_MPI_CHK(mbedtls_ecp_check_privkey(grp, k)); + MBEDTLS_MPI_CHK(mbedtls_ecp_check_pubkey(grp, p)); + + if (p256_words_from_mpi(k, scalar) != 0 || + p256_words_from_mpi_reduced(&p->MBEDTLS_PRIVATE(X), x_std) != 0 || + p256_words_from_mpi_reduced(&p->MBEDTLS_PRIVATE(Y), y_std) != 0) { + ret = MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE; + goto cleanup; + } + + bit = (int) p256_words_bitlen(scalar); + if (bit <= 0) { + ret = mbedtls_ecp_set_zero(res); + goto cleanup; + } + + p256_fast_to_mont(x_mont, x_std); + p256_fast_to_mont(y_mont, y_std); + p256_fast_to_mont(one_mont, one_std); + p256_fast_point_from_affine(&r, x_mont, y_mont, one_mont); + + for (bit -= 2; bit >= 0; bit--) { + p256_fast_point_double(&r); + if ((scalar[bit / 32] >> (bit % 32)) & 1U) { + p256_fast_point_add_mixed(&r, x_mont, y_mont, one_mont); + } + } + + ret = p256_fast_point_normalize(grp, &r, res); + +cleanup: + mbedtls_platform_zeroize(scalar, sizeof(scalar)); + mbedtls_platform_zeroize(&r, sizeof(r)); + mbedtls_platform_zeroize(x_mont, sizeof(x_mont)); + mbedtls_platform_zeroize(y_mont, sizeof(y_mont)); + return ret; +} +#endif + int crypto_ec_point_add(struct crypto_ec *e, const struct crypto_ec_point *a, const struct crypto_ec_point *b, struct crypto_ec_point *c) @@ -308,7 +678,31 @@ int crypto_ec_point_mul(struct crypto_ec *e, const struct crypto_ec_point *p, const struct crypto_bignum *b, struct crypto_ec_point *res) { - int ret; + int ret = MBEDTLS_ERR_ECP_BAD_INPUT_DATA; + +#if CONFIG_MBEDTLS_HARDWARE_ECC + ret = crypto_ec_point_mul_ecc_hw((mbedtls_ecp_group *)e, + (const mbedtls_ecp_point *)p, + (const mbedtls_mpi *)b, + (mbedtls_ecp_point *)res); + if (ret == 0) { + goto cleanup; + } + if (ret != MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE) { + goto cleanup; + } +#elif CONFIG_MBEDTLS_HARDWARE_MPI + ret = crypto_ec_point_mul_p256_jacobian_fast((mbedtls_ecp_group *) e, + (const mbedtls_ecp_point *) p, + (const mbedtls_mpi *) b, + (mbedtls_ecp_point *) res); + if (ret == 0) { + goto cleanup; + } + if (ret != MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE) { + goto cleanup; + } +#endif MBEDTLS_MPI_CHK(mbedtls_ecp_mul((mbedtls_ecp_group *)e, (mbedtls_ecp_point *) res, (const mbedtls_mpi *)b, @@ -401,6 +795,7 @@ struct crypto_bignum *crypto_ec_point_compute_y_sqr(struct crypto_ec *e, const struct crypto_bignum *x) { mbedtls_mpi temp, temp2, num; + mbedtls_ecp_group *grp = (mbedtls_ecp_group *) e; int ret = 0; mbedtls_mpi *y_sqr = os_zalloc(sizeof(mbedtls_mpi)); @@ -414,30 +809,48 @@ struct crypto_bignum *crypto_ec_point_compute_y_sqr(struct crypto_ec *e, mbedtls_mpi_init(y_sqr); /* y^2 = x^3 + ax + b mod P */ - /* X*X*X is faster on esp32 whereas X^3 is faster on other chips */ -#if CONFIG_IDF_TARGET_ESP32 - /* Calculate x*x*x mod P*/ - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&temp, (const mbedtls_mpi *) x, (const mbedtls_mpi *) x)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&temp, &temp, (const mbedtls_mpi *) x)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&temp, &temp, &((mbedtls_ecp_group *)e)->P)); -#else - /* Calculate x^3 mod P*/ - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&num, 3)); - MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&temp, (const mbedtls_mpi *) x, &num, &((mbedtls_ecp_group *)e)->P, NULL)); -#endif + MBEDTLS_MPI_CHK(crypto_bignum_mulmod(x, x, + (const struct crypto_bignum *) &grp->P, + (struct crypto_bignum *) &temp)); + MBEDTLS_MPI_CHK(crypto_bignum_mulmod((const struct crypto_bignum *) &temp, x, + (const struct crypto_bignum *) &grp->P, + (struct crypto_bignum *) &temp)); - /* Calculate ax mod P*/ - MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&num, -3)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&temp2, (const mbedtls_mpi *) x, &num)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&temp2, &temp2, &((mbedtls_ecp_group *)e)->P)); + if (mbedtls_ecp_group_a_is_minus_3(grp)) { + /* + * For NIST P-curves used in SAE, a == -3. Compute (-3x + b) mod p + * with additions/subtractions instead of a generic multiply+mod path. + */ + MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&temp2, (const mbedtls_mpi *) x, + (const mbedtls_mpi *) x)); + if (mbedtls_mpi_cmp_mpi(&temp2, &grp->P) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&temp2, &temp2, &grp->P)); + } - /* Calculate ax + b mod P. Note that b is already < P*/ - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&temp2, &temp2, &((mbedtls_ecp_group *)e)->B)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&temp2, &temp2, &((mbedtls_ecp_group *)e)->P)); + MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&temp2, &temp2, + (const mbedtls_mpi *) x)); + while (mbedtls_mpi_cmp_mpi(&temp2, &grp->P) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&temp2, &temp2, &grp->P)); + } - /* Calculate x^3 + ax + b mod P*/ - MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&temp2, &temp2, &temp)); - MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(y_sqr, &temp2, &((mbedtls_ecp_group *)e)->P)); + if (mbedtls_mpi_cmp_int(&temp2, 0) != 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&temp2, &grp->P, &temp2)); + } + } else { + MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&temp2, (const mbedtls_mpi *) x, + &grp->A)); + MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&temp2, &temp2, &grp->P)); + } + + MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&temp2, &temp2, &grp->B)); + while (mbedtls_mpi_cmp_mpi(&temp2, &grp->P) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&temp2, &temp2, &grp->P)); + } + + MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(y_sqr, &temp2, &temp)); + while (mbedtls_mpi_cmp_mpi(y_sqr, &grp->P) >= 0) { + MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(y_sqr, y_sqr, &grp->P)); + } cleanup: mbedtls_mpi_free(&temp); @@ -458,8 +871,9 @@ int crypto_ec_point_is_at_infinity(struct crypto_ec *e, return mbedtls_ecp_is_zero((mbedtls_ecp_point *) p); } -int crypto_ec_point_is_on_curve(struct crypto_ec *e, - const struct crypto_ec_point *p) +#if !CONFIG_MBEDTLS_HARDWARE_ECC +static int crypto_ec_point_is_on_curve_mpi(struct crypto_ec *e, + const struct crypto_ec_point *p) { mbedtls_mpi y_sqr_lhs, *y_sqr_rhs = NULL, two; int ret = 0, on_curve = 0; @@ -467,11 +881,15 @@ int crypto_ec_point_is_on_curve(struct crypto_ec *e, mbedtls_mpi_init(&y_sqr_lhs); mbedtls_mpi_init(&two); - /* Calculate y^2 mod P*/ + /* Calculate y^2 mod P */ MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&two, 2)); - MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&y_sqr_lhs, &((const mbedtls_ecp_point *)p)->MBEDTLS_PRIVATE(Y), &two, &((mbedtls_ecp_group *)e)->P, NULL)); + MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&y_sqr_lhs, + &((const mbedtls_ecp_point *)p)->MBEDTLS_PRIVATE(Y), + &two, &((mbedtls_ecp_group *)e)->P, NULL)); - y_sqr_rhs = (mbedtls_mpi *) crypto_ec_point_compute_y_sqr(e, (const struct crypto_bignum *) & ((const mbedtls_ecp_point *)p)->MBEDTLS_PRIVATE(X)); + y_sqr_rhs = (mbedtls_mpi *) crypto_ec_point_compute_y_sqr( + e, (const struct crypto_bignum *) + & ((const mbedtls_ecp_point *)p)->MBEDTLS_PRIVATE(X)); if (y_sqr_rhs && (mbedtls_mpi_cmp_mpi(y_sqr_rhs, &y_sqr_lhs) == 0)) { on_curve = 1; @@ -484,6 +902,20 @@ cleanup: os_free(y_sqr_rhs); return (ret == 0) && (on_curve == 1); } +#endif + +int crypto_ec_point_is_on_curve(struct crypto_ec *e, + const struct crypto_ec_point *p) +{ +#if CONFIG_MBEDTLS_HARDWARE_ECC + /* ECC HW verify path via mbedTLS alt hooks. */ + return mbedtls_ecp_check_pubkey((const mbedtls_ecp_group *)e, + (const mbedtls_ecp_point *)p) == 0; +#else + /* MPI implementation. */ + return crypto_ec_point_is_on_curve_mpi(e, p); +#endif +} int crypto_ec_point_cmp(const struct crypto_ec *e, const struct crypto_ec_point *a, diff --git a/components/wpa_supplicant/esp_supplicant/src/crypto/p256_common.h b/components/wpa_supplicant/esp_supplicant/src/crypto/p256_common.h new file mode 100644 index 0000000000..b2fb6da6ea --- /dev/null +++ b/components/wpa_supplicant/esp_supplicant/src/crypto/p256_common.h @@ -0,0 +1,236 @@ +/* + * SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Shared P-256 (secp256r1) word-level and Montgomery arithmetic used by + * both the bignum and EC fast paths. All helpers are static inline so + * each translation unit gets its own copy without linkage issues. + * + * Prerequisites: the including .c file must already provide u8/u32/u64 + * typedefs, os_memcmp/os_memset/os_memcpy (via utils/common.h), and + * mbedtls/bignum.h. + */ + +#pragma once + +#define P256_WORDS 8 +#define P256_LEN_BYTES 32 + +static const u8 p256_p_be[P256_LEN_BYTES] = { + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +}; + +static const u32 p256_p_le[P256_WORDS] = { + 0xffffffffU, 0xffffffffU, 0xffffffffU, 0x00000000U, + 0x00000000U, 0x00000000U, 0x00000001U, 0xffffffffU +}; + +/* R^2 mod p in little-endian word order, for Montgomery domain entry. */ +static const u32 p256_r2_le[P256_WORDS] = { + 0x00000003U, 0x00000000U, 0xffffffffU, 0xfffffffbU, + 0xfffffffeU, 0xffffffffU, 0xfffffffdU, 0x00000004U +}; + +static inline int p256_words_is_zero(const u32 *a) +{ + size_t i; + + for (i = 0; i < P256_WORDS; i++) { + if (a[i] != 0) { + return 0; + } + } + + return 1; +} + +static inline size_t p256_words_bitlen(const u32 *a) +{ + int i; + + for (i = P256_WORDS - 1; i >= 0; i--) { + if (a[i] != 0) { + return (size_t) i * 32 + 32 - __builtin_clz(a[i]); + } + } + + return 0; +} + +static inline int p256_words_from_mpi(const mbedtls_mpi *in, u32 *out) +{ + u8 in_be[P256_LEN_BYTES]; + size_t i; + + if (!in || in->MBEDTLS_PRIVATE(s) < 0 || + mbedtls_mpi_size(in) > P256_LEN_BYTES || + mbedtls_mpi_write_binary(in, in_be, sizeof(in_be)) != 0) { + return -1; + } + + for (i = 0; i < P256_WORDS; i++) { + size_t off = P256_LEN_BYTES - (i + 1) * 4; + + out[i] = ((u32) in_be[off] << 24) | + ((u32) in_be[off + 1] << 16) | + ((u32) in_be[off + 2] << 8) | + (u32) in_be[off + 3]; + } + + return 0; +} + +static inline int p256_words_from_mpi_reduced(const mbedtls_mpi *in, u32 *out) +{ + u8 in_be[P256_LEN_BYTES]; + size_t i; + size_t in_size; + + if (!in || in->MBEDTLS_PRIVATE(s) < 0) { + return -1; + } + + in_size = mbedtls_mpi_size(in); + if (in_size > P256_LEN_BYTES) { + return -1; + } + + if (mbedtls_mpi_write_binary(in, in_be, sizeof(in_be)) != 0) { + return -1; + } + + if (in_size == P256_LEN_BYTES && + os_memcmp(in_be, p256_p_be, sizeof(in_be)) >= 0) { + return -1; + } + + for (i = 0; i < P256_WORDS; i++) { + size_t off = P256_LEN_BYTES - (i + 1) * 4; + + out[i] = ((u32) in_be[off] << 24) | + ((u32) in_be[off + 1] << 16) | + ((u32) in_be[off + 2] << 8) | + (u32) in_be[off + 3]; + } + + return 0; +} + +static inline int p256_words_to_mpi(const u32 *in, mbedtls_mpi *out) +{ + u8 out_be[P256_LEN_BYTES]; + size_t i; + + for (i = 0; i < P256_WORDS; i++) { + size_t off = P256_LEN_BYTES - (i + 1) * 4; + + out_be[off] = (u8)(in[i] >> 24); + out_be[off + 1] = (u8)(in[i] >> 16); + out_be[off + 2] = (u8)(in[i] >> 8); + out_be[off + 3] = (u8) in[i]; + } + + return mbedtls_mpi_read_binary(out, out_be, sizeof(out_be)); +} + +static inline u32 p256_words_sub_borrow(u32 *z, const u32 *x, const u32 *y) +{ + size_t i; + u32 borrow = 0; + + for (i = 0; i < P256_WORDS; i++) { + u64 diff = (u64) x[i] - y[i] - borrow; + + z[i] = (u32) diff; + borrow = -(u32)(diff >> 32); + } + + return borrow; +} + +static inline void p256_words_cmov(u32 *z, const u32 *x, u32 c) +{ + size_t i; + u32 mask = (u32) - (int) c; + + for (i = 0; i < P256_WORDS; i++) { + z[i] = (z[i] & ~mask) | (x[i] & mask); + } +} + +static inline u64 p256_u32_muladd64(u32 x, u32 y, u32 z, u32 t) +{ + return (u64) x * y + z + t; +} + +static inline u32 p256_u288_muladd(u32 z[P256_WORDS + 1], u32 x, + const u32 y[P256_WORDS]) +{ + size_t i; + u32 carry = 0; + + for (i = 0; i < P256_WORDS; i++) { + u64 prod = p256_u32_muladd64(x, y[i], z[i], carry); + + z[i] = (u32) prod; + carry = (u32)(prod >> 32); + } + + { + u64 sum = (u64) z[P256_WORDS] + carry; + z[P256_WORDS] = (u32) sum; + carry = (u32)(sum >> 32); + } + + return carry; +} + +static inline void p256_u288_rshift32(u32 z[P256_WORDS + 1], u32 c) +{ + size_t i; + + for (i = 0; i < P256_WORDS; i++) { + z[i] = z[i + 1]; + } + z[P256_WORDS] = c; +} + +/* + * CIOS Montgomery multiplication for secp256r1. + * + * The Montgomery constant mu = -p^{-1} mod 2^{32} equals 1 for this prime + * because p[0] = 0xFFFFFFFF, i.e. p ≡ -1 (mod 2^{32}). That simplifies + * the reduction factor to u = new_a[0] * 1 = new_a[0], which we compute + * early as a[0] + x[i]*y[0] (the low word of the partial accumulator after + * the multiply step) to break the data dependency. + */ +static inline void p256_mont_mul(u32 z[P256_WORDS], + const u32 x[P256_WORDS], + const u32 y[P256_WORDS]) +{ + u32 a[P256_WORDS + 1] = {0}; + u32 reduced[P256_WORDS]; + size_t i; + + for (i = 0; i < P256_WORDS; i++) { + u32 u = a[0] + x[i] * y[0]; + u32 c = p256_u288_muladd(a, x[i], y); + c += p256_u288_muladd(a, u, p256_p_le); + p256_u288_rshift32(a, c); + } + + { + u32 carry_add = a[P256_WORDS]; + u32 carry_sub = p256_words_sub_borrow(reduced, a, p256_p_le); + u32 use_sub = carry_add | (1U - carry_sub); + + os_memcpy(z, a, sizeof(u32) * P256_WORDS); + p256_words_cmov(z, reduced, use_sub); + } +}