From bbe311569c4f991979f89279d2cf584ee2744d33 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Wed, 24 Dec 2025 13:05:25 +0700 Subject: [PATCH] fix(picolibc): add psram fix for libc functions --- components/esp_libc/CMakeLists.txt | 21 + components/esp_libc/src/esp_libc.lf | 8 + .../src/picolibc/esp32_psram/memcpy.S | 358 ++++++++++++++++++ .../src/picolibc/esp32_psram/memset.S | 208 ++++++++++ .../src/picolibc/esp32_psram/strcpy.S | 289 ++++++++++++++ .../src/picolibc/esp32_psram/strncpy.S | 288 ++++++++++++++ .../src/picolibc/esp32_psram/xtensa-asm.h | 78 ++++ tools/ci/check_copyright_config.yaml | 8 + 8 files changed, 1258 insertions(+) create mode 100644 components/esp_libc/src/picolibc/esp32_psram/memcpy.S create mode 100644 components/esp_libc/src/picolibc/esp32_psram/memset.S create mode 100644 components/esp_libc/src/picolibc/esp32_psram/strcpy.S create mode 100644 components/esp_libc/src/picolibc/esp32_psram/strncpy.S create mode 100644 components/esp_libc/src/picolibc/esp32_psram/xtensa-asm.h diff --git a/components/esp_libc/CMakeLists.txt b/components/esp_libc/CMakeLists.txt index 1234576f4e..38f218270f 100644 --- a/components/esp_libc/CMakeLists.txt +++ b/components/esp_libc/CMakeLists.txt @@ -33,6 +33,18 @@ set(srcs "src/reent_syscalls.c" "src/port/esp_time_impl.c") +# TODO IDF-15041: remove this block and sources +if(CONFIG_LIBC_PICOLIBC AND CONFIG_SPIRAM_CACHE_WORKAROUND) + list(APPEND srcs "src/picolibc/esp32_psram/memcpy.S") + list(APPEND srcs "src/picolibc/esp32_psram/memset.S") + list(APPEND srcs "src/picolibc/esp32_psram/strcpy.S") + list(APPEND srcs "src/picolibc/esp32_psram/strncpy.S") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_memset_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strcpy_impl") + list(APPEND EXTRA_LINK_FLAGS "-u esp_libc_include_strncpy_impl") +endif() + if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) list(APPEND srcs 
"src/port/xtensa/stdatomic_s32c1i.c") endif() @@ -101,6 +113,15 @@ target_link_libraries(${COMPONENT_LIB} INTERFACE c m ${CONFIG_COMPILER_RT_LIB_NA set_source_files_properties(heap.c PROPERTIES COMPILE_FLAGS -fno-builtin) +# TODO IDF-15041: remove this block and sources +if(CONFIG_LIBC_PICOLIBC AND CONFIG_SPIRAM_CACHE_WORKAROUND) + set_source_files_properties("src/picolibc/esp32_psram/memcpy.S" + "src/picolibc/esp32_psram/memset.S" + "src/picolibc/esp32_psram/strcpy.S" + "src/picolibc/esp32_psram/strncpy.S" + PROPERTIES COMPILE_FLAGS "-DXTENSA_ESP32_PSRAM_CACHE_FIX=1") +endif() + if(CONFIG_STDATOMIC_S32C1I_SPIRAM_WORKAROUND) set_source_files_properties("src/port/xtensa/stdatomic_s32c1i.c" PROPERTIES COMPILE_FLAGS "-mno-disable-hardware-atomics") diff --git a/components/esp_libc/src/esp_libc.lf b/components/esp_libc/src/esp_libc.lf index 46509e58e2..401418ee7a 100644 --- a/components/esp_libc/src/esp_libc.lf +++ b/components/esp_libc/src/esp_libc.lf @@ -9,6 +9,14 @@ entries: strncpy (noflash) strcmp (noflash) strncmp (noflash) + # TODO IDF-15041: remove this block + if LIBC_PICOLIBC = y: + if SPIRAM_CACHE_WORKAROUND = y: + memcpy (noflash) + memset (noflash) + if SPIRAM_CACHE_LIBSTR_IN_IRAM = y: + strcpy (noflash) + strncpy (noflash) if LIBC_MISC_IN_IRAM = y: if HEAP_PLACE_FUNCTION_INTO_FLASH = n: heap (noflash) diff --git a/components/esp_libc/src/picolibc/esp32_psram/memcpy.S b/components/esp_libc/src/picolibc/esp32_psram/memcpy.S new file mode 100644 index 0000000000..2a15428e69 --- /dev/null +++ b/components/esp_libc/src/picolibc/esp32_psram/memcpy.S @@ -0,0 +1,358 @@ +/* + * SPDX-FileCopyrightText: 2002-2008 Tensilica Inc. + * + * SPDX-License-Identifier: MIT + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* ANSI C standard library function memcpy. + + Copyright (c) 2002-2008 Tensilica Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include + +#include "xtensa-asm.h" + +/* If the Xtensa Unaligned Load Exception option is not used, this + code can run a few cycles faster by relying on the low address bits + being ignored. However, if the code is then run with an Xtensa ISS + client that checks for unaligned accesses, it will produce a lot of + warning messages. Set this flag to disable the use of unaligned + accesses and keep the ISS happy. */ + +/* #define UNALIGNED_ADDRESSES_CHECKED XCHAL_UNALIGNED_LOAD_EXCEPTION */ +#define UNALIGNED_ADDRESSES_CHECKED 1 + + +/* void *memcpy (void *dst, const void *src, size_t len) + + The algorithm is as follows: + + If the destination is unaligned, align it by conditionally + copying 1- and/or 2-byte pieces. + + If the source is aligned, copy 16 bytes with a loop, and then finish up + with 8, 4, 2, and 1-byte copies conditional on the length. 
+ + Else (if source is unaligned), do the same, but use SRC to align the + source data. + + This code tries to use fall-through branches for the common + case of aligned source and destination and multiple of 4 (or 8) length. */ + + +/* Byte by byte copy. */ + + .text + .begin schedule + .align XCHAL_INST_FETCH_WIDTH + .literal_position +__memcpy_aux: + + /* Skip bytes to get proper alignment for three-byte loop */ +.skip XCHAL_INST_FETCH_WIDTH - 3 + +.Lbytecopy: +#if XCHAL_HAVE_LOOPS + loopnez a4, 2f +#else + beqz a4, 2f + add a7, a3, a4 // a7 = end address for source +#endif +1: l8ui a6, a3, 0 + addi a3, a3, 1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + nop // PSRAM cache workaround: pad between the byte load above and the byte store below + nop + nop +#endif + s8i a6, a5, 0 + addi a5, a5, 1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after each byte store +#endif +#if !XCHAL_HAVE_LOOPS + bltu a3, a7, 1b // repeat until src reaches the end address in a7 +#endif +2: leaf_return + + +/* Destination is unaligned. */ + + .align 4 +.Ldst1mod2: // dst is only byte aligned + + /* Do short copies byte-by-byte. */ + bltui a4, 7, .Lbytecopy + + /* Copy 1 byte. */ + l8ui a6, a3, 0 + addi a3, a3, 1 + addi a4, a4, -1 + s8i a6, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif + addi a5, a5, 1 + + /* Return to main algorithm if dst is now aligned. */ + bbci.l a5, 1, .Ldstaligned + +.Ldst2mod4: // dst has 16-bit alignment + + /* Do short copies byte-by-byte. */ + bltui a4, 6, .Lbytecopy + + /* Copy 2 bytes. */ + l8ui a6, a3, 0 + l8ui a7, a3, 1 + addi a3, a3, 2 + addi a4, a4, -2 + s8i a6, a5, 0 + s8i a7, a5, 1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the two byte stores +#endif + addi a5, a5, 2 + + /* dst is now aligned; return to main algorithm. */ + j .Ldstaligned + + + .align 4 + .global memcpy + .type memcpy, @function +memcpy: + leaf_entry sp, 16 + /* a2 = dst, a3 = src, a4 = len */ + + mov a5, a2 // copy dst so that a2 is return value + bbsi.l a2, 0, .Ldst1mod2 + bbsi.l a2, 1, .Ldst2mod4 +.Ldstaligned: + + /* Get number of loop iterations with 16B per iteration. */ + srli a7, a4, 4 // a7 = len / 16 + + /* Check if source is aligned.
*/ + slli a8, a3, 30 // a8 = low two bits of src moved to the top; nonzero means src is not word-aligned + bnez a8, .Lsrcunaligned + + /* Destination and source are word-aligned, use word copy. */ +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a8, a7, 4 + add a8, a8, a3 // a8 = end of last 16B source chunk +#endif + +#if XTENSA_ESP32_PSRAM_CACHE_FIX + +1: l32i a6, a3, 0 + l32i a7, a3, 4 + s32i a6, a5, 0 + s32i a7, a5, 4 + memw // PSRAM cache workaround: memory barrier after each pair of word stores + l32i a6, a3, 8 + l32i a7, a3, 12 + s32i a6, a5, 8 + s32i a7, a5, 12 + memw + + addi a3, a3, 16 + addi a5, a5, 16 + +#else + +1: l32i a6, a3, 0 + l32i a7, a3, 4 + s32i a6, a5, 0 + l32i a6, a3, 8 + s32i a7, a5, 4 + l32i a7, a3, 12 + s32i a6, a5, 8 + addi a3, a3, 16 + s32i a7, a5, 12 + addi a5, a5, 16 + +#endif + + +#if !XCHAL_HAVE_LOOPS + bltu a3, a8, 1b // repeat until src reaches the end address in a8 +#endif + + /* Copy any leftover pieces smaller than 16B. */ +2: bbci.l a4, 3, 3f // skip the 8-byte copy when bit 3 of len is clear + + /* Copy 8 bytes. */ + l32i a6, a3, 0 + l32i a7, a3, 4 + addi a3, a3, 8 + s32i a6, a5, 0 + s32i a7, a5, 4 + addi a5, a5, 8 + +3: bbsi.l a4, 2, 4f + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier before returning +#endif + leaf_return + + .align 4 + /* Copy 4 bytes. */ +4: l32i a6, a3, 0 + addi a3, a3, 4 + s32i a6, a5, 0 + addi a5, a5, 4 + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + + /* Copy 2 bytes. */ +5: l16ui a6, a3, 0 + addi a3, a3, 2 + s16i a6, a5, 0 + addi a5, a5, 2 + bbsi.l a4, 0, 6f +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + + /* Copy 1 byte. */ +6: l8ui a6, a3, 0 + s8i a6, a5, 0 + +.Ldone: +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + + +/* Destination is aligned; source is unaligned. */ + + .align 4 +.Lsrcunaligned: + /* Avoid loading anything for zero-length copies. */ + beqz a4, .Ldone + + /* Copy 16 bytes per iteration for word-aligned dst and + unaligned src.
*/ + ssa8 a3 // set shift amount from byte offset +#if UNALIGNED_ADDRESSES_CHECKED + srli a11, a8, 30 // save unalignment offset for below + sub a3, a3, a11 // align a3 +#endif + l32i a6, a3, 0 // load first word +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a10, a7, 4 + add a10, a10, a3 // a10 = end of last 16B source chunk +#endif +1: l32i a7, a3, 4 + l32i a8, a3, 8 + src_b a6, a6, a7 // combine two aligned source words using the shift amount set by ssa8 + s32i a6, a5, 0 + l32i a9, a3, 12 + src_b a7, a7, a8 + s32i a7, a5, 4 + l32i a6, a3, 16 + src_b a8, a8, a9 + s32i a8, a5, 8 + addi a3, a3, 16 + src_b a9, a9, a6 + s32i a9, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a3, a10, 1b // repeat until src reaches the end address in a10 +#endif + +2: bbci.l a4, 3, 3f + + /* Copy 8 bytes. */ + l32i a7, a3, 4 + l32i a8, a3, 8 + src_b a6, a6, a7 + s32i a6, a5, 0 + addi a3, a3, 8 + src_b a7, a7, a8 + s32i a7, a5, 4 + addi a5, a5, 8 + mov a6, a8 // keep the last loaded word in a6 for the next step + +3: bbci.l a4, 2, 4f + + /* Copy 4 bytes. */ + l32i a7, a3, 4 + addi a3, a3, 4 + src_b a6, a6, a7 + s32i a6, a5, 0 + addi a5, a5, 4 + mov a6, a7 +4: +#if UNALIGNED_ADDRESSES_CHECKED + add a3, a3, a11 // readjust a3 with correct misalignment +#endif + bbsi.l a4, 1, 5f + bbsi.l a4, 0, 6f + leaf_return // NOTE(review): unlike the aligned-source exits, this return has no memw under XTENSA_ESP32_PSRAM_CACHE_FIX - confirm that is intended + + /* Copy 2 bytes. */ +5: l8ui a6, a3, 0 + l8ui a7, a3, 1 + addi a3, a3, 2 + s8i a6, a5, 0 + s8i a7, a5, 1 + addi a5, a5, 2 + bbsi.l a4, 0, 6f +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + + /* Copy 1 byte. */ +6: l8ui a6, a3, 0 + s8i a6, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + + .end schedule + + .size memcpy, . - memcpy + + + .global esp_libc_include_memcpy_impl + .type esp_libc_include_memcpy_impl, @function +esp_libc_include_memcpy_impl: // empty marker symbol; CMakeLists.txt requests it with "-u esp_libc_include_memcpy_impl" + .size esp_libc_include_memcpy_impl, .
- esp_libc_include_memcpy_impl diff --git a/components/esp_libc/src/picolibc/esp32_psram/memset.S b/components/esp_libc/src/picolibc/esp32_psram/memset.S new file mode 100644 index 0000000000..78d6860052 --- /dev/null +++ b/components/esp_libc/src/picolibc/esp32_psram/memset.S @@ -0,0 +1,208 @@ +/* + * SPDX-FileCopyrightText: 2001-2008 Tensilica Inc. + * + * SPDX-License-Identifier: MIT + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* ANSI C standard library function memset. + + Copyright (c) 2001-2008 Tensilica Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include + +#include "xtensa-asm.h" + +/* void *memset (void *dst, int c, size_t length) + + The algorithm is as follows: + + Create a word with c in all byte positions. + + If the destination is aligned, set 16B chunks with a loop, and then + finish up with 8B, 4B, 2B, and 1B stores conditional on the length. 
+ + If the destination is unaligned, align it by conditionally + setting 1B and/or 2B and then go to aligned case. + + This code tries to use fall-through branches for the common + case of an aligned destination (except for the branches to + the alignment labels). */ + + +/* Byte-by-byte set. */ + + .text + .begin schedule + .align XCHAL_INST_FETCH_WIDTH + .literal_position +__memset_aux: + + /* Skip bytes to get proper alignment for three-byte loop */ +.skip XCHAL_INST_FETCH_WIDTH - 3 + +.Lbyteset: +#if XCHAL_HAVE_LOOPS + loopnez a4, 2f +#else + beqz a4, 2f + add a6, a5, a4 // a6 = ending address +#endif +1: s8i a3, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after each byte store +#endif + addi a5, a5, 1 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b // repeat until dst reaches the ending address in a6 +#endif +2: leaf_return + + +/* Destination is unaligned. */ + + .align 4 + +.Ldst1mod2: // dst is only byte aligned + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 1 byte. */ + s8i a3, a5, 0 + addi a5, a5, 1 + addi a4, a4, -1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif + + /* Now retest if dst is aligned. */ + _bbci.l a5, 1, .Ldstaligned + +.Ldst2mod4: // dst has 16-bit alignment + + /* Do short sizes byte-by-byte. */ + bltui a4, 8, .Lbyteset + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 + addi a4, a4, -2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the halfword store +#endif + + /* dst is now aligned; return to main algorithm */ + j .Ldstaligned + + + .align 4 + .global memset + .type memset, @function +memset: + leaf_entry sp, 16 + /* a2 = dst, a3 = c, a4 = length */ + + /* Duplicate character into all bytes of word. */ + extui a3, a3, 0, 8 // keep only the low 8 bits of c + slli a7, a3, 8 + or a3, a3, a7 // low 16 bits now hold the byte twice + slli a7, a3, 16 + or a3, a3, a7 // all four bytes of a3 now hold the byte + + mov a5, a2 // copy dst so that a2 is return value + + /* Check if dst is unaligned. */ + _bbsi.l a2, 0, .Ldst1mod2 + _bbsi.l a2, 1, .Ldst2mod4 +.Ldstaligned: + + /* Get number of loop iterations with 16B per iteration.
*/ + srli a7, a4, 4 // a7 = length / 16 + +#if XTENSA_ESP32_PSRAM_CACHE_FIX + // PSRAM cache workaround: skip the extra store below when there is no full 16B iteration to run + beqz a7, 2f + // this extra store seems to work to prefetch the cache line + s32i a3, a5, 0 + nop +#endif + + /* Destination is word-aligned. */ +#if XCHAL_HAVE_LOOPS + loopnez a7, 2f +#else + beqz a7, 2f + slli a6, a7, 4 + add a6, a6, a5 // a6 = end of last 16B chunk +#endif + /* Set 16 bytes per iteration. */ +1: s32i a3, a5, 0 + s32i a3, a5, 4 + s32i a3, a5, 8 + s32i a3, a5, 12 + addi a5, a5, 16 +#if !XCHAL_HAVE_LOOPS + bltu a5, a6, 1b // repeat until dst reaches the end address in a6 +#endif + + /* Set any leftover pieces smaller than 16B. */ +2: bbci.l a4, 3, 3f + + /* Set 8 bytes. */ + s32i a3, a5, 0 + s32i a3, a5, 4 + addi a5, a5, 8 + +3: bbci.l a4, 2, 4f + + /* Set 4 bytes. */ + s32i a3, a5, 0 + addi a5, a5, 4 + +4: bbci.l a4, 1, 5f + + /* Set 2 bytes. */ + s16i a3, a5, 0 + addi a5, a5, 2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the halfword store +#endif + +5: bbci.l a4, 0, 6f + + /* Set 1 byte. */ + s8i a3, a5, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif +6: leaf_return + + .end schedule + + .size memset, . - memset + + + .global esp_libc_include_memset_impl + .type esp_libc_include_memset_impl, @function +esp_libc_include_memset_impl: // empty marker symbol; CMakeLists.txt requests it with "-u esp_libc_include_memset_impl" + .size esp_libc_include_memset_impl, . - esp_libc_include_memset_impl diff --git a/components/esp_libc/src/picolibc/esp32_psram/strcpy.S b/components/esp_libc/src/picolibc/esp32_psram/strcpy.S new file mode 100644 index 0000000000..397d29f773 --- /dev/null +++ b/components/esp_libc/src/picolibc/esp32_psram/strcpy.S @@ -0,0 +1,289 @@ +/* + * SPDX-FileCopyrightText: 2001-2008 Tensilica Inc. + * + * SPDX-License-Identifier: MIT + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* ANSI C standard library function strcpy. + + Copyright (c) 2001-2008 Tensilica Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include + +#include "xtensa-asm.h" + + .text + .begin schedule + .align 4 + .literal_position + .global strcpy + .type strcpy, @function +strcpy: + leaf_entry sp, 16 + /* a2 = dst, a3 = src */ + + mov a10, a2 // a10 = working dst pointer; a2 is left untouched as the return value + movi a4, MASK0 // a4..a7 = masks selecting bytes 0..3 of a loaded word + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a3, 0, .Lsrc1mod2 + bbsi.l a3, 1, .Lsrc2mod4 +.Lsrcaligned: + + /* Check if the destination is aligned.
*/ + movi a8, 3 // mask of the two low address bits + bnone a10, a8, .Laligned + + j .Ldstunaligned + +.Lsrc1mod2: // src address is odd + l8ui a8, a3, 0 // get byte 0 + addi a3, a3, 1 // advance src pointer + s8i a8, a10, 0 // store byte 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif + beqz a8, 1f // if byte 0 is zero + addi a10, a10, 1 // advance dst pointer + bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned + +.Lsrc2mod4: // src address is 2 mod 4 + l8ui a8, a3, 0 // get byte 0 + /* 1-cycle interlock */ + s8i a8, a10, 0 // store byte 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif + beqz a8, 1f // if byte 0 is zero + l8ui a8, a3, 1 // get byte 1 + addi a3, a3, 2 // advance src pointer + s8i a8, a10, 1 // store byte 1 + addi a10, a10, 2 // advance dst pointer +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the byte store +#endif + bnez a8, .Lsrcaligned // continue unless byte 1 was the terminator +1: leaf_return + + +/* dst is word-aligned; src is word-aligned. */ + + .align 4 +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + /* (2 mod 4) alignment for loop instruction */ +#else + /* (1 mod 4) alignment for loop instruction */ + .byte 0 + .byte 0 +#endif +.Laligned: +#if XCHAL_HAVE_DENSITY + _movi.n a8, 0 // set up for the maximum loop count +#else + _movi a8, 0 // set up for the maximum loop count +#endif + loop a8, .Lz3 // loop forever (almost anyway) + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a4, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst +#if XTENSA_ESP32_PSRAM_CACHE_FIX + l32i a8, a10, 0 // PSRAM cache workaround: read the word just stored back from dst... + s32i a8, a10, 0 // ...and store it again +#endif + bnone a8, a7, .Lz3 // if byte 3 is zero + addi a10, a10, 4 // advance dst pointer + +#else /* !XCHAL_HAVE_LOOPS */ + +1: addi a10, a10, 4 // advance dst pointer +.Laligned: + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a4, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8,
a10, 0 // store word to dst +#if XTENSA_ESP32_PSRAM_CACHE_FIX + l32i a8, a10, 0 // PSRAM cache workaround: read the word just stored back from dst... + s32i a8, a10, 0 // ...and store it again +#endif + + bany a8, a7, 1b // if byte 3 is nonzero, iterate +#endif /* !XCHAL_HAVE_LOOPS */ + +.Lz3: /* Byte 3 is zero. */ + leaf_return + +.Lz0: /* Byte 0 is zero. */ +#ifdef __XTENSA_EB__ + movi a8, 0 +#endif + s8i a8, a10, 0 // store the terminating zero byte +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the final store +#endif + leaf_return + +.Lz1: /* Byte 1 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 // store byte 0 together with the zero terminator +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the final store +#endif + leaf_return + +.Lz2: /* Byte 2 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 // store bytes 0 and 1 + movi a8, 0 + s8i a8, a10, 2 // store the zero terminator as byte 2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after the final store +#endif + leaf_return + +#if 1 +/* For now just use byte copy loop for the unaligned destination case. */ + + .align 4 +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + /* (2 mod 4) alignment for loop instruction */ +#else + /* (1 mod 4) alignment for loop instruction */ + .byte 0 + .byte 0 +#endif +#endif +.Ldstunaligned: + +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + _movi.n a8, 0 // set up for the maximum loop count +#else + _movi a8, 0 // set up for the maximum loop count +#endif + loop a8, 2f // loop forever (almost anyway) +#endif +1: l8ui a8, a3, 0 // copy one byte per iteration + addi a3, a3, 1 + s8i a8, a10, 0 + addi a10, a10, 1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier after each byte store +#endif +#if XCHAL_HAVE_LOOPS + beqz a8, 2f // stop once the terminator has been stored +#else + bnez a8, 1b // iterate until the terminator has been stored +#endif +2: leaf_return + +#else /* 0 */ + +/* This code is not functional yet: it is compiled out by the "#if 1" above, and .Lu3 to .Lu6 are branched to but never defined. */ + +.Ldstunaligned: + l32i a9, a2, 0 // load word from dst +#ifdef __XTENSA_EB__ + ssa8b a9 // rotate by dst alignment so that + src a9, a9, a9 // shift in loop will put back in place + ssa8l a9 // shift left by byte*8 +#else + ssa8l a9 // rotate by dst alignment so that + src a9, a9, a9 // shift in loop will put back in place + ssa8b a9 // shift left by 32-byte*8 +#endif + +/* dst is word-aligned; src is unaligned.
*/ + +.Ldstunalignedloop: + l32i a8, a3, 0 // get word from src + /* 1-cycle interlock */ + bnone a8, a4, .Lu0 // if byte 0 is zero + bnone a8, a5, .Lu1 // if byte 1 is zero + bnone a8, a6, .Lu2 // if byte 2 is zero + src a9, a8, a9 // combine last word and this word + s32i a9, a10, 0 // store word to dst + bnone a8, a7, .Lu3 // if byte 3 is zero + l32i a9, a3, 4 // get word from src + addi a3, a3, 8 // advance src pointer + bnone a9, a4, .Lu4 // if byte 0 is zero + bnone a9, a5, .Lu5 // if byte 1 is zero + bnone a9, a6, .Lu6 // if byte 2 is zero + src a8, a9, a8 // combine last word and this word + s32i a8, a10, 4 // store word to dst + addi a10, a10, 8 // advance dst pointer + bany a8, a7, .Ldstunalignedloop // if byte 3 is nonzero, iterate + + /* Byte 7 is zero. */ +.Lu7: leaf_return + +.Lu0: /* Byte 0 is zero. */ +#ifdef __XTENSA_EB__ + movi a8, 0 +#endif + s8i a8, a10, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + +.Lu1: /* Byte 1 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + +.Lu2: /* Byte 2 is zero. */ + s16i a8, a10, 0 + movi a8, 0 + s8i a8, a10, 2 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw +#endif + leaf_return + +#endif /* 0 */ + .end schedule + + .size strcpy, . - strcpy + + + .global esp_libc_include_strcpy_impl + .type esp_libc_include_strcpy_impl, @function +esp_libc_include_strcpy_impl: // empty marker symbol; CMakeLists.txt requests it with "-u esp_libc_include_strcpy_impl" + .size esp_libc_include_strcpy_impl, . - esp_libc_include_strcpy_impl diff --git a/components/esp_libc/src/picolibc/esp32_psram/strncpy.S b/components/esp_libc/src/picolibc/esp32_psram/strncpy.S new file mode 100644 index 0000000000..09c7727aff --- /dev/null +++ b/components/esp_libc/src/picolibc/esp32_psram/strncpy.S @@ -0,0 +1,288 @@ +/* + * SPDX-FileCopyrightText: 2001-2008 Tensilica Inc.
+ * + * SPDX-License-Identifier: MIT + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* ANSI C standard library function strncpy. + + Copyright (c) 2001-2008 Tensilica Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include + +#include "xtensa-asm.h" + + .text +.begin schedule + .align 4 + .literal_position +__strncpy_aux: + +.Lsrc1mod2: // src address is odd + l8ui a8, a3, 0 // get byte 0 + addi a3, a3, 1 // advance src pointer + s8i a8, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned + +.Lsrc2mod4: // src address is 2 mod 4 + l8ui a8, a3, 0 // get byte 0 + addi a4, a4, -1 // decrement n + s8i a8, a10, 0 // store byte 0 + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + l8ui a8, a3, 1 // get byte 1 + addi a3, a3, 2 // advance src pointer + s8i a8, a10, 0 // store byte 1 (dst was already advanced past byte 0) + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + bnez a8, .Lsrcaligned // continue unless byte 1 was the terminator + j .Lfill + +.Lret: +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier before returning +#endif + leaf_return + + + .align 4 + .global strncpy + .type strncpy, @function +strncpy: + leaf_entry sp, 16 + /* a2 = dst, a3 = src, a4 = n */ + + mov a10, a2 // a10 = working dst pointer; a2 is left untouched as the return value + beqz a4, .Lret // if n is zero + + movi a11, MASK0 // byte 0 mask goes in a11 because a4 holds n + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a3, 0, .Lsrc1mod2 + bbsi.l a3, 1, .Lsrc2mod4 +.Lsrcaligned: + + /* Check if the destination is aligned. */ + movi a8, 3 // mask of the two low address bits + bnone a10, a8, .Laligned + + j .Ldstunaligned + + +/* Fill the dst with zeros -- n is at least 1. */ + +.Lfill: + movi a9, 0 // a9 = zero used by every fill store + bbsi.l a10, 0, .Lfill1mod2 + bbsi.l a10, 1, .Lfill2mod4 +.Lfillaligned: + blti a4, 4, .Lfillcleanup + + /* Loop filling complete words with zero.
*/ +#if XCHAL_HAVE_LOOPS + + srai a8, a4, 2 // a8 = number of whole words left to clear + loop a8, 1f + s32i a9, a10, 0 + addi a10, a10, 4 + +1: slli a8, a8, 2 + sub a4, a4, a8 // a4 = leftover byte count after the word loop + +#else /* !XCHAL_HAVE_LOOPS */ + +1: s32i a9, a10, 0 + addi a10, a10, 4 + addi a4, a4, -4 + bgei a4, 4, 1b + +#endif /* !XCHAL_HAVE_LOOPS */ + + beqz a4, 2f + +.Lfillcleanup: + /* Fill leftover (1 to 3) bytes with zero. */ + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + addi a10, a10, 1 // advance dst pointer + bnez a4, .Lfillcleanup + +2: +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier before returning +#endif + leaf_return + +.Lfill1mod2: // dst address is odd + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + addi a10, a10, 1 // advance dst pointer + bbci.l a10, 1, .Lfillaligned // if dst is now word-aligned + +.Lfill2mod4: // dst address is 2 mod 4 + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + s8i a9, a10, 1 // store byte 1 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + addi a10, a10, 2 // advance dst pointer + j .Lfillaligned + + +/* dst is word-aligned; src is word-aligned; n is at least 1.
*/ + + .align 4 +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + /* (2 mod 4) alignment for loop instruction */ +#else + /* (1 mod 4) alignment for loop instruction */ + .byte 0 + .byte 0 +#endif +#endif +.Laligned: +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + _movi.n a8, 0 // set up for the maximum loop count +#else + _movi a8, 0 // set up for the maximum loop count +#endif + loop a8, 1f // loop forever (almost anyway) + blti a4, 5, .Ldstunaligned // n is near limit; do one at a time + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a11, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + addi a4, a4, -4 // decrement n + addi a10, a10, 4 // advance dst pointer + bnone a8, a7, .Lfill // if byte 3 is zero +1: + +#else /* !XCHAL_HAVE_LOOPS */ + +1: blti a4, 5, .Ldstunaligned // n is near limit; do one at a time + l32i a8, a3, 0 // get word from src + addi a3, a3, 4 // advance src pointer + bnone a8, a11, .Lz0 // if byte 0 is zero + bnone a8, a5, .Lz1 // if byte 1 is zero + bnone a8, a6, .Lz2 // if byte 2 is zero + s32i a8, a10, 0 // store word to dst + addi a4, a4, -4 // decrement n + addi a10, a10, 4 // advance dst pointer + bany a8, a7, 1b // if byte 3 is nonzero (no zero byte in this word), iterate +#endif /* !XCHAL_HAVE_LOOPS */ + + j .Lfill + +.Lz0: /* Byte 0 is zero. */ +#ifdef __XTENSA_EB__ + movi a8, 0 +#endif + s8i a8, a10, 0 // store the zero terminator + addi a4, a4, -1 // decrement n + addi a10, a10, 1 // advance dst pointer + j .Lfill + +.Lz1: /* Byte 1 is zero. */ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 // store byte 0 together with the zero terminator + addi a4, a4, -2 // decrement n + addi a10, a10, 2 // advance dst pointer + j .Lfill + +.Lz2: /* Byte 2 is zero.
*/ +#ifdef __XTENSA_EB__ + extui a8, a8, 16, 16 +#endif + s16i a8, a10, 0 // store bytes 0 and 1 + movi a8, 0 + s8i a8, a10, 2 // store the zero terminator as byte 2 + addi a4, a4, -3 // decrement n + addi a10, a10, 3 // advance dst pointer + j .Lfill + + .align 4 +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + /* (2 mod 4) alignment for loop instruction */ +#else + /* (1 mod 4) alignment for loop instruction */ + .byte 0 + .byte 0 +#endif +#endif +.Ldstunaligned: + +#if XCHAL_HAVE_LOOPS +#if XCHAL_HAVE_DENSITY + _movi.n a8, 0 // set up for the maximum loop count +#else + _movi a8, 0 // set up for the maximum loop count +#endif + loop a8, 2f // loop forever (almost anyway) +#endif +1: l8ui a8, a3, 0 // copy one byte per iteration + addi a3, a3, 1 +#if XTENSA_ESP32_PSRAM_CACHE_FIX + nop // PSRAM cache workaround: pad between the byte load above and the byte store below + nop + nop +#endif + s8i a8, a10, 0 + addi a4, a4, -1 // decrement n + beqz a4, 3f // n exhausted: return without zero-filling + addi a10, a10, 1 +#if XCHAL_HAVE_LOOPS + beqz a8, 2f // terminator copied: leave the loop +#else + bnez a8, 1b // iterate until the terminator has been copied +#endif +2: j .Lfill + +3: +#if XTENSA_ESP32_PSRAM_CACHE_FIX + memw // PSRAM cache workaround: memory barrier before returning +#endif + leaf_return +.end schedule + + .size strncpy, . - strncpy + + .global esp_libc_include_strncpy_impl + .type esp_libc_include_strncpy_impl, @function +esp_libc_include_strncpy_impl: // empty marker symbol; CMakeLists.txt requests it with "-u esp_libc_include_strncpy_impl" + .size esp_libc_include_strncpy_impl, . - esp_libc_include_strncpy_impl diff --git a/components/esp_libc/src/picolibc/esp32_psram/xtensa-asm.h b/components/esp_libc/src/picolibc/esp32_psram/xtensa-asm.h new file mode 100644 index 0000000000..61efcf7cc1 --- /dev/null +++ b/components/esp_libc/src/picolibc/esp32_psram/xtensa-asm.h @@ -0,0 +1,78 @@ +/* + * SPDX-FileCopyrightText: 2006 Tensilica Inc. + * + * SPDX-License-Identifier: MIT + * + * SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD + */ +/* Copyright (c) 2006 Tensilica Inc.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* Define macros for leaf function entry and return, supporting either the + * standard register windowed ABI or the non-windowed call0 ABI. These + * macros do not allocate any extra stack space, so they only work for + * leaf functions that do not need to spill anything to the stack. 
*/ + +#include + +.macro leaf_entry reg, size +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ +entry \reg, \size +#else +/* not the windowed ABI: no entry instruction is emitted */ +#endif +.endm + +.macro leaf_return +#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ +retw // windowed ABI return +#else +ret // non-windowed return +#endif +.endm + +.macro src_b r, w0, w1 +#ifdef __XTENSA_EB__ +src \r, \w0, \w1 +#else +src \r, \w1, \w0 // little-endian: the two source words are passed in swapped order +#endif +.endm + +.macro ssa8 r +#ifdef __XTENSA_EB__ +ssa8b \r +#else +ssa8l \r // little-endian variant of the byte-offset shift setup +#endif +.endm + +#if XCHAL_HAVE_BE // NOTE(review): the macros above test __XTENSA_EB__ instead - confirm the two always agree +#define MASK0 0xff000000 +#define MASK1 0x00ff0000 +#define MASK2 0x0000ff00 +#define MASK3 0x000000ff +#else +#define MASK0 0x000000ff +#define MASK1 0x0000ff00 +#define MASK2 0x00ff0000 +#define MASK3 0xff000000 +#endif diff --git a/tools/ci/check_copyright_config.yaml b/tools/ci/check_copyright_config.yaml index 1df236acf8..865c00fd8b 100644 --- a/tools/ci/check_copyright_config.yaml +++ b/tools/ci/check_copyright_config.yaml @@ -188,6 +188,14 @@ xtensa: - Apache-2.0 #Files added to the xtensa component by us - MIT #Cadence sources +# TODO IDF-15041 - remove this block +esp_libc: + include: + - 'components/esp_libc/**' + allowed_licenses: + - Apache-2.0 #Files added to the esp_libc component by us + - MIT #Cadence sources + tinyusb: include: - 'examples/peripherals/usb/device/tusb_midi/'