Merge branch 'feature/esp32s31_pie_coproc_old_commit' into 'master'

feat: add support for PIE coprocessor on the ESP32-S31

Closes IDF-14867 and IDF-14661

See merge request espressif/esp-idf!45851
This commit is contained in:
Omar Chebib
2026-04-01 15:46:01 +08:00
6 changed files with 292 additions and 28 deletions
@@ -839,6 +839,7 @@ RvCoprocSaveArea* pxPortGetCoprocArea(StaticTask_t* task, bool allocate, int cop
*
* @param coreid Current core
* @param coproc Coprocessor to save context of
* @param owner New owner of the coprocessor. Can be NULL to clear the owner.
*
* @returns Coprocessor former owner's save area, can be NULL if there was no owner yet, can be -1 if
* the former owner is the same as the new owner.
@@ -875,6 +876,67 @@ void vPortCoprocUsedInISR(void* frame)
xt_unhandled_exception(frame);
}
#if CONFIG_IDF_TARGET_ESP32S31
/* On the ESP32-S31, the PIE is only available on core 1, so a few checks must be performed whenever core 0 executes
* a PIE instruction. The functions below implement these checks. */
/**
* @brief Called when a task uses a PIE instruction on core 0.
*
* In a unicore configuration this always raises a panic, since the task cannot be moved to core 1.
* Otherwise, the function checks that the task is allowed to migrate (not in an ISR, able to yield,
* scheduler running on core 0), releases the FPU ownership the task may hold on core 0, pins the
* task to core 1 and requests a yield on core 0.
*
* @param task Task that used the PIE instruction
* @param frame Frame of the PIE instruction
*
* @returns The area in which the current FPU context must be saved when `task` was the FPU owner on core 0,
* NULL if the task is not the owner of the FPU on the current core.
*/
void* vPortTaskUsedPIEOnCPU0(StaticTask_t* task, void* frame)
{
#if CONFIG_FREERTOS_UNICORE
/* There is no core 1 to migrate to: report the error through the panic handler */
g_panic_abort = true;
g_panic_abort_details = (char *) "ERROR: PIE coprocessor is not supported in unicore configuration!\n";
xt_unhandled_exception(frame);
return NULL;
#else
void* context_to_save = NULL;
/* Make sure we are not in an interrupt context nor in a critical section */
if (xPortInIsrContext()) {
/* We are in an interrupt context, abort.
 * NOTE(review): presumably this call does not return, confirm against xt_unhandled_exception */
vPortCoprocUsedInISR(frame);
}
/* Since we count on the crosscore interrupt to reschedule the current task, we must not be in a
* critical section. In other words, we must be able to yield */
if (!xPortCanYield()) {
g_panic_abort = true;
g_panic_abort_details = (char *) "ERROR: PIE coprocessor must not be used in critical sections!\n";
xt_unhandled_exception(frame);
}
/* Check if the current task is the owner of the FPU on the current core. No need to make the following two instructions
* atomic since we are in an exception context, we can't be interrupted by an interrupt. */
if (port_uxCoprocOwner[0][FPU_COPROC_IDX] == task) {
/* Task is the owner of the FPU on the current core, set the new owner to NULL and return the save area to fill */
RvCoprocSaveArea* sa = pxPortUpdateCoprocOwner(0, FPU_COPROC_IDX, NULL);
/* `sa` is not NULL here for sure: the former owner is `task` itself, which differs from the new owner (NULL) */
context_to_save = sa->sa_coprocs[FPU_COPROC_IDX];
}
/* Migrate the task to core 1. NOTE: This will override any existing pinning of the task */
vPortTaskPinToCore(task, 1);
/* Raise an error if the scheduler is NOT running on core 0: the yield requested below could not
* reschedule the task */
if (!port_xSchedulerRunning[0]) {
/* Scheduler is not running on core 0, raise an error */
g_panic_abort = true;
g_panic_abort_details = (char *) "ERROR: Scheduler is not running on core 0, task must migrate to core 1!\n";
xt_unhandled_exception(frame);
}
/* Send a cross-core interrupt on the current core, it won't be triggered until we return from the exception handler */
esp_crosscore_int_send_yield(0);
return context_to_save;
#endif /* CONFIG_FREERTOS_UNICORE */
}
#endif /* CONFIG_IDF_TARGET_ESP32S31 */
#endif /* SOC_CPU_COPROC_NUM > 0 */
/* ------------------------------------------------ Run Time Stats ------------------------------------------------- */
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2015-2025 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2015-2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -544,6 +544,29 @@ generate_coprocessor_routine fpu, FPU_COPROC_IDX, fpu_enable, fpu_save_regs, fpu
#endif /* SOC_CPU_COPROC_NUM > 0 */
#if CONFIG_IDF_TARGET_ESP32S31
/* On the ESP32-S31, the PIE is only available on core 1, so whenever the core 0 uses a PIE instruction, we need to migrate the task to core 1.
* If the task ever used the FPU (and is currently the owner), we need to flush the context to the Task's coproc save area. */
/**
* Entry point used when core 0 executes a PIE instruction. Calls vPortTaskUsedPIEOnCPU0(task, frame)
* and, if it returns a non-NULL save area, stores the current FPU registers into it before returning.
* NOTE(review): presumably `ra` holds the exception-exit address when this routine is reached and `sp`
* points to the exception frame - confirm against the caller in the exception vector.
*/
.extern vPortTaskUsedPIEOnCPU0
.global rtos_pie_used_cpu0
.type rtos_pie_used_cpu0, @function
rtos_pie_used_cpu0:
/* Task context on core 0 */
/* `call` below overwrites ra, keep the return address in s0 (callee-saved in the RISC-V calling convention) */
mv s0, ra
/* NOTE(review): loads the first word of pxCurrentTCBs, presumably the entry of core 0 - confirm */
lw a0, pxCurrentTCBs // a0 = task
mv a1, sp // a1 = frame
call vPortTaskUsedPIEOnCPU0
/* Returns the area where the current FPU context must be saved, or NULL if the task is not the owner of the FPU on the current core */
beqz a0, rtos_pie_used_cpu0_end
/* As a precaution, enable the FPU. It will be restored to its initial state when exiting
* the exception handler (mstatus will be restored) */
fpu_enable a1
/* a0 = save area returned by vPortTaskUsedPIEOnCPU0 */
fpu_save_regs a0
rtos_pie_used_cpu0_end:
/* Return through s0, which holds the original ra */
jr s0
.size rtos_pie_used_cpu0, .-rtos_pie_used_cpu0
#endif /* CONFIG_IDF_TARGET_ESP32S31 */
/**
* @brief Get current TCB on current core
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -25,6 +25,15 @@
*/
void pie_vector_signed_add(const int32_t a[4], const int32_t b[4], int32_t dst[4]);
/**
* @brief Performs an XOR operation on two 4-word vectors using the PIE.
*
* @param a First vector
* @param b Second vector
* @param dst Destination to store the result
*/
void pie_vector_xor(const int32_t a[4], const int32_t b[4], int32_t dst[4]);
/* ------------------------------------------------------------------------------------------------------------------ */
typedef struct {
@@ -146,22 +155,23 @@ Expected:
#define TEST_UNPINNED_NUM_ITERS 5
/**
 * @brief Assert that the calling task is pinned to the core it is expected to be pinned to
 *        after having used the PIE.
 *
 * On the ESP32-S31 the expected core is always core 1. On the other targets, the task is expected
 * to be pinned to the core it is currently executing on.
 */
static void check_core_affinity(void)
{
#if CONFIG_IDF_TARGET_ESP32S31
    /* On the ESP32-S31, only core 1 has PIE support, so all tasks should be pinned to core 1 */
    const BaseType_t expected_core = 1;
#else
    /* Compare against the core we are actually running on. Using xTaskGetCoreID() here as well
     * would compare the affinity with itself and could never fail. If the task was not pinned,
     * xTaskGetCoreID() does not report a valid core ID and the assertion below fails. */
    const BaseType_t expected_core = xPortGetCoreID();
#endif
    TEST_ASSERT_EQUAL(expected_core, xTaskGetCoreID(NULL));
}
static void unpinned_task(void *arg)
{
// Disable scheduling/preemption to make sure current core ID doesn't change
#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) )
vTaskPreemptionDisable(NULL);
#else
vTaskSuspendAll();
#endif
BaseType_t cur_core_num = xPortGetCoreID();
// Check that the task is unpinned
#if !CONFIG_FREERTOS_UNICORE
#if CONFIG_FREERTOS_SMP
TEST_ASSERT_EQUAL(tskNO_AFFINITY, vTaskCoreAffinityGet(NULL));
#else
TEST_ASSERT_EQUAL(tskNO_AFFINITY, xTaskGetCoreID(NULL));
#endif
#endif // !CONFIG_FREERTOS_UNICORE
int32_t a[4] = { 0, 1, 2, 3};
@@ -175,18 +185,8 @@ static void unpinned_task(void *arg)
}
#if !CONFIG_FREERTOS_UNICORE
#if CONFIG_FREERTOS_SMP
TEST_ASSERT_EQUAL(1 << cur_core_num, vTaskCoreAffinityGet(NULL));
#else
TEST_ASSERT_EQUAL(cur_core_num, xTaskGetCoreID(NULL));
#endif
check_core_affinity();
#endif // !CONFIG_FREERTOS_UNICORE
// Re-enable scheduling/preemption
#if ( ( CONFIG_FREERTOS_SMP ) && ( !CONFIG_FREERTOS_UNICORE ) )
vTaskPreemptionEnable(NULL);
#else
xTaskResumeAll();
#endif
// Indicate done and self delete
xTaskNotifyGive((TaskHandle_t)arg);
@@ -253,6 +253,108 @@ TEST_CASE("PIE: Unsolicited context switch between tasks using the PIE", "[freer
ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
}
#if CONFIG_IDF_TARGET_ESP32S31
/**
 * @brief Task created on core 0 that uses the FPU first, then the PIE, then the FPU again.
 *
 * Checks that the task starts on core 0, ends up on core 1 after the PIE usage, and that both
 * the PIE result and the FPU value are correct after the migration.
 *
 * @param arg Handle of the task to notify once the checks are done
 */
static void core0_fpu_pie(void *arg)
{
    /* Use the FPU in this task. `volatile` prevents the compiler from folding the whole
     * computation at build time, which would leave the task without any FPU usage. */
    volatile float f = 1.0f;
    for (int i = 0; i < 10; i++) {
        f = f * 2.0f;
    }
    TEST_ASSERT_EQUAL_FLOAT(1024.0f, f);

    /* Check that the task is pinned to core 0 */
    TEST_ASSERT_EQUAL(0, xTaskGetCoreID(NULL));

    /* Use the PIE in this task and make sure we are migrated to core 1 */
    int32_t a[4] = { 0, 1, 2, 3};
    int32_t b[4] = { 111, 222, 333, 444 };
    int32_t dst[4] = { 0 };
    pie_vector_signed_add(a, b, dst);
    TEST_ASSERT_EQUAL(1, xTaskGetCoreID(NULL));
    for (size_t i = 0; i < sizeof(a) / sizeof(a[0]); i++) {
        /* Expected value first, actual value second */
        TEST_ASSERT_EQUAL(a[i] + b[i], dst[i]);
    }

    /* Try to use the FPU again on the new core */
    f = f * 2.0f;
    TEST_ASSERT_EQUAL_FLOAT(2048.0f, f);

    /* Indicate done and self delete */
    xTaskNotifyGive((TaskHandle_t)arg);
    vTaskDelete(NULL);
}
TEST_CASE("PIE: Usage in a task that owns the FPU on core 0", "[freertos]")
{
    /* Make sure that a task that owns the FPU on core 0 can be migrated to core 1 when using the PIE */
    TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle();
    const BaseType_t created = xTaskCreatePinnedToCore(core0_fpu_pie, "core0_fpu_pie", 4096,
                                                       (void*) unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL, 0);
    /* Without the task, nobody would ever notify us and the wait below would block forever */
    TEST_ASSERT_EQUAL(pdPASS, created);

    // Wait for task to complete
    ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
    vTaskDelay(10); // Short delay to allow task memory to be freed
}
#define PIE_INSTR_COUNT 2
/**
 * @brief Task executing a single PIE instruction, selected by `cst` in the given parameters.
 *
 * Once the instruction has been executed, the task notifies `main` and blocks until it gets deleted.
 *
 * @param arg Pointer to a `pie_params_t` structure
 */
static void core0_pie_instr_task(void *arg)
{
    pie_params_t *param = (pie_params_t*) arg;

    switch (param->cst) {
    case 0: {
        int32_t a = 1;
        int32_t b = 2;
        int32_t dst = 0;
        /* Opcode of type 0b0110011 */
        __asm__ volatile("esp.addx2 %0, %1, %2" : "=r"(dst) : "r"(a), "r"(b));
        break;
    }
    case 1: {
        int32_t a[4] = { 0xffffff, 0xaaaaaaaa, 0x11111111, 0xffffffff };
        int32_t b[4] = { 0x11111111, 0xaaaaaaaa, 0xffffff, 0xffffffff };
        int32_t dst[4] = { 0 };
        /* Opcode of type 0bxx11x11 (x: don't care) */
        pie_vector_xor(a, b, dst);
        break;
    }
    default:
        /* An index without a matching case would execute no PIE instruction at all,
         * and the test would pass without checking anything */
        TEST_FAIL_MESSAGE("Unknown PIE instruction index");
        break;
    }

    /* Indicate done and wait for deletion */
    xTaskNotifyGive(param->main);
    vTaskDelay(portMAX_DELAY);
}
/**
 * @brief On the ESP32-S31, core 0 does not natively parse PIE instructions due to a hardware quirk.
 * As such, the EXT_ILL CSR will not have the PIE bit set when a PIE instruction executes on core 0.
 * To handle this, instruction parsing is performed in software. This test case ensures that the software
 * parser correctly recognizes PIE instructions.
 */
TEST_CASE("PIE: Core 0 parses PIE instructions properly", "[freertos]")
{
    pie_params_t param = { 0 };
    param.main = xTaskGetCurrentTaskHandle();

    /* Create tasks that are pinned to core 0 at creation. Each of them will execute one PIE instruction.
     * If they don't crash, it's a success. */
    for (int i = 0; i < PIE_INSTR_COUNT; i++) {
        TaskHandle_t task_handle = NULL;
        param.cst = i;
        const BaseType_t created = xTaskCreatePinnedToCore(core0_pie_instr_task, "Core0PIE", 2048, &param,
                                                           UNITY_FREERTOS_PRIORITY + 1, &task_handle, 0);
        /* Without the task, the wait below would block forever and `task_handle` would not be valid */
        TEST_ASSERT_EQUAL(pdPASS, created);
        /* Wait for the task to complete */
        ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
        /* Delete the task and start again */
        vTaskDelete(task_handle);
    }
}
#endif /* CONFIG_IDF_TARGET_ESP32S31 */
#endif /* CONFIG_FREERTOS_NUMBER_OF_CORES > 1 */
#endif /* SOC_CPU_HAS_PIE */
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2024-2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -47,4 +47,23 @@ pie_vector_signed_add:
ret
.size pie_vector_signed_add, .-pie_vector_signed_add
/**
 * @brief Performs an XOR operation on two 4-word vectors using the PIE.
 *
 * @param a0 First vector
 * @param a1 Second vector
 * @param a2 Destination to store the result
 */
    /* The symbol type must be set on the routine defined here */
    .type pie_vector_xor, @function
    .global pie_vector_xor
pie_vector_xor:
    esp.vld.128.ip q0, a0, 0    /* q0 = first vector */
    esp.vld.128.ip q1, a1, 0    /* q1 = second vector */
    esp.xorq q2, q0, q1         /* q2 = q0 ^ q1 */
    esp.vst.128.ip q2, a2, 0    /* destination = q2 */
    ret
    .size pie_vector_xor, .-pie_vector_xor
#endif /* SOC_CPU_HAS_PIE */
+27 -3
View File
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2017-2025 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2017-2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -264,6 +264,29 @@ _panic_handler:
csrrw a0, EXT_ILL_CSR, zero
#if SOC_CPU_HAS_PIE
/* On the ESP32-S31, the PIE is only available on core 1, so on core 0 we need to decode the
* faulting instruction held in mtval to check whether it is a PIE instruction. */
#if CONFIG_IDF_TARGET_ESP32S31
csrr a1, mhartid
bnez a1, _ill_check_pie_bit
/* Check if the instruction is a PIE instruction. The PIE instructions opcodes (lowest 7 bits) are of
* the type:
* 0110011
* xx11x11 (x: don't care)
*/
csrr t0, mtval
/* Start with the special case 0110011 */
andi a1, t0, 0b1111111
addi a1, a1, -0b0110011
beqz a1, rtos_pie_used_cpu0
/* Continue with the opcodes that only care about bits 0, 1, 3 and 4 */
andi t0, t0, 0b0011011
addi a1, t0, -0b0011011
beqz a1, rtos_pie_used_cpu0
/* Not PIE instruction, continue the exception, we can fall-through, the EXT_ILL CSR
* will not have the PIE bit set anyway, we save a branch instruction. */
_ill_check_pie_bit:
#endif
/* Check if the PIE bit is set. */
andi a1, a0, EXT_ILL_RSN_PIE
bnez a1, rtos_save_pie_coproc
@@ -278,8 +301,9 @@ _panic_handler:
/* We cannot check the HWLP bit in a0 since a hardware bug may set this bit even though no HWLP
* instruction was executed in the program at all, so check mtval (`t0`) */
#if SOC_CPU_HAS_HWLOOP
/* HWLP instructions all have an opcode of 0b0101011 */
andi a1, t0, 0b1111111
/* HWLP instructions all have an opcode of 0b0101011 */
csrr t0, mtval
andi a1, t0, 0b1111111
addi a1, a1, -0b0101011
bnez a1, hwlp_not_used
/* HWLP used in an ISR, abort */
@@ -432,6 +432,40 @@ Misc
ESP targets that contain an FPU do not support hardware acceleration for double precision floating point arithmetic (``double``). Instead, ``double`` is implemented via software, hence the behavioral restrictions regarding the ``float`` type do not apply to ``double``. Note that due to the lack of hardware acceleration, ``double`` operations may consume significantly more CPU time in comparison to ``float``.
.. only:: SOC_CPU_HAS_PIE
PIE / AI Coprocessor Usage
^^^^^^^^^^^^^^^^^^^^^^^^^^
Like the Floating Point Unit (FPU), IDF FreeRTOS implements **Lazy Context Switching** for the PIE coprocessor. On a context switch, PIE registers remain untouched until a task executes a PIE instruction. Once a task uses the PIE coprocessor, it is **pinned to the current core**.
.. only:: esp32s31
.. note::
On ESP32-S31, the PIE coprocessor is available **only on Core 1**. If a task executes a PIE instruction while running on Core 0, IDF FreeRTOS migrates the task to Core 1 and pins it there. This migration **overrides** any existing core affinity.
Because of this migration, tasks must **not** use the PIE coprocessor within a critical section or ISR, as doing so will cause a runtime abort.
.. only:: SOC_CPU_HAS_HWLOOP
Hardware Loop (HWLP) Usage
^^^^^^^^^^^^^^^^^^^^^^^^^^
In IDF FreeRTOS, the Hardware Loop (HWLP) unit is handled differently from other coprocessors: it does **not** use Lazy Context Switching.
When a task uses the HWLP and a context switch occurs, the HWLP registers are saved immediately during the interrupt entry path. Later, if the same task is switched back in, all HWLP registers are restored immediately.
In practice, this means that any task that has ever used the HWLP will always have an additional overhead on both context switch out and switch in.
.. only:: SOC_CPU_HAS_DSP
DSP Coprocessor Usage
^^^^^^^^^^^^^^^^^^^^^
On targets that feature the DSP coprocessor, context switching follows the same lazy scheme as the FPU: the coprocessor state is not saved until another task on the same core uses it or the task is switched to another core. When a task uses the DSP coprocessor, IDF FreeRTOS will automatically **pin the task to the current core** it is running on. The DSP coprocessor must not be used from within an interrupt context.
.. -------------------------------------------------- Single Core -----------------------------------------------------
.. _freertos-idf-single-core: