Merge branch 'feat/add_deadlock_debug_feature' into 'master'

feat(openthread): add task block monitor to detect OpenThread mainloop

See merge request espressif/esp-idf!44526
This commit is contained in:
Xu Si Yu
2026-03-12 16:54:27 +08:00
10 changed files with 243 additions and 7 deletions
+22
View File
@@ -623,5 +623,27 @@ menu "OpenThread"
information whenever an OpenThread assert occurs. This can help developers
analyze unexpected failures by providing additional MAC layer context.
config OPENTHREAD_TASK_BLOCK_MONITOR
depends on OPENTHREAD_DEBUG
bool "Enable OpenThread Task Block Monitor"
default n
help
Enable monitoring of OpenThread tasks to detect if a task is blocked
and unable to continue its normal execution loop. Useful for debugging
issues where tasks stop progressing due to deadlocks or resource waits.
On RISC-V targets, in order to get meaningful backtraces when a task is
detected as blocked, ESP_SYSTEM_USE_FRAME_POINTER must be enabled in
ESP System Settings -> Backtracing method.
config OPENTHREAD_TASK_BLOCK_MONITOR_TIMEOUT
depends on OPENTHREAD_TASK_BLOCK_MONITOR
int "Task Block Monitor Timeout (seconds)"
range 1 60
default 20
help
Set the timeout (in seconds) for detecting a blocked task.
The timer is restarted each time the OpenThread mainloop returns from waiting
for events and is stopped while the mainloop is waiting. If the mainloop does not
get back to waiting before the timer expires, the task is considered blocked.
endmenu
@@ -0,0 +1,55 @@
/*
* SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <esp_err.h>
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#ifdef __cplusplus
extern "C" {
#endif
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
/**
 * @brief   This function creates the OpenThread task block monitor.
 *          The monitor task periodically checks whether the OpenThread mainloop
 *          has been blocked for more than `CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR_TIMEOUT`
 *          seconds and prints backtraces when a block is detected.
 *
 * @note    The monitor is enabled as soon as it has been created. Calling this
 *          function while the monitor already exists does not create a second one.
 *
 * @return
 *      - ESP_OK on success, or if the monitor has already been created
 *      - ESP_FAIL if the monitor task cannot be created
 *
 */
esp_err_t esp_openthread_task_block_monitor_create(void);
/**
 * @brief   This function deletes the OpenThread task block monitor.
 *          Monitoring is disabled before the monitor task is deleted.
 *
 * @return
 *      - ESP_OK on success, or if the monitor has not been created
 *
 */
esp_err_t esp_openthread_task_block_monitor_delete(void);
/**
 * @brief   This function enables or disables the OpenThread task block monitor.
 *
 * @note    When enabled, the latest mainloop activity timestamp is set to the
 *          current time, so the timeout is measured from this call, and the
 *          monitor task starts checking for blocking. When disabled, monitoring
 *          is temporarily stopped and the timestamp is left untouched.
 *
 * @param[in]  enable  True to enable the monitor, false to disable it.
 *
 */
void esp_openthread_task_block_monitor_set(bool enable);
#endif // CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
#ifdef __cplusplus
}
#endif
+16 -3
View File
@@ -18,6 +18,7 @@
#include "esp_openthread_platform.h"
#include "esp_openthread_sleep.h"
#include "esp_openthread_state.h"
#include "esp_openthread_debug.h"
#include "esp_openthread_task_queue.h"
#include "esp_openthread_types.h"
#include "freertos/FreeRTOS.h"
@@ -27,7 +28,6 @@
#include "openthread/netdata.h"
#include "openthread/tasklet.h"
#include "openthread/thread.h"
#include <cstddef>
#if CONFIG_OPENTHREAD_FTD
#include "openthread/dataset_ftd.h"
@@ -186,6 +186,10 @@ esp_err_t esp_openthread_launch_mainloop(void)
esp_err_t error = ESP_OK;
s_ot_mainloop_running = true;
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
ESP_ERROR_CHECK(esp_openthread_task_block_monitor_create());
#endif
while (s_ot_mainloop_running) {
FD_ZERO(&mainloop.read_fds);
FD_ZERO(&mainloop.write_fds);
@@ -206,8 +210,14 @@ esp_err_t esp_openthread_launch_mainloop(void)
#endif /* CONFIG_FREERTOS_USE_TICKLESS_IDLE && CONFIG_OPENTHREAD_RADIO_NATIVE */
esp_openthread_lock_release();
if (select(mainloop.max_fd + 1, &mainloop.read_fds, &mainloop.write_fds, &mainloop.error_fds,
&mainloop.timeout) >= 0) {
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
esp_openthread_task_block_monitor_set(false);
#endif
int result = select(mainloop.max_fd + 1, &mainloop.read_fds, &mainloop.write_fds, &mainloop.error_fds, &mainloop.timeout);
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
esp_openthread_task_block_monitor_set(true);
#endif
if (result >= 0) {
esp_openthread_lock_acquire(portMAX_DELAY);
error = esp_openthread_platform_process(instance, &mainloop);
while (otTaskletsArePending(instance)) {
@@ -224,6 +234,9 @@ esp_err_t esp_openthread_launch_mainloop(void)
break;
}
}
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
ESP_ERROR_CHECK(esp_openthread_task_block_monitor_delete());
#endif
return error;
}
@@ -0,0 +1,133 @@
/*
* SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "sdkconfig.h"
#include <stdatomic.h>
#include <stdbool.h>
#include "esp_check.h"
#include "esp_log.h"
#include "esp_timer.h"
#include "esp_debug_helpers.h"
#include "esp_openthread_debug.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#if CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
#include "esp_private/eh_frame_parser.h"
#include "esp_private/esp_system_attr.h"
#include "esp_private/esp_cpu_internal.h"
#include "esp_private/fp_unwind.h"
#include "esp_private/panic_internal.h"
#include "freertos/freertos_debug.h"
static const char *TAG = "OT_DEBUG";
static TaskHandle_t s_ot_task_block_monitor_task = NULL;
static atomic_llong s_ot_task_block_monitor_latest_time = 0;
static atomic_bool s_ot_task_block_monitor_active = false;
static const int64_t s_ot_task_block_monitor_timeout = CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR_TIMEOUT * 1000000;
static bool is_backtrace_printed = false;
#if CONFIG_IDF_TARGET_ARCH_RISCV
/**
 * @brief   Print the backtrace of the given task (RISC-V targets only).
 *
 * A snapshot of @p target_task is taken and its saved top-of-stack is handed to
 * the backtracing method selected at build time:
 *  - CONFIG_ESP_SYSTEM_USE_EH_FRAME:      unwind using the eh_frame information
 *  - CONFIG_ESP_SYSTEM_USE_FRAME_POINTER: unwind using frame pointers
 *  - neither:                             only the saved registers are dumped,
 *                                         followed by a hint to enable frame pointers
 *
 * @param[in]  target_task  Handle of the task whose backtrace is printed.
 *
 * @return
 *      - ESP_OK if the backtrace (or register dump) was printed
 *      - ESP_ERR_INVALID_ARG if target_task is NULL
 *      - ESP_ERR_INVALID_STATE if no saved stack frame is available for the task
 */
static esp_err_t ESP_SYSTEM_IRAM_ATTR esp_task_backtrace_print(TaskHandle_t target_task)
{
    ESP_RETURN_ON_FALSE(target_task != NULL, ESP_ERR_INVALID_ARG, TAG, "Invalid target task handle");

    TaskSnapshot_t snapshot = {};
    vTaskGetSnapshot(target_task, &snapshot);
    void *frame = snapshot.pxTopOfStack;
    // The snapshot starts zeroed, so the pointer is still NULL if it was not filled in.
    // Every path below dereferences it, hence bail out instead of unwinding from NULL.
    ESP_RETURN_ON_FALSE(frame != NULL, ESP_ERR_INVALID_STATE, TAG, "No saved stack frame for target task");

    const char *name = pcTaskGetName(target_task);
    ESP_LOGI(TAG, "Target Task Backtrace: %s", name ? name : "No Name");
#if CONFIG_ESP_SYSTEM_USE_EH_FRAME
    esp_eh_frame_print_backtrace(frame);
#elif CONFIG_ESP_SYSTEM_USE_FRAME_POINTER
    esp_fp_print_backtrace(frame);
#else
    // No unwinder available: copy the saved context and dump its registers only.
    esp_cpu_frame_t backtrace_frame = {};
    const int current_core = xPortGetCoreID();
    memcpy(&backtrace_frame, frame, sizeof(esp_cpu_frame_t));
    panic_prepare_frame_from_ctx(&backtrace_frame);
    panic_print_registers(&backtrace_frame, current_core);
    esp_rom_printf("\r\n");
    esp_rom_printf("Please enable CONFIG_ESP_SYSTEM_USE_FRAME_POINTER option to have a full backtrace.\r\n");
#endif
    return ESP_OK;
}
#endif
/**
 * @brief   Body of the monitor task.
 *
 * Wakes up once per second. While monitoring is active, it compares the time
 * elapsed since the latest recorded mainloop activity with the configured
 * timeout. The warning and backtrace are emitted once per blocking episode;
 * the "already printed" flag is cleared again when the elapsed time is seen
 * back within the timeout.
 *
 * @param[in]  arg  Unused.
 */
static void ot_debug_monitor_task(void *arg)
{
    (void)arg;

    for (;;) {
        if (atomic_load(&s_ot_task_block_monitor_active)) {
            const int64_t elapsed = esp_timer_get_time() - atomic_load(&s_ot_task_block_monitor_latest_time);

            if (elapsed <= s_ot_task_block_monitor_timeout) {
                // Mainloop made progress recently: re-arm the report for the next episode.
                is_backtrace_printed = false;
            } else if (!is_backtrace_printed) {
                // First observation of this blocking episode: report it exactly once.
                is_backtrace_printed = true;
                ESP_LOGW(TAG,
                         "OpenThread mainloop blocked for more than %d seconds, printing all tasks backtrace",
                         CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR_TIMEOUT);
#if CONFIG_IDF_TARGET_ARCH_XTENSA
                esp_backtrace_print_all_tasks(10);
#elif CONFIG_IDF_TARGET_ARCH_RISCV
                // On RISC-V the task is looked up by its configured name.
                TaskHandle_t ot_task = xTaskGetHandle(CONFIG_OPENTHREAD_TASK_NAME);
                if (ot_task) {
                    esp_task_backtrace_print(ot_task);
                }
#endif
            }
        }
        vTaskDelay(pdMS_TO_TICKS(1000));
    }
}
/**
 * Create the monitor task and enable monitoring.
 *
 * Returns ESP_OK when the task was created or already exists, and ESP_FAIL
 * when the task could not be created.
 */
esp_err_t esp_openthread_task_block_monitor_create(void)
{
    // A repeated call is logged but still reported as ESP_OK; no second task is created.
    ESP_RETURN_ON_FALSE(s_ot_task_block_monitor_task == NULL, ESP_OK, TAG, "Task block monitor already created");

    const BaseType_t created = xTaskCreate(ot_debug_monitor_task, "ot_task_monitor", 3072, NULL,
                                           CONFIG_OPENTHREAD_TASK_PRIORITY, &s_ot_task_block_monitor_task);
    ESP_RETURN_ON_FALSE(created == pdPASS, ESP_FAIL, TAG, "Failed to create OpenThread task block monitor");
    ESP_LOGI(TAG, "OpenThread task block monitor created");

    // Start measuring from now.
    esp_openthread_task_block_monitor_set(true);
    return ESP_OK;
}
esp_err_t esp_openthread_task_block_monitor_delete(void)
{
ESP_RETURN_ON_FALSE(s_ot_task_block_monitor_task != NULL, ESP_OK, TAG, "Task block monitor not created");
esp_openthread_task_block_monitor_set(false);
vTaskDelete(s_ot_task_block_monitor_task);
s_ot_task_block_monitor_task = NULL;
ESP_LOGI(TAG, "OpenThread task block monitor deleted");
return ESP_OK;
}
/**
 * Enable or disable the block monitor.
 *
 * When enabling, the latest-activity timestamp is set to the current time
 * before the active flag is raised. When disabling, only the active flag is
 * cleared.
 */
void esp_openthread_task_block_monitor_set(bool enable)
{
    if (enable) {
        // Refresh the timestamp first so the monitor never pairs "active" with a stale time.
        atomic_store(&s_ot_task_block_monitor_latest_time, esp_timer_get_time());
    }
    atomic_store(&s_ot_task_block_monitor_active, enable);
}
#endif // CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR
@@ -29,6 +29,8 @@ examples/openthread/ot_br:
enable:
- if: ((SOC_WIFI_SUPPORTED == 1 and IDF_TARGET != "esp32c61") or IDF_TARGET == "esp32p4") and CONFIG_NAME != "native_radio"
- if: SOC_WIFI_SUPPORTED == 1 and (SOC_IEEE802154_SUPPORTED == 1 and CONFIG_NAME == "native_radio")
disable:
- if: IDF_TARGET in ["esp32", "esp32s2", "esp32s3"] and CONFIG_NAME == "br_debug_riscv"
disable_test:
- if: IDF_TARGET not in ["esp32s3"]
reason: only test on esp32s3
+3 -3
View File
@@ -1,5 +1,5 @@
# Name, Type, SubType, Offset, Size, Flags
# Note: if you have increased the bootloader size, make sure to update the offsets to avoid overlap
nvs, data, nvs, 0x9000, 0x6000,
phy_init, data, phy, 0xf000, 0x1000,
factory, app, factory, 0x10000, 1900K,
nvs, data, nvs, , 0x6000,
phy_init, data, phy, , 0x1000,
factory, app, factory, , 2000K,
1 # Name, Type, SubType, Offset, Size, Flags
2 # Note: if you have increased the bootloader size, make sure to update the offsets to avoid overlap
3 nvs, data, nvs, 0x9000, 0x6000, nvs, data, nvs, , 0x6000,
4 phy_init, data, phy, 0xf000, 0x1000, phy_init, data, phy, , 0x1000,
5 factory, app, factory, 0x10000, 1900K, factory, app, factory, , 2000K,
@@ -0,0 +1,2 @@
CONFIG_OPENTHREAD_DEBUG=y
CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR=y
@@ -0,0 +1,3 @@
CONFIG_ESP_SYSTEM_USE_FRAME_POINTER=y
CONFIG_OPENTHREAD_DEBUG=y
CONFIG_OPENTHREAD_TASK_BLOCK_MONITOR=y
+5 -1
View File
@@ -4,7 +4,6 @@
CONFIG_PARTITION_TABLE_CUSTOM=y
CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
CONFIG_PARTITION_TABLE_OFFSET=0x8000
CONFIG_PARTITION_TABLE_MD5=y
# end of Partition Table
@@ -57,3 +56,8 @@ CONFIG_EXAMPLE_CONNECT_THREAD=n
CONFIG_ESP_SYSTEM_EVENT_TASK_STACK_SIZE=3584
CONFIG_ESP_MAIN_TASK_STACK_SIZE=6144
# end of ESP System Settings
#
# Serial flasher config
#
CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y
@@ -5,3 +5,5 @@ CONFIG_OPENTHREAD_BORDER_ROUTER_AUTO_START=y
# Enable PPP support as a workaround to ensure LWIP thread-lib compatibility for Ethernet builds
CONFIG_LWIP_PPP_SUPPORT=y
CONFIG_LWIP_PPP_SERVER_SUPPORT=y
# Reserve more space for the bootloader, which grows when frame pointers are enabled. It only overflowed on P4.
CONFIG_PARTITION_TABLE_OFFSET=0x9000