From 7e09b471ab9fcf126558941d3ec3aadb4edf22c8 Mon Sep 17 00:00:00 2001 From: Jakub Kocka Date: Tue, 23 Sep 2025 09:59:27 +0200 Subject: [PATCH] fix: Updated locale handling to prevent issues with build tools Closes https://github.com/espressif/esp-idf/issues/17542 --- tools/idf.py | 109 +++++++++++++++++++++---------- tools/test_idf_py/test_idf_py.py | 37 +++++++++++ 2 files changed, 112 insertions(+), 34 deletions(-) diff --git a/tools/idf.py b/tools/idf.py index 6ab1d6a983..50b389a1a7 100755 --- a/tools/idf.py +++ b/tools/idf.py @@ -978,47 +978,83 @@ def expand_file_arguments(argv: list[Any]) -> list[Any]: return argv -def _valid_unicode_config() -> codecs.CodecInfo | bool: - # With python 3 unicode environment is required +def _valid_unicode_config() -> bool: try: - return codecs.lookup(locale.getpreferredencoding()).name != 'ascii' + _, encoding = locale.getlocale() + if encoding is None: + return False + encoding_name = codecs.lookup(encoding).name + # Check if the encoding is Unicode-capable (UTF-8, UTF-16, UTF-32 variants) + return encoding_name.startswith(('utf-8', 'utf-16', 'utf-32')) except Exception: return False def _find_usable_locale() -> str: - try: - locales = ( - subprocess.Popen(['locale', '-a'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - .communicate()[0] - .decode('ascii', 'replace') - ) - except OSError: - locales = '' + """ + Find the best locale for Unicode support. + All the locales from the locale.locale_alias are checked if present on the system. + The unicode locales are filtered. + The best locale is selected based on the user preferred locale, then c.utf8, then universal, then any english. + """ + locale_name, encoding = locale.getlocale() - usable_locales: list[str] = [] - for line in locales.splitlines(): - locale = line.strip() - locale_name = locale.lower().replace('-', '') + err_msg = ( + 'Support for Unicode is required, ' + 'locale ' + f'{(str(locale_name) + " with " + str(encoding)) if locale_name is not None else "default"} ' + 'encoding found on your system is not sufficient.' + ' Please refer to the manual for your operating system for details on locale installation.' + ) - # C.UTF-8 is the best option, if supported - if locale_name == 'c.utf8': - return locale + if locale_name is None: + raise FatalError(err_msg) - if locale_name.endswith('.utf8'): - # Make a preference of english locales - if locale.startswith('en_'): - usable_locales.insert(0, locale) - else: - usable_locales.append(locale) + # (priority, locale_name, encoding) - lower priority = higher preference + best_locale: tuple[int, str, str] | None = None + found_locale: tuple[int, str, str] | None = None + for lcl in locale.locale_alias.items(): + try: + # try to set all encodings from the locale.locale_alias + locale.setlocale(locale.LC_ALL, lcl[1]) + if 'utf' in str(lcl[1]).lower().replace('-', ''): + # filter unicode locales + lcl_alias_name = lcl[0].lower() + if str(locale_name).lower().replace(' ', '-') in lcl_alias_name: + # user preferred language has unicode encoding (highest priority -1) + found_locale = (-1, lcl[0], lcl[1]) + elif 'c.utf8' == lcl_alias_name: + # c.utf should be on most systems (second priority 0) + found_locale = (0, lcl[0], lcl[1]) + elif 'univ' in lcl_alias_name: + # universal unicode locale (third priority 1) + found_locale = (1, lcl[0], lcl[1]) + elif 'en_' in lcl_alias_name: + # any english unicode locale (fourth priority 2) + found_locale = (2, lcl[0], lcl[1]) + if found_locale is not None and (best_locale is None or found_locale[0] < best_locale[0]): + best_locale = found_locale + if best_locale[0] <= 1: + break + except locale.Error: + # if locale is not possible to set, skip - not in user locales + pass - if not usable_locales: + if best_locale is None: + raise FatalError(err_msg) + + # return encoding of the best locale + # Windows does not support easy environment variable setting + # We need to raise an error but suggest to use a specific locale + if os.name == 'nt': raise FatalError( - 'Support for Unicode filenames is required, but no suitable UTF-8 locale was found on your system.' - ' Please refer to the manual for your operating system for details on locale reconfiguration.' + err_msg + + '\n' + + 'For example, a sufficient locale that was found on the system ' + + f'{best_locale[1]} with encoding {best_locale[2]}' ) - - return usable_locales[0] + else: + return str(best_locale[2]) if __name__ == '__main__': @@ -1028,16 +1064,21 @@ if __name__ == '__main__': 'MSys/Mingw is no longer supported. Please follow the getting started guide of the ' 'documentation in order to set up a suitiable environment, or continue at your own risk.' ) - elif os.name == 'posix' and not _valid_unicode_config(): - # Trying to find best utf-8 locale available on the system and restart python with it + elif os.name in ('posix', 'nt') and not _valid_unicode_config(): + # Trying to find best unicode locale available on the system and restart python with it best_locale = _find_usable_locale() print_warning( - 'Your environment is not configured to handle unicode filenames outside of ASCII range.' - f' Environment variable LC_ALL is temporary set to {best_locale} for unicode support.' + 'Your environment is not configured to handle unicode characters.' + ' Environment variable LC_CTYPE is temporary set to ' + f'{best_locale} (found on the system) for unicode support.' ) - os.environ['LC_ALL'] = best_locale + # Unset LC_ALL if it exists, as it takes precedence over LC_CTYPE + # This prevents infinite loops when LC_ALL is set to a non-Unicode locale + if 'LC_ALL' in os.environ: + del os.environ['LC_ALL'] + os.environ['LC_CTYPE'] = best_locale ret = subprocess.call([sys.executable] + sys.argv, env=os.environ) if ret: raise SystemExit(ret) diff --git a/tools/test_idf_py/test_idf_py.py b/tools/test_idf_py/test_idf_py.py index 18b1dfa16b..227529480c 100755 --- a/tools/test_idf_py/test_idf_py.py +++ b/tools/test_idf_py/test_idf_py.py @@ -11,6 +11,7 @@ from typing import Any from unittest import TestCase from unittest import main from unittest import mock +from unittest import skipIf import jsonschema @@ -623,5 +624,41 @@ class TestUF2Commands(TestWrapperCommands): os.environ.pop('ESPPORT') +@skipIf(os.name == 'nt', 'Locale handling differs on Windows') +class TestUserLocale(TestWrapperCommands): + """ + Test if user locale check works as expected. + """ + + def test_user_locale_no_unicode(self): + import locale + + original_locale = locale.setlocale(locale.LC_ALL) + + for lcl in locale.locale_alias.items(): + lcl_encoding = str(lcl[1]).lower().replace('-', '') + if 'utf' not in lcl_encoding: + # Try to set any non-unicode locale - break if successful + try: + locale.setlocale(locale.LC_ALL, lcl[1]) + os.environ['LC_ALL'] = lcl[1] # Testing with LC_ALL if it is ignored + os.environ['LC_CTYPE'] = lcl[1] + break + except locale.Error: + pass + + command = [sys.executable, idf_py_path, '--help'] + try: + output = subprocess.check_output(command, env=os.environ, stderr=subprocess.STDOUT).decode( + 'utf-8', 'ignore' + ) + except subprocess.CalledProcessError as e: + # Process may exit with code 2 if no UTF locale is found, but warning should still be printed + output = e.output.decode('utf-8', 'ignore') + + locale.setlocale(locale.LC_ALL, original_locale) + self.assertIn('Support for Unicode is required', output) + + if __name__ == '__main__': main()