From e4fa15b50a8172daf53bd661e0efc4e6be8b36d9 Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Tue, 7 Apr 2026 11:09:30 +0200 Subject: [PATCH] feat(ldgen): cache parsed fragment files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling shows that parsing the linker fragment (.lf) files with pyparsing is the single largest cost in ldgen (~58% of total runtime). parse_fragment_file() rebuilds its grammar from scratch on every call because the conditional parser closure has to capture sdkconfig to evaluate if/elif/else at parse time. Add a cache that pickles the parsed FragmentFile object list to .lfcache. The cache is keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes — sdkconfig is included because fragment if/elif/else blocks are evaluated against it at parse time. This avoids the pyparsing cost on rebuilds where ldgen has to run but the fragment files themselves are unchanged — for example when a function is added or removed in a source file. Cache hits produce the same sections.ld as a full parse, and any pickle load failure falls through to a normal parse, so cache corruption or a Python upgrade cannot produce a wrong build. Print "Skipping linker fragment parsing, fragment files unchanged" on the lf cache hit path so build logs show when the optimization fired — this makes it easy to diagnose user reports of unexpected ldgen behavior. The LDGEN_NO_CACHE environment variable disables the lf cache. When set, ldgen falls through to a normal parse without consulting or writing the cache file. Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs: before after rebuild, section change, .lf unchanged 5.82s 1.34s Signed-off-by: Frantisek Hrbata --- tools/ldgen/ldgen.py | 87 ++++++++++++++++++++++--- tools/ldgen/ldgen/fragments.py | 13 +++- tools/test_build_system/test_rebuild.py | 28 ++++++++ 3 files changed, 116 insertions(+), 12 deletions(-) diff --git a/tools/ldgen/ldgen.py b/tools/ldgen/ldgen.py index 49935c6f9f..407ab2a51a 100755 --- a/tools/ldgen/ldgen.py +++ b/tools/ldgen/ldgen.py @@ -8,6 +8,7 @@ import errno import hashlib import json import os +import pickle import re import subprocess import sys @@ -68,6 +69,55 @@ def _save_fingerprint(output_path, fingerprint): pass +def _compute_lf_cache_key(fragment_files, config_file, kconfig_file): + """Compute a cache key for parsed fragment files. + + Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes, + because fragment parsing evaluates `if/elif/else` conditional blocks + against sdkconfig at parse time — so a sdkconfig change can change the + parsed FragmentFile output even if the fragment files themselves are + unchanged. + """ + hasher = hashlib.md5() + paths = [p.name if hasattr(p, 'name') else p for p in fragment_files] + paths += [p for p in (config_file, kconfig_file) if p] + for path in sorted(paths): + try: + hasher.update(f'{path}:{os.path.getmtime(path)}'.encode()) + except OSError: + return None + return hasher.hexdigest() + + +def _load_lf_cache(cache_path, key): + """Load parsed FragmentFile list from cache if key matches. + + The lf cache is written and read only by ldgen, in the same build + directory where sections.ld, compiled object files, and the rest of + the build state already live. The trust boundary matches the build + system's trust boundary: anyone who can modify .lfcache can + also modify sections.ld, *.o, or the toolchain binaries directly. + pickle is safe in this context; it is not exposed to untrusted input. + """ + try: + with open(cache_path, 'rb') as f: + data = pickle.load(f) + if isinstance(data, dict) and data.get('key') == key: + return data.get('fragments') + except (OSError, pickle.UnpicklingError, EOFError, AttributeError, ImportError, ValueError): + pass + return None + + +def _save_lf_cache(cache_path, key, fragments): + """Save parsed FragmentFile list for next run.""" + try: + with open(cache_path, 'wb') as f: + pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL) + except (OSError, pickle.PicklingError): + pass + + def _update_environment(args): env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)] for name, value in env: @@ -196,15 +246,34 @@ def main(): sdkconfig = SDKConfig(kconfig_file, config_file) - for fragment_file in fragment_files: - try: - fragment_file = parse_fragment_file(fragment_file, sdkconfig) - except (ParseException, ParseFatalException) as e: - # ParseException is raised on incorrect grammar - # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate - # keys, key unsupported by fragment, unexpected number of values, etc.) - raise LdGenFailure(f'failed to parse {fragment_file}\n{e}') - generation_model.add_fragments_from_file(fragment_file) + # Try to load parsed fragment files from cache. The lf cache is + # complementary to the fingerprint skip above: if we get here, section + # names changed, but the fragment files themselves may still be + # identical and don't need re-parsing. + lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache' + lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file) + parsed_fragments = None + if lf_cache_path and lf_cache_key: + parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key) + if parsed_fragments is not None: + print('Skipping linker fragment parsing, fragment files unchanged') + + if parsed_fragments is None: + parsed_fragments = [] + for fragment_file in fragment_files: + try: + parsed = parse_fragment_file(fragment_file, sdkconfig) + except (ParseException, ParseFatalException) as e: + # ParseException is raised on incorrect grammar + # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate + # keys, key unsupported by fragment, unexpected number of values, etc.) + raise LdGenFailure(f'failed to parse {fragment_file}\n{e}') + parsed_fragments.append(parsed) + if lf_cache_path and lf_cache_key: + _save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments) + + for parsed in parsed_fragments: + generation_model.add_fragments_from_file(parsed) non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM') mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram) diff --git a/tools/ldgen/ldgen/fragments.py b/tools/ldgen/ldgen/fragments.py index d4f15f43c6..e0290fe229 100644 --- a/tools/ldgen/ldgen/fragments.py +++ b/tools/ldgen/ldgen/fragments.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD +# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD # SPDX-License-Identifier: Apache-2.0 # from typing import Any @@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig): fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping) parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse) fragment_file = parser.parse_file(path, parse_all=True)[0] - fragment_file.path = path + # Normalize to a path string — `path` may be a file-like object when + # called via the --fragments CLI path, and file objects are not + # picklable. Storing the path as a string keeps the parsed FragmentFile + # picklable for the lf cache regardless of how ldgen was invoked. The + # file object itself is not needed — .path is only read for error + # messages, so a plain string identifier is sufficient. + path_str = path.name if hasattr(path, 'name') else path + fragment_file.path = path_str for frag in fragment_file.fragments: - frag.path = path + frag.path = path_str return fragment_file diff --git a/tools/test_build_system/test_rebuild.py b/tools/test_build_system/test_rebuild.py index c59aa86869..355c6b69c6 100644 --- a/tools/test_build_system/test_rebuild.py +++ b/tools/test_build_system/test_rebuild.py @@ -174,6 +174,34 @@ def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> No assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change' +def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None: + """Verify the ldgen lf cache: when section names change but fragment files + don't, parsed FragmentFile objects are loaded from cache rather than + re-parsed, and an informational message is printed. + """ + cache_msg = 'Skipping linker fragment parsing, fragment files unchanged' + + logging.info('initial build') + idf_py('build') + + logging.info( + 'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit' + ) + replace_in_file( + test_app_copy / 'main' / 'build_test_app.c', + '// placeholder_before_main', + 'void test_ldgen_lf_cache_extra_func(void) {}', + ) + result = idf_py('build') + assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output' + + logging.info('touching a fragment file invalidates the lf cache') + idf_path = Path(os.environ['IDF_PATH']) + (idf_path / 'components/esp_common/common.lf').touch() + result = idf_py('build') + assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change' + + @pytest.mark.usefixtures('idf_copy') def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None: idf_path = Path(os.environ['IDF_PATH'])