# Captures the section name from rows of the section dump shaped like
# "<index> <.name> ...": a decimal index followed by a dot-prefixed name.
_RE_SECTION_NAME = re.compile(r'^\s*\d+\s+(\.\S+)', re.MULTILINE)


def _input_path(entry):
    """Return the filesystem path behind *entry*.

    Inputs are either plain paths or already opened file objects; the
    latter expose their path through ``.name``. Path-like objects are
    returned untouched, because ``pathlib.Path.name`` is only the basename.
    """
    if isinstance(entry, (str, bytes, os.PathLike)):
        return entry
    return getattr(entry, 'name', entry)


def _hash_input_stats(hasher, fragment_files, config_file, kconfig_file):
    """Feed path, mtime and size of every input file into *hasher*.

    Files are hashed in the order given (fragments first, then sdkconfig
    and kconfig when set). Each record ends with a newline so adjacent
    records cannot run into each other.

    Returns:
        True when every file could be stat'ed, False otherwise. The hasher
        must not be used after a False result.
    """
    paths = [_input_path(p) for p in fragment_files]
    paths += [p for p in (config_file, kconfig_file) if p]
    for path in paths:
        try:
            st = os.stat(path)
        except OSError:
            return False
        # Size is hashed next to the mtime so an edit that lands inside the
        # filesystem's timestamp granularity still has a chance to be noticed.
        hasher.update(f'{path}:{st.st_mtime_ns}:{st.st_size}\n'.encode())
    return True


def _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file):
    """Compute a fingerprint of the inputs that decide the generated linker script.

    The fingerprint covers the section *names* found in each archive's
    section dump (not sizes or addresses) and the path/mtime/size of the
    fragment files, sdkconfig and kconfig.

    NOTE(review): nothing else is covered. If the linker script template or
    the mutable-libraries list can change while these inputs stay the same,
    the caller's skip path would keep a stale output - confirm against the
    build system's dependency list.

    Args:
        sections_infos: object whose ``.sections`` maps archive name to an
            entry with a ``.content`` string (the section dump text).
        fragment_files: iterable of fragment paths or open file objects.
        config_file: sdkconfig path, or a falsy value when absent.
        kconfig_file: Kconfig path, or a falsy value when absent.

    Returns:
        Hex digest string, or None when an input file cannot be stat'ed.
    """
    # sha256 rather than md5: md5 can be unavailable on FIPS-restricted
    # Python builds, and a failure here would abort the whole build.
    hasher = hashlib.sha256()

    sections = sections_infos.sections
    for archive in sorted(sections):
        names = _RE_SECTION_NAME.findall(sections[archive].content)
        hasher.update((archive + ':' + ','.join(names) + '\n').encode())

    if not _hash_input_stats(hasher, fragment_files, config_file, kconfig_file):
        return None
    return hasher.hexdigest()


def _can_skip_generation(output_path, fingerprint):
    """Return True when the stored fingerprint equals *fingerprint*.

    On a match the output file's timestamps are refreshed via ``os.utime``
    (presumably so the build system sees the output as up to date). Any
    ``OSError`` - missing fingerprint file, failing ``utime`` - yields False
    so the caller regenerates.
    """
    try:
        with open(output_path + '.fingerprint') as f:
            cached = f.read().strip()
        if cached != fingerprint:
            return False
        os.utime(output_path, None)
    except OSError:
        return False
    return True


def _save_fingerprint(output_path, fingerprint):
    """Store *fingerprint* next to the output file for the next run (best effort)."""
    try:
        with open(output_path + '.fingerprint', 'w') as f:
            f.write(fingerprint)
    except OSError:
        # A missing fingerprint only costs one extra regeneration.
        pass


def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
    """Compute the cache key for the parsed fragment files.

    Keyed on the fragment files plus sdkconfig and kconfig, because
    fragment parsing evaluates `if/elif/else` conditional blocks against
    sdkconfig at parse time - a sdkconfig change can change the parsed
    result even if the fragment files themselves are unchanged.

    The key depends on the order of *fragment_files*: the cached value is
    an ordered list, so a reordered input list must not reuse a list that
    was cached in the old order.

    Returns:
        Hex digest string, or None when an input file cannot be stat'ed.
    """
    hasher = hashlib.sha256()
    if not _hash_input_stats(hasher, fragment_files, config_file, kconfig_file):
        return None
    return hasher.hexdigest()


def _load_lf_cache(cache_path, key):
    """Load the parsed fragment list from *cache_path* if its key matches.

    SECURITY: this unpickles the cache file. That is acceptable only
    because the cache lives in the build directory and is written by ldgen
    itself; anyone able to modify it can equally modify sections.ld, the
    object files or the toolchain. Never point this at untrusted data.

    Returns:
        The cached list, or None when the file is missing, unreadable,
        corrupt, stale (different key) or not shaped as expected.
    """
    try:
        with open(cache_path, 'rb') as f:
            data = pickle.load(f)
    except Exception:
        # Unpickling a damaged or outdated file can raise almost anything
        # (TypeError, IndexError, KeyError, ... besides UnpicklingError).
        # The cache is an optimisation only, so every failure means "miss".
        return None

    if not isinstance(data, dict) or data.get('key') != key:
        return None
    fragments = data.get('fragments')
    # The caller iterates the result; anything but a list is treated as a miss.
    return fragments if isinstance(fragments, list) else None


def _save_lf_cache(cache_path, key, fragments):
    """Store the parsed fragment list for the next run (best effort).

    The pickle is written to a temporary sibling file and moved into place
    with ``os.replace``, so a failed or interrupted dump never leaves a
    truncated cache behind. All errors are swallowed: failing to cache must
    not fail the build.
    """
    tmp_path = f'{cache_path}.{os.getpid()}.tmp'
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL)
        os.replace(tmp_path, cache_path)
    except Exception:
        # Unpicklable members raise TypeError/AttributeError rather than
        # PicklingError, so a narrow except clause is not enough here.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
+ fingerprint = ( + None if no_cache else _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file) + ) + if ( + output_path + and fingerprint + and os.path.exists(output_path) + and _can_skip_generation(output_path, fingerprint) + ): + print('Skipping linker script generation, section names unchanged') + sys.exit(0) + mutable_libs = [lib.strip() for lib in mutable_libraries_file] generation_model = Generation(check_mapping, check_mapping_exceptions, mutable_libs, args.debug) @@ -132,15 +246,34 @@ def main(): sdkconfig = SDKConfig(kconfig_file, config_file) - for fragment_file in fragment_files: - try: - fragment_file = parse_fragment_file(fragment_file, sdkconfig) - except (ParseException, ParseFatalException) as e: - # ParseException is raised on incorrect grammar - # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate - # keys, key unsupported by fragment, unexpected number of values, etc.) - raise LdGenFailure(f'failed to parse {fragment_file}\n{e}') - generation_model.add_fragments_from_file(fragment_file) + # Try to load parsed fragment files from cache. The lf cache is + # complementary to the fingerprint skip above: if we get here, section + # names changed, but the fragment files themselves may still be + # identical and don't need re-parsing. 
+ lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache' + lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file) + parsed_fragments = None + if lf_cache_path and lf_cache_key: + parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key) + if parsed_fragments is not None: + print('Skipping linker fragment parsing, fragment files unchanged') + + if parsed_fragments is None: + parsed_fragments = [] + for fragment_file in fragment_files: + try: + parsed = parse_fragment_file(fragment_file, sdkconfig) + except (ParseException, ParseFatalException) as e: + # ParseException is raised on incorrect grammar + # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate + # keys, key unsupported by fragment, unexpected number of values, etc.) + raise LdGenFailure(f'failed to parse {fragment_file}\n{e}') + parsed_fragments.append(parsed) + if lf_cache_path and lf_cache_key: + _save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments) + + for parsed in parsed_fragments: + generation_model.add_fragments_from_file(parsed) non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM') mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram) @@ -163,6 +296,9 @@ def main(): output_path, 'w', encoding='utf-8' ) as f: # only create output file after generation has succeeded f.write(output.read()) + + if output_path and fingerprint: + _save_fingerprint(output_path, fingerprint) except LdGenFailure as e: print(f'linker script generation failed for {input_file.name}\nERROR: {e}') sys.exit(1) diff --git a/tools/ldgen/ldgen/fragments.py b/tools/ldgen/ldgen/fragments.py index d4f15f43c6..e0290fe229 100644 --- a/tools/ldgen/ldgen/fragments.py +++ b/tools/ldgen/ldgen/fragments.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD +# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO 
# Both tests below touch a fragment file under IDF_PATH. They request the
# `idf_copy` fixture, as test_rebuild_version_change does, so the touch is
# meant to land in a private copy rather than the shared checkout.
# NOTE(review): assumes `idf_copy` repoints IDF_PATH at the copy - confirm in conftest.
@pytest.mark.usefixtures('idf_copy')
def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
    """Check the ldgen fingerprint skip path.

    A source edit that keeps every section name must make ldgen print its
    skip message; touching a linker fragment afterwards must make the
    message disappear again.
    """
    skip_msg = 'Skipping linker script generation, section names unchanged'
    app_c = test_app_copy / 'main' / 'build_test_app.c'

    # Seed app_main with a printf so the first build already contains the
    # string literal and its .rodata section. The later edit only changes the
    # arithmetic constant, i.e. the contents of .text.app_main, not any
    # section name - exactly the case the fingerprint is meant to optimize.
    replace_in_file(app_c, '// placeholder_inside_main', 'printf("value = %d\\n", 1 + 2);')

    logging.info('initial build')
    idf_py('build')

    logging.info(
        'changing the printed calculation modifies .text.app_main but keeps all section names - fingerprint should hit'
    )
    replace_in_file(app_c, '1 + 2', '3 + 4')
    result = idf_py('build')
    assert skip_msg in result.stdout, f'expected {skip_msg!r} in build output'

    logging.info('touching a fragment file invalidates the fingerprint')
    idf_path = Path(os.environ['IDF_PATH'])
    (idf_path / 'components/esp_common/common.lf').touch()
    result = idf_py('build')
    assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'


@pytest.mark.usefixtures('idf_copy')
def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
    """Check the ldgen parsed-fragment (lf) cache.

    Adding a function changes the section names while the fragment files
    stay untouched, so ldgen must report that fragment parsing was skipped;
    touching a fragment file afterwards must make that message disappear.
    """
    cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'

    logging.info('initial build')
    idf_py('build')

    logging.info(
        'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
    )
    replace_in_file(
        test_app_copy / 'main' / 'build_test_app.c',
        '// placeholder_before_main',
        'void test_ldgen_lf_cache_extra_func(void) {}',
    )
    result = idf_py('build')
    assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output'

    logging.info('touching a fragment file invalidates the lf cache')
    idf_path = Path(os.environ['IDF_PATH'])
    (idf_path / 'components/esp_common/common.lf').touch()
    result = idf_py('build')
    assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change'