Merge branch 'feat/ldgen_skip_generation' into 'master'

feat(ldgen): skip generation when section names unchanged

Closes IDFGH-17454

See merge request espressif/esp-idf!47278
2026-04-27 11:03:11 +00:00 · 2026-04-20 11:06:07 +02:00
parent 9a5af5a7c9 e4fa15b50a
commit 4cd39bc28f
3 changed files with 216 additions and 14 deletions
@@ -1,12 +1,15 @@
 #!/usr/bin/env python
 #
-# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 #
 import argparse
 import errno
+import hashlib
 import json
 import os
+import pickle
+import re
 import subprocess
 import sys
 import tempfile
@@ -21,6 +24,99 @@ from ldgen.sdkconfig import SDKConfig
 from pyparsing import ParseException
 from pyparsing import ParseFatalException

+# Captures the section name from one row of a section table in the objdump
+# output (index, whitespace, then a name starting with '.').
+_RE_SECTION_NAME = re.compile(r'^\s*\d+\s+(\.\S+)', re.MULTILINE)
+# Captures the object name from the "<object>:     file format <target>"
+# header line that GNU objdump prints ahead of each object's section table.
+_RE_OBJECT_HEADER = re.compile(r'^(\S.*?):\s+file format\b', re.MULTILINE)
+
+
+def _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file):
+    """Compute a fingerprint from object/section names and mtimes of all inputs.
+
+    Section names are hashed grouped by the object file they belong to, so
+    that renaming an object or moving a section from one object to another
+    changes the fingerprint even when the flat list of section names stays
+    the same. Every hashed record is newline-terminated so that adjacent
+    records cannot run into each other.
+
+    Args:
+        sections_infos: object whose ``sections`` attribute maps an archive
+            name to an entry exposing the raw objdump text as ``content``.
+        fragment_files: iterable of fragment file paths or open file objects
+            (anything with a ``name`` attribute).
+        config_file: sdkconfig path, or a falsy value if not given.
+        kconfig_file: Kconfig path, or a falsy value if not given.
+
+    Returns:
+        Hex digest string, or ``None`` if the mtime of any input file cannot
+        be read (the caller then falls back to full generation).
+    """
+    hasher = hashlib.md5()
+
+    # Object and section names from objdump output
+    for archive, info in sorted(sections_infos.sections.items()):
+        hasher.update(f'archive:{archive}\n'.encode())
+        # split() with one capture group yields
+        # [preamble, object1, body1, object2, body2, ...]
+        chunks = _RE_OBJECT_HEADER.split(info.content)
+        objects = [''] + chunks[1::2]
+        bodies = chunks[0::2]
+        for obj, body in zip(objects, bodies):
+            names = _RE_SECTION_NAME.findall(body)
+            hasher.update(f'object:{obj}:{",".join(names)}\n'.encode())
+
+    # Mtimes of fragment files, sdkconfig, kconfig
+    input_files = [p.name if hasattr(p, 'name') else p for p in fragment_files]
+    input_files += [p for p in (config_file, kconfig_file) if p]
+    for path in input_files:
+        try:
+            hasher.update(f'file:{path}:{os.path.getmtime(path)}\n'.encode())
+        except OSError:
+            # An input we cannot stat means we cannot prove nothing changed.
+            return None
+
+    return hasher.hexdigest()
+
+
+def _can_skip_generation(output_path, fingerprint):
+    """Check if fingerprint matches cached value from previous run.
+
+    The cached value is read from ``<output_path>.fingerprint``. On a match
+    the output file's access/modification times are bumped to "now" via
+    ``os.utime`` (presumably so the build system's timestamp check treats
+    the existing output as fresh -- confirm against the CMake rule).
+
+    Args:
+        output_path: path of the generated linker script.
+        fingerprint: hex digest computed for the current inputs.
+
+    Returns:
+        ``True`` if the stored fingerprint matches and the output could be
+        touched; ``False`` on any mismatch, missing/unreadable/corrupt
+        fingerprint file, or failure to touch the output.
+    """
+    try:
+        with open(output_path + '.fingerprint', encoding='utf-8') as f:
+            cached = f.read().strip()
+    except (OSError, ValueError):
+        # ValueError covers UnicodeDecodeError: a damaged fingerprint file is
+        # just a cache miss and must not abort the build.
+        return False
+
+    if cached != fingerprint:
+        return False
+
+    try:
+        os.utime(output_path, None)
+    except OSError:
+        return False
+    return True
+
+
+def _save_fingerprint(output_path, fingerprint):
+    """Persist the fingerprint next to the output so the next run can compare.
+
+    The value is written to ``<output_path>.fingerprint``. This is strictly
+    best effort: if the file cannot be written, the error is ignored and the
+    next run simply will not find a matching fingerprint.
+    """
+    fingerprint_path = output_path + '.fingerprint'
+    try:
+        with open(fingerprint_path, 'w') as fingerprint_file:
+            fingerprint_file.write(fingerprint)
+    except OSError:
+        # Not being able to store the fingerprint must never fail the build.
+        return
+
+
+def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
+    """Derive the cache key under which parsed fragment files are stored.
+
+    The key covers the path and mtime of every fragment file as well as of
+    sdkconfig and kconfig (when given). The configuration files take part
+    because fragment parsing evaluates `if/elif/else` conditional blocks
+    against sdkconfig at parse time, so a configuration change can alter the
+    parsed FragmentFile output while the fragment files stay untouched.
+
+    Paths are hashed in sorted order. Returns the hex digest, or ``None``
+    when the mtime of any of the files cannot be read.
+    """
+    candidates = []
+    for fragment in fragment_files:
+        # Fragment files may arrive as open file objects; use their path.
+        candidates.append(fragment.name if hasattr(fragment, 'name') else fragment)
+    for optional in (config_file, kconfig_file):
+        if optional:
+            candidates.append(optional)
+    candidates.sort()
+
+    digest = hashlib.md5()
+    try:
+        for entry in candidates:
+            digest.update(f'{entry}:{os.path.getmtime(entry)}'.encode())
+    except OSError:
+        return None
+    return digest.hexdigest()
+
+
+def _load_lf_cache(cache_path, key):
+    """Load parsed FragmentFile list from cache if key matches.
+
+    The lf cache is written and read only by ldgen, in the same build
+    directory where sections.ld, compiled object files, and the rest of
+    the build state already live. The trust boundary matches the build
+    system's trust boundary: anyone who can modify <output>.lfcache can
+    also modify sections.ld, *.o, or the toolchain binaries directly.
+    pickle is safe in this context; it is not exposed to untrusted input.
+
+    Args:
+        cache_path: path of the pickle file written by a previous run.
+        key: cache key computed for the current inputs.
+
+    Returns:
+        The cached ``fragments`` value when the file holds a dict whose
+        ``key`` entry equals ``key``; ``None`` when the file is missing,
+        unreadable, damaged, of an unexpected shape, or keyed differently.
+    """
+    try:
+        with open(cache_path, 'rb') as f:
+            data = pickle.load(f)
+    except Exception:
+        # Deliberately broad: unpickling a truncated, damaged or stale file
+        # can raise far more than UnpicklingError (the pickle docs name
+        # AttributeError, EOFError, ImportError and IndexError as examples,
+        # and reconstruction can also fail with TypeError or KeyError). Any
+        # failure here only means "no usable cache" -- fall back to parsing.
+        return None
+
+    if isinstance(data, dict) and data.get('key') == key:
+        return data.get('fragments')
+    return None
+
+
+def _save_lf_cache(cache_path, key, fragments):
+    """Save parsed FragmentFile list for next run.
+
+    Best effort: any failure to serialise or write the cache is ignored, in
+    which case the next run re-parses the fragment files.
+
+    Args:
+        cache_path: destination path of the pickle file.
+        key: cache key identifying the inputs the fragments were parsed from.
+        fragments: list of parsed fragment files to store.
+    """
+    # Serialise before touching the file so that an unpicklable member does
+    # not leave a truncated cache file behind.
+    try:
+        payload = pickle.dumps({'key': key, 'fragments': fragments}, pickle.HIGHEST_PROTOCOL)
+    except (pickle.PicklingError, TypeError, AttributeError, RecursionError):
+        # Unpicklable objects surface as TypeError/AttributeError rather than
+        # PicklingError (e.g. "cannot pickle '_io.TextIOWrapper' object").
+        return
+
+    try:
+        with open(cache_path, 'wb') as f:
+            f.write(payload)
+    except OSError:
+        pass
+

 def _update_environment(args):
    env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
@@ -114,6 +210,10 @@ def main():
    else:
        check_mapping_exceptions = None

+    no_cache = os.environ.get('LDGEN_NO_CACHE') == '1'
+    if no_cache:
+        print('Linker script generation caches disabled by LDGEN_NO_CACHE')
+
    try:
        sections_infos = EntityDB()
        for library in libraries_file:
@@ -125,6 +225,20 @@ def main():
                dump.name = library
                sections_infos.add_sections_info(dump)

+        # Check if we can skip generation entirely — section names and other
+        # inputs unchanged since last run.
+        fingerprint = (
+            None if no_cache else _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file)
+        )
+        if (
+            output_path
+            and fingerprint
+            and os.path.exists(output_path)
+            and _can_skip_generation(output_path, fingerprint)
+        ):
+            print('Skipping linker script generation, section names unchanged')
+            sys.exit(0)
+
        mutable_libs = [lib.strip() for lib in mutable_libraries_file]
        generation_model = Generation(check_mapping, check_mapping_exceptions, mutable_libs, args.debug)

@@ -132,15 +246,34 @@ def main():

        sdkconfig = SDKConfig(kconfig_file, config_file)

-        for fragment_file in fragment_files:
-            try:
-                fragment_file = parse_fragment_file(fragment_file, sdkconfig)
-            except (ParseException, ParseFatalException) as e:
-                # ParseException is raised on incorrect grammar
-                # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
-                # keys, key unsupported by fragment, unexpected number of values, etc.)
-                raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
-            generation_model.add_fragments_from_file(fragment_file)
+        # Try to load parsed fragment files from cache. The lf cache is
+        # complementary to the fingerprint skip above: if we get here, section
+        # names changed, but the fragment files themselves may still be
+        # identical and don't need re-parsing.
+        lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
+        lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
+        parsed_fragments = None
+        if lf_cache_path and lf_cache_key:
+            parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
+            if parsed_fragments is not None:
+                print('Skipping linker fragment parsing, fragment files unchanged')
+
+        if parsed_fragments is None:
+            parsed_fragments = []
+            for fragment_file in fragment_files:
+                try:
+                    parsed = parse_fragment_file(fragment_file, sdkconfig)
+                except (ParseException, ParseFatalException) as e:
+                    # ParseException is raised on incorrect grammar
+                    # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
+                    # keys, key unsupported by fragment, unexpected number of values, etc.)
+                    raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
+                parsed_fragments.append(parsed)
+            if lf_cache_path and lf_cache_key:
+                _save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
+
+        for parsed in parsed_fragments:
+            generation_model.add_fragments_from_file(parsed)

        non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
        mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
@@ -163,6 +296,9 @@ def main():
                output_path, 'w', encoding='utf-8'
            ) as f:  # only create output file after generation has succeeded
                f.write(output.read())
+
+        if output_path and fingerprint:
+            _save_fingerprint(output_path, fingerprint)
    except LdGenFailure as e:
        print(f'linker script generation failed for {input_file.name}\nERROR: {e}')
        sys.exit(1)
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 #
 from typing import Any
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
    fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
    parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
    fragment_file = parser.parse_file(path, parse_all=True)[0]
-    fragment_file.path = path
+    # Normalize to a path string — `path` may be a file-like object when
+    # called via the --fragments CLI path, and file objects are not
+    # picklable. Storing the path as a string keeps the parsed FragmentFile
+    # picklable for the lf cache regardless of how ldgen was invoked. The
+    # file object itself is not needed — .path is only read for error
+    # messages, so a plain string identifier is sufficient.
+    path_str = path.name if hasattr(path, 'name') else path
+    fragment_file.path = path_str

    for frag in fragment_file.fragments:
-        frag.path = path
+        frag.path = path_str

    return fragment_file
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2022-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 # These tests check whether the build system rebuilds some files or not
 # depending on the changes to the project.
@@ -143,6 +143,65 @@ def test_rebuild_linker(idf_py: IdfPyFunc) -> None:
    rebuild_and_check(idf_py, APP_BINS, BOOTLOADER_BINS + PARTITION_BIN)


+def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
+    """Exercise the ldgen fingerprint shortcut.
+
+    A rebuild whose only change leaves every section name intact must make
+    ldgen report that it skipped linker script generation; after a fragment
+    file is touched, the next rebuild must no longer report the skip.
+    """
+    skip_msg = 'Skipping linker script generation, section names unchanged'
+    main_source = test_app_copy.joinpath('main', 'build_test_app.c')
+
+    # Put a printf into app_main before the first build so the string literal
+    # and its .rodata section already exist. The later edit only swaps the
+    # arithmetic constant: .text.app_main contents change, the set of section
+    # names does not - which is the situation the fingerprint targets.
+    replace_in_file(main_source, '// placeholder_inside_main', 'printf("value = %d\\n", 1 + 2);')
+
+    logging.info('initial build')
+    idf_py('build')
+
+    logging.info(
+        'changing the printed calculation modifies .text.app_main but keeps all section names - fingerprint should hit'
+    )
+    replace_in_file(main_source, '1 + 2', '3 + 4')
+    rebuild = idf_py('build')
+    assert skip_msg in rebuild.stdout, f'expected {skip_msg!r} in build output'
+
+    logging.info('touching a fragment file invalidates the fingerprint')
+    fragment = Path(os.environ['IDF_PATH']) / 'components/esp_common/common.lf'
+    fragment.touch()
+    rebuild = idf_py('build')
+    assert skip_msg not in rebuild.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
+
+
+def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
+    """Exercise the ldgen linker-fragment (lf) cache.
+
+    When a rebuild changes section names while the fragment files stay as
+    they were, ldgen must report that it reused the cached parse result;
+    after a fragment file is touched, the next rebuild must not report it.
+    """
+    cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'
+
+    logging.info('initial build')
+    idf_py('build')
+
+    logging.info(
+        'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
+    )
+    main_source = test_app_copy.joinpath('main', 'build_test_app.c')
+    extra_function = 'void test_ldgen_lf_cache_extra_func(void) {}'
+    replace_in_file(main_source, '// placeholder_before_main', extra_function)
+    rebuild = idf_py('build')
+    assert cache_msg in rebuild.stdout, f'expected {cache_msg!r} in build output'
+
+    logging.info('touching a fragment file invalidates the lf cache')
+    fragment = Path(os.environ['IDF_PATH']) / 'components/esp_common/common.lf'
+    fragment.touch()
+    rebuild = idf_py('build')
+    assert cache_msg not in rebuild.stdout, f'unexpected {cache_msg!r} in build output after fragment change'
+
+
@pytest.mark.usefixtures('idf_copy')
 def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
    idf_path = Path(os.environ['IDF_PATH'])