From e1f43e15b7db08bb76197ae3131b4351ac70cb61 Mon Sep 17 00:00:00 2001
From: Frantisek Hrbata <frantisek.hrbata@espressif.com>
Date: Fri, 3 Apr 2026 15:04:06 +0200
Subject: [PATCH 1/2] feat(ldgen): skip generation when section names unchanged
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After running objdump on all libraries, extract section names from the
stored raw output using regex and compute a fingerprint (MD5 hash of
section names + mtimes of fragment files, sdkconfig, and kconfig).

If the fingerprint matches the cached value from the previous run,
skip the expensive pyparsing + generation step entirely and touch
the output file to update its timestamp. The fingerprint file is
stored next to the output (e.g. sections.ld.fingerprint) in the build
directory.

This avoids the pyparsing bottleneck for the common case of editing
function bodies without adding or removing symbols — section names
in the object files are unchanged, so the generated sections.ld
would be identical. When sections do change, the full generation
runs and the fingerprint is updated.

Print "Skipping linker script generation, section names unchanged"
on the fingerprint hit path so build logs show when the optimization
fired. This makes it easy to diagnose user reports of unexpected
ldgen behavior — the build log directly shows whether the cache was
used.

Add an LDGEN_NO_CACHE environment variable to disable the cache as a
workaround in case the optimization causes problems. When set, ldgen
prints "Linker script generation caches disabled by LDGEN_NO_CACHE"
and runs full generation as before, leaving any existing fingerprint
file untouched.

Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs:

                                       before   after
  rebuild, section names unchanged      5.82s   0.82s

Closes https://github.com/espressif/esp-idf/issues/18408

Signed-off-by: Frantisek Hrbata <frantisek.hrbata@espressif.com>
---
 tools/ldgen/ldgen.py                    | 69 ++++++++++++++++++++++++-
 tools/test_build_system/test_rebuild.py | 33 +++++++++++-
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/tools/ldgen/ldgen.py b/tools/ldgen/ldgen.py
index b07bfd0967..49935c6f9f 100755
--- a/tools/ldgen/ldgen.py
+++ b/tools/ldgen/ldgen.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python
 #
-# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 #
 import argparse
 import errno
+import hashlib
 import json
 import os
+import re
 import subprocess
 import sys
 import tempfile
@@ -21,6 +23,50 @@ from ldgen.sdkconfig import SDKConfig
 from pyparsing import ParseException
 from pyparsing import ParseFatalException
 
+_RE_SECTION_NAME = re.compile(r'^\s*\d+\s+(\.\S+)', re.MULTILINE)
+
+
+def _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file):
+    """Compute a fingerprint from section names and mtimes of all inputs."""
+    hasher = hashlib.md5()
+
+    # Section names from objdump output
+    for archive, info in sorted(sections_infos.sections.items()):
+        names = _RE_SECTION_NAME.findall(info.content)
+        hasher.update((archive + ':' + ','.join(names)).encode())
+
+    # Mtimes of fragment files, sdkconfig, kconfig
+    input_files = [p.name if hasattr(p, 'name') else p for p in fragment_files]
+    input_files += [p for p in (config_file, kconfig_file) if p]
+    for path in input_files:
+        try:
+            hasher.update(f'{path}:{os.path.getmtime(path)}'.encode())
+        except OSError:
+            return None
+
+    return hasher.hexdigest()
+
+
+def _can_skip_generation(output_path, fingerprint):
+    """Check if fingerprint matches cached value from previous run."""
+    try:
+        with open(output_path + '.fingerprint') as f:
+            if f.read().strip() == fingerprint:
+                os.utime(output_path, None)
+                return True
+    except OSError:
+        pass
+    return False
+
+
+def _save_fingerprint(output_path, fingerprint):
+    """Save fingerprint for next run."""
+    try:
+        with open(output_path + '.fingerprint', 'w') as f:
+            f.write(fingerprint)
+    except OSError:
+        pass
+
 
 def _update_environment(args):
     env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
@@ -114,6 +160,10 @@ def main():
     else:
         check_mapping_exceptions = None
 
+    no_cache = os.environ.get('LDGEN_NO_CACHE') == '1'
+    if no_cache:
+        print('Linker script generation caches disabled by LDGEN_NO_CACHE')
+
     try:
         sections_infos = EntityDB()
         for library in libraries_file:
@@ -125,6 +175,20 @@ def main():
                 dump.name = library
                 sections_infos.add_sections_info(dump)
 
+        # Check if we can skip generation entirely — section names and other
+        # inputs unchanged since last run.
+        fingerprint = (
+            None if no_cache else _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file)
+        )
+        if (
+            output_path
+            and fingerprint
+            and os.path.exists(output_path)
+            and _can_skip_generation(output_path, fingerprint)
+        ):
+            print('Skipping linker script generation, section names unchanged')
+            sys.exit(0)
+
         mutable_libs = [lib.strip() for lib in mutable_libraries_file]
         generation_model = Generation(check_mapping, check_mapping_exceptions, mutable_libs, args.debug)
 
@@ -163,6 +227,9 @@ def main():
                 output_path, 'w', encoding='utf-8'
             ) as f:  # only create output file after generation has succeeded
                 f.write(output.read())
+
+        if output_path and fingerprint:
+            _save_fingerprint(output_path, fingerprint)
     except LdGenFailure as e:
         print(f'linker script generation failed for {input_file.name}\nERROR: {e}')
         sys.exit(1)
diff --git a/tools/test_build_system/test_rebuild.py b/tools/test_build_system/test_rebuild.py
index 430c0cc1b1..c59aa86869 100644
--- a/tools/test_build_system/test_rebuild.py
+++ b/tools/test_build_system/test_rebuild.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2022-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 # These tests check whether the build system rebuilds some files or not
 # depending on the changes to the project.
@@ -143,6 +143,37 @@ def test_rebuild_linker(idf_py: IdfPyFunc) -> None:
     rebuild_and_check(idf_py, APP_BINS, BOOTLOADER_BINS + PARTITION_BIN)
 
 
+def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
+    """Verify the ldgen fingerprint skip path: when section names and other
+    inputs are unchanged, ldgen exits early without regenerating sections.ld
+    and prints an informational message.
+    """
+    skip_msg = 'Skipping linker script generation, section names unchanged'
+    app_c = test_app_copy / 'main' / 'build_test_app.c'
+
+    # Seed app_main with a printf so the first build establishes the string
+    # literal and its .rodata section. Later we only change the arithmetic
+    # constant, which modifies .text.app_main contents but keeps section
+    # names unchanged — exactly the case the fingerprint should optimize.
+    replace_in_file(app_c, '// placeholder_inside_main', 'printf("value = %d\\n", 1 + 2);')
+
+    logging.info('initial build')
+    idf_py('build')
+
+    logging.info(
+        'changing the printed calculation modifies .text.app_main but keeps all section names - fingerprint should hit'
+    )
+    replace_in_file(app_c, '1 + 2', '3 + 4')
+    result = idf_py('build')
+    assert skip_msg in result.stdout, f'expected {skip_msg!r} in build output'
+
+    logging.info('touching a fragment file invalidates the fingerprint')
+    idf_path = Path(os.environ['IDF_PATH'])
+    (idf_path / 'components/esp_common/common.lf').touch()
+    result = idf_py('build')
+    assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
+
+
 @pytest.mark.usefixtures('idf_copy')
 def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
     idf_path = Path(os.environ['IDF_PATH'])

From e4fa15b50a8172daf53bd661e0efc4e6be8b36d9 Mon Sep 17 00:00:00 2001
From: Frantisek Hrbata <frantisek.hrbata@espressif.com>
Date: Tue, 7 Apr 2026 11:09:30 +0200
Subject: [PATCH 2/2] feat(ldgen): cache parsed fragment files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Profiling shows that parsing the linker fragment (.lf) files with
pyparsing is the single largest cost in ldgen (~58% of total runtime).
parse_fragment_file() rebuilds its grammar from scratch on every call
because the conditional parser closure has to capture sdkconfig to
evaluate if/elif/else at parse time.

Add a cache that pickles the parsed FragmentFile object list to
<output>.lfcache. The cache is keyed on fragment file paths+mtimes
plus sdkconfig and kconfig mtimes — sdkconfig is included because
fragment if/elif/else blocks are evaluated against it at parse time.

This avoids the pyparsing cost on rebuilds where ldgen has to run
but the fragment files themselves are unchanged — for example when
a function is added or removed in a source file. Cache hits produce
the same sections.ld as a full parse, and any pickle load failure
falls through to a normal parse, so cache corruption or a Python
upgrade cannot produce a wrong build.

Print "Skipping linker fragment parsing, fragment files unchanged"
on the lf cache hit path so build logs show when the optimization
fired — this makes it easy to diagnose user reports of unexpected
ldgen behavior.

The LDGEN_NO_CACHE environment variable disables the lf cache. When
set, ldgen falls through to a normal parse without consulting or
writing the cache file.

Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs:

                                          before   after
  rebuild, section change, .lf unchanged   5.82s   1.34s

Signed-off-by: Frantisek Hrbata <frantisek.hrbata@espressif.com>
---
 tools/ldgen/ldgen.py                    | 87 ++++++++++++++++++++++---
 tools/ldgen/ldgen/fragments.py          | 13 +++-
 tools/test_build_system/test_rebuild.py | 28 ++++++++
 3 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/tools/ldgen/ldgen.py b/tools/ldgen/ldgen.py
index 49935c6f9f..407ab2a51a 100755
--- a/tools/ldgen/ldgen.py
+++ b/tools/ldgen/ldgen.py
@@ -8,6 +8,7 @@ import errno
 import hashlib
 import json
 import os
+import pickle
 import re
 import subprocess
 import sys
@@ -68,6 +69,55 @@ def _save_fingerprint(output_path, fingerprint):
         pass
 
 
+def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
+    """Compute a cache key for parsed fragment files.
+
+    Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes,
+    because fragment parsing evaluates `if/elif/else` conditional blocks
+    against sdkconfig at parse time — so a sdkconfig change can change the
+    parsed FragmentFile output even if the fragment files themselves are
+    unchanged.
+    """
+    hasher = hashlib.md5()
+    paths = [p.name if hasattr(p, 'name') else p for p in fragment_files]
+    paths += [p for p in (config_file, kconfig_file) if p]
+    for path in sorted(paths):
+        try:
+            hasher.update(f'{path}:{os.path.getmtime(path)}'.encode())
+        except OSError:
+            return None
+    return hasher.hexdigest()
+
+
+def _load_lf_cache(cache_path, key):
+    """Load parsed FragmentFile list from cache if key matches.
+
+    The lf cache is written and read only by ldgen, in the same build
+    directory where sections.ld, compiled object files, and the rest of
+    the build state already live. The trust boundary matches the build
+    system's trust boundary: anyone who can modify <output>.lfcache can
+    also modify sections.ld, *.o, or the toolchain binaries directly.
+    pickle is safe in this context; it is not exposed to untrusted input.
+    """
+    try:
+        with open(cache_path, 'rb') as f:
+            data = pickle.load(f)
+        if isinstance(data, dict) and data.get('key') == key:
+            return data.get('fragments')
+    except (OSError, pickle.UnpicklingError, EOFError, AttributeError, ImportError, ValueError):
+        pass
+    return None
+
+
+def _save_lf_cache(cache_path, key, fragments):
+    """Save parsed FragmentFile list for next run."""
+    try:
+        with open(cache_path, 'wb') as f:
+            pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL)
+    except (OSError, pickle.PicklingError):
+        pass
+
+
 def _update_environment(args):
     env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
     for name, value in env:
@@ -196,15 +246,34 @@ def main():
 
         sdkconfig = SDKConfig(kconfig_file, config_file)
 
-        for fragment_file in fragment_files:
-            try:
-                fragment_file = parse_fragment_file(fragment_file, sdkconfig)
-            except (ParseException, ParseFatalException) as e:
-                # ParseException is raised on incorrect grammar
-                # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
-                # keys, key unsupported by fragment, unexpected number of values, etc.)
-                raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
-            generation_model.add_fragments_from_file(fragment_file)
+        # Try to load parsed fragment files from cache. The lf cache is
+        # complementary to the fingerprint skip above: if we get here, section
+        # names changed, but the fragment files themselves may still be
+        # identical and don't need re-parsing.
+        lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
+        lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
+        parsed_fragments = None
+        if lf_cache_path and lf_cache_key:
+            parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
+            if parsed_fragments is not None:
+                print('Skipping linker fragment parsing, fragment files unchanged')
+
+        if parsed_fragments is None:
+            parsed_fragments = []
+            for fragment_file in fragment_files:
+                try:
+                    parsed = parse_fragment_file(fragment_file, sdkconfig)
+                except (ParseException, ParseFatalException) as e:
+                    # ParseException is raised on incorrect grammar
+                    # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
+                    # keys, key unsupported by fragment, unexpected number of values, etc.)
+                    raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
+                parsed_fragments.append(parsed)
+            if lf_cache_path and lf_cache_key:
+                _save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
+
+        for parsed in parsed_fragments:
+            generation_model.add_fragments_from_file(parsed)
 
         non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
         mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
diff --git a/tools/ldgen/ldgen/fragments.py b/tools/ldgen/ldgen/fragments.py
index d4f15f43c6..e0290fe229 100644
--- a/tools/ldgen/ldgen/fragments.py
+++ b/tools/ldgen/ldgen/fragments.py
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 #
 from typing import Any
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
     fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
     parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
     fragment_file = parser.parse_file(path, parse_all=True)[0]
-    fragment_file.path = path
+    # Normalize to a path string — `path` may be a file-like object when
+    # called via the --fragments CLI path, and file objects are not
+    # picklable. Storing the path as a string keeps the parsed FragmentFile
+    # picklable for the lf cache regardless of how ldgen was invoked. The
+    # file object itself is not needed — .path is only read for error
+    # messages, so a plain string identifier is sufficient.
+    path_str = path.name if hasattr(path, 'name') else path
+    fragment_file.path = path_str
 
     for frag in fragment_file.fragments:
-        frag.path = path
+        frag.path = path_str
 
     return fragment_file
diff --git a/tools/test_build_system/test_rebuild.py b/tools/test_build_system/test_rebuild.py
index c59aa86869..355c6b69c6 100644
--- a/tools/test_build_system/test_rebuild.py
+++ b/tools/test_build_system/test_rebuild.py
@@ -174,6 +174,34 @@ def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> No
     assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
 
 
+def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
+    """Verify the ldgen lf cache: when section names change but fragment files
+    don't, parsed FragmentFile objects are loaded from cache rather than
+    re-parsed, and an informational message is printed.
+    """
+    cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'
+
+    logging.info('initial build')
+    idf_py('build')
+
+    logging.info(
+        'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
+    )
+    replace_in_file(
+        test_app_copy / 'main' / 'build_test_app.c',
+        '// placeholder_before_main',
+        'void test_ldgen_lf_cache_extra_func(void) {}',
+    )
+    result = idf_py('build')
+    assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output'
+
+    logging.info('touching a fragment file invalidates the lf cache')
+    idf_path = Path(os.environ['IDF_PATH'])
+    (idf_path / 'components/esp_common/common.lf').touch()
+    result = idf_py('build')
+    assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change'
+
+
 @pytest.mark.usefixtures('idf_copy')
 def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
     idf_path = Path(os.environ['IDF_PATH'])