From e4fa15b50a8172daf53bd661e0efc4e6be8b36d9 Mon Sep 17 00:00:00 2001
From: Frantisek Hrbata <frantisek.hrbata@espressif.com>
Date: Tue, 7 Apr 2026 11:09:30 +0200
Subject: [PATCH] feat(ldgen): cache parsed fragment files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Profiling shows that parsing the linker fragment (.lf) files with
pyparsing is the single largest cost in ldgen (~58% of total runtime).
parse_fragment_file() rebuilds its grammar from scratch on every call
because the conditional parser closure has to capture sdkconfig to
evaluate if/elif/else at parse time.

Add a cache that pickles the parsed FragmentFile object list to
<output>.lfcache. The cache is keyed on fragment file paths+mtimes
plus sdkconfig and kconfig mtimes — sdkconfig is included because
fragment if/elif/else blocks are evaluated against it at parse time.

This avoids the pyparsing cost on rebuilds where ldgen has to run
but the fragment files themselves are unchanged — for example when
a function is added or removed in a source file. Cache hits produce
the same sections.ld as a full parse, and any pickle load failure
falls through to a normal parse, so cache corruption or a Python
upgrade cannot produce a wrong build.

Print "Skipping linker fragment parsing, fragment files unchanged"
on the lf cache hit path so build logs show when the optimization
fired — this makes it easy to diagnose user reports of unexpected
ldgen behavior.

The LDGEN_NO_CACHE environment variable disables the lf cache. When
set, ldgen falls through to a normal parse without consulting or
writing the cache file.

Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs:

                                          before   after
  rebuild, section change, .lf unchanged   5.82s   1.34s

Signed-off-by: Frantisek Hrbata <frantisek.hrbata@espressif.com>
---
 tools/ldgen/ldgen.py                    | 87 ++++++++++++++++++++++---
 tools/ldgen/ldgen/fragments.py          | 13 +++-
 tools/test_build_system/test_rebuild.py | 28 ++++++++
 3 files changed, 116 insertions(+), 12 deletions(-)
diff --git a/tools/ldgen/ldgen.py b/tools/ldgen/ldgen.py
index 49935c6f9f..407ab2a51a 100755
--- a/tools/ldgen/ldgen.py
+++ b/tools/ldgen/ldgen.py
@@ -8,6 +8,7 @@ import errno
 import hashlib
 import json
 import os
+import pickle
 import re
 import subprocess
 import sys
@@ -68,6 +69,55 @@ def _save_fingerprint(output_path, fingerprint):
         pass
 
 
+def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
+    """Compute a cache key for parsed fragment files.
+
+    Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes,
+    because fragment parsing evaluates `if/elif/else` conditional blocks
+    against sdkconfig at parse time — so a sdkconfig change can change the
+    parsed FragmentFile output even if the fragment files themselves are
+    unchanged.
+    """
+    hasher = hashlib.md5()
+    paths = [p.name if hasattr(p, 'name') else p for p in fragment_files]
+    paths += [p for p in (config_file, kconfig_file) if p]
+    for path in sorted(paths):
+        try:
+            hasher.update(f'{path}:{os.path.getmtime(path)}'.encode())
+        except OSError:
+            return None
+    return hasher.hexdigest()
+
+
+def _load_lf_cache(cache_path, key):
+    """Load parsed FragmentFile list from cache if key matches.
+
+    The lf cache is written and read only by ldgen, in the same build
+    directory where sections.ld, compiled object files, and the rest of
+    the build state already live. The trust boundary matches the build
+    system's trust boundary: anyone who can modify <output>.lfcache can
+    also modify sections.ld, *.o, or the toolchain binaries directly.
+    pickle is safe in this context; it is not exposed to untrusted input.
+    """
+    try:
+        with open(cache_path, 'rb') as f:
+            data = pickle.load(f)
+        if isinstance(data, dict) and data.get('key') == key:
+            return data.get('fragments')
+    except (OSError, pickle.UnpicklingError, EOFError, AttributeError, ImportError, ValueError):
+        pass
+    return None
+
+
+def _save_lf_cache(cache_path, key, fragments):
+    """Save parsed FragmentFile list for next run."""
+    try:
+        with open(cache_path, 'wb') as f:
+            pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL)
+    except (OSError, pickle.PicklingError):
+        pass
+
+
 def _update_environment(args):
     env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
     for name, value in env:
@@ -196,15 +246,34 @@ def main():
 
         sdkconfig = SDKConfig(kconfig_file, config_file)
 
-        for fragment_file in fragment_files:
-            try:
-                fragment_file = parse_fragment_file(fragment_file, sdkconfig)
-            except (ParseException, ParseFatalException) as e:
-                # ParseException is raised on incorrect grammar
-                # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
-                # keys, key unsupported by fragment, unexpected number of values, etc.)
-                raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
-            generation_model.add_fragments_from_file(fragment_file)
+        # Try to load parsed fragment files from cache. The lf cache is
+        # complementary to the fingerprint skip above: if we get here, section
+        # names changed, but the fragment files themselves may still be
+        # identical and don't need re-parsing.
+        lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
+        lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
+        parsed_fragments = None
+        if lf_cache_path and lf_cache_key:
+            parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
+            if parsed_fragments is not None:
+                print('Skipping linker fragment parsing, fragment files unchanged')
+
+        if parsed_fragments is None:
+            parsed_fragments = []
+            for fragment_file in fragment_files:
+                try:
+                    parsed = parse_fragment_file(fragment_file, sdkconfig)
+                except (ParseException, ParseFatalException) as e:
+                    # ParseException is raised on incorrect grammar
+                    # ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
+                    # keys, key unsupported by fragment, unexpected number of values, etc.)
+                    raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
+                parsed_fragments.append(parsed)
+            if lf_cache_path and lf_cache_key:
+                _save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
+
+        for parsed in parsed_fragments:
+            generation_model.add_fragments_from_file(parsed)
 
         non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
         mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
diff --git a/tools/ldgen/ldgen/fragments.py b/tools/ldgen/ldgen/fragments.py
index d4f15f43c6..e0290fe229 100644
--- a/tools/ldgen/ldgen/fragments.py
+++ b/tools/ldgen/ldgen/fragments.py
@@ -1,5 +1,5 @@
 #
-# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
+# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
 # SPDX-License-Identifier: Apache-2.0
 #
 from typing import Any
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
     fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
     parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
     fragment_file = parser.parse_file(path, parse_all=True)[0]
-    fragment_file.path = path
+    # Normalize to a path string — `path` may be a file-like object when
+    # called via the --fragments CLI path, and file objects are not
+    # picklable. Storing the path as a string keeps the parsed FragmentFile
+    # picklable for the lf cache regardless of how ldgen was invoked. The
+    # file object itself is not needed — .path is only read for error
+    # messages, so a plain string identifier is sufficient.
+    path_str = path.name if hasattr(path, 'name') else path
+    fragment_file.path = path_str
 
     for frag in fragment_file.fragments:
-        frag.path = path
+        frag.path = path_str
 
     return fragment_file
diff --git a/tools/test_build_system/test_rebuild.py b/tools/test_build_system/test_rebuild.py
index c59aa86869..355c6b69c6 100644
--- a/tools/test_build_system/test_rebuild.py
+++ b/tools/test_build_system/test_rebuild.py
@@ -174,6 +174,34 @@ def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> No
     assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
 
 
+def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
+    """Verify the ldgen lf cache: when section names change but fragment files
+    don't, parsed FragmentFile objects are loaded from cache rather than
+    re-parsed, and an informational message is printed.
+    """
+    cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'
+
+    logging.info('initial build')
+    idf_py('build')
+
+    logging.info(
+        'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
+    )
+    replace_in_file(
+        test_app_copy / 'main' / 'build_test_app.c',
+        '// placeholder_before_main',
+        'void test_ldgen_lf_cache_extra_func(void) {}',
+    )
+    result = idf_py('build')
+    assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output'
+
+    logging.info('touching a fragment file invalidates the lf cache')
+    idf_path = Path(os.environ['IDF_PATH'])
+    (idf_path / 'components/esp_common/common.lf').touch()
+    result = idf_py('build')
+    assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change'
+
+
 @pytest.mark.usefixtures('idf_copy')
 def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
     idf_path = Path(os.environ['IDF_PATH'])