feat(ldgen): cache parsed fragment files

Profiling shows that parsing the linker fragment (.lf) files with
pyparsing is the single largest cost in ldgen (~58% of total runtime).
parse_fragment_file() rebuilds its grammar from scratch on every call
because the conditional parser closure has to capture sdkconfig to
evaluate if/elif/else at parse time.

Add a cache that pickles the parsed FragmentFile object list to
<output>.lfcache. The cache is keyed on fragment file paths+mtimes
plus sdkconfig and kconfig mtimes — sdkconfig is included because
fragment if/elif/else blocks are evaluated against it at parse time.

This avoids the pyparsing cost on rebuilds where ldgen has to run
but the fragment files themselves are unchanged — for example when
a function is added or removed in a source file. Cache hits produce
the same sections.ld as a full parse, and any pickle load failure
falls through to a normal parse, so cache corruption or a Python
upgrade cannot produce a wrong build.

Print "Skipping linker fragment parsing, fragment files unchanged"
on the lf cache hit path so build logs show when the optimization
fired — this makes it easy to diagnose user reports of unexpected
ldgen behavior.

The LDGEN_NO_CACHE environment variable disables the lf cache. When
set, ldgen falls through to a normal parse without consulting or
writing the cache file.

Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs:

                                          before   after
  rebuild, section change, .lf unchanged   5.82s   1.34s

Signed-off-by: Frantisek Hrbata <frantisek.hrbata@espressif.com>
This commit is contained in:
Frantisek Hrbata
2026-04-07 11:09:30 +02:00
parent e1f43e15b7
commit e4fa15b50a
3 changed files with 116 additions and 12 deletions
+78 -9
View File
@@ -8,6 +8,7 @@ import errno
import hashlib
import json
import os
import pickle
import re
import subprocess
import sys
@@ -68,6 +69,55 @@ def _save_fingerprint(output_path, fingerprint):
pass
def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
"""Compute a cache key for parsed fragment files.
Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes,
because fragment parsing evaluates `if/elif/else` conditional blocks
against sdkconfig at parse time — so a sdkconfig change can change the
parsed FragmentFile output even if the fragment files themselves are
unchanged.
"""
hasher = hashlib.md5()
paths = [p.name if hasattr(p, 'name') else p for p in fragment_files]
paths += [p for p in (config_file, kconfig_file) if p]
for path in sorted(paths):
try:
hasher.update(f'{path}:{os.path.getmtime(path)}'.encode())
except OSError:
return None
return hasher.hexdigest()
def _load_lf_cache(cache_path, key):
"""Load parsed FragmentFile list from cache if key matches.
The lf cache is written and read only by ldgen, in the same build
directory where sections.ld, compiled object files, and the rest of
the build state already live. The trust boundary matches the build
system's trust boundary: anyone who can modify <output>.lfcache can
also modify sections.ld, *.o, or the toolchain binaries directly.
pickle is safe in this context; it is not exposed to untrusted input.
"""
try:
with open(cache_path, 'rb') as f:
data = pickle.load(f)
if isinstance(data, dict) and data.get('key') == key:
return data.get('fragments')
except (OSError, pickle.UnpicklingError, EOFError, AttributeError, ImportError, ValueError):
pass
return None
def _save_lf_cache(cache_path, key, fragments):
"""Save parsed FragmentFile list for next run."""
try:
with open(cache_path, 'wb') as f:
pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL)
except (OSError, pickle.PicklingError):
pass
def _update_environment(args):
env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
for name, value in env:
@@ -196,15 +246,34 @@ def main():
sdkconfig = SDKConfig(kconfig_file, config_file)
for fragment_file in fragment_files:
try:
fragment_file = parse_fragment_file(fragment_file, sdkconfig)
except (ParseException, ParseFatalException) as e:
# ParseException is raised on incorrect grammar
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
# keys, key unsupported by fragment, unexpected number of values, etc.)
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
generation_model.add_fragments_from_file(fragment_file)
# Try to load parsed fragment files from cache. The lf cache is
# complementary to the fingerprint skip above: if we get here, section
# names changed, but the fragment files themselves may still be
# identical and don't need re-parsing.
lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
parsed_fragments = None
if lf_cache_path and lf_cache_key:
parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
if parsed_fragments is not None:
print('Skipping linker fragment parsing, fragment files unchanged')
if parsed_fragments is None:
parsed_fragments = []
for fragment_file in fragment_files:
try:
parsed = parse_fragment_file(fragment_file, sdkconfig)
except (ParseException, ParseFatalException) as e:
# ParseException is raised on incorrect grammar
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
# keys, key unsupported by fragment, unexpected number of values, etc.)
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
parsed_fragments.append(parsed)
if lf_cache_path and lf_cache_key:
_save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
for parsed in parsed_fragments:
generation_model.add_fragments_from_file(parsed)
non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
+10 -3
View File
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
#
from typing import Any
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
fragment_file = parser.parse_file(path, parse_all=True)[0]
fragment_file.path = path
# Normalize to a path string — `path` may be a file-like object when
# called via the --fragments CLI path, and file objects are not
# picklable. Storing the path as a string keeps the parsed FragmentFile
# picklable for the lf cache regardless of how ldgen was invoked. The
# file object itself is not needed — .path is only read for error
# messages, so a plain string identifier is sufficient.
path_str = path.name if hasattr(path, 'name') else path
fragment_file.path = path_str
for frag in fragment_file.fragments:
frag.path = path
frag.path = path_str
return fragment_file
+28
View File
@@ -174,6 +174,34 @@ def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> No
assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
"""Verify the ldgen lf cache: when section names change but fragment files
don't, parsed FragmentFile objects are loaded from cache rather than
re-parsed, and an informational message is printed.
"""
cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'
logging.info('initial build')
idf_py('build')
logging.info(
'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
)
replace_in_file(
test_app_copy / 'main' / 'build_test_app.c',
'// placeholder_before_main',
'void test_ldgen_lf_cache_extra_func(void) {}',
)
result = idf_py('build')
assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output'
logging.info('touching a fragment file invalidates the lf cache')
idf_path = Path(os.environ['IDF_PATH'])
(idf_path / 'components/esp_common/common.lf').touch()
result = idf_py('build')
assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change'
@pytest.mark.usefixtures('idf_copy')
def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
idf_path = Path(os.environ['IDF_PATH'])