mirror of
https://github.com/espressif/esp-idf.git
synced 2026-04-27 11:03:11 +00:00
feat(ldgen): cache parsed fragment files
Profiling shows that parsing the linker fragment (.lf) files with
pyparsing is the single largest cost in ldgen (~58% of total runtime).
parse_fragment_file() rebuilds its grammar from scratch on every call
because the conditional parser closure has to capture sdkconfig to
evaluate if/elif/else at parse time.
Add a cache that pickles the parsed FragmentFile object list to
<output>.lfcache. The cache is keyed on fragment file paths+mtimes
plus sdkconfig and kconfig mtimes — sdkconfig is included because
fragment if/elif/else blocks are evaluated against it at parse time.
This avoids the pyparsing cost on rebuilds where ldgen has to run
but the fragment files themselves are unchanged — for example when
a function is added or removed in a source file. Cache hits produce
the same sections.ld as a full parse, and any pickle load failure
falls through to a normal parse, so cache corruption or a Python
upgrade cannot produce a wrong build.
Print "Skipping linker fragment parsing, fragment files unchanged"
on the lf cache hit path so build logs show when the optimization
fired — this makes it easy to diagnose user reports of unexpected
ldgen behavior.
The LDGEN_NO_CACHE environment variable disables the lf cache. When
set, ldgen falls through to a normal parse without consulting or
writing the cache file.
Measured on wifi_station (205 libs, 67 .lf files), median of 10 runs:
before after
rebuild, section change, .lf unchanged 5.82s 1.34s
Signed-off-by: Frantisek Hrbata <frantisek.hrbata@espressif.com>
This commit is contained in:
+78
-9
@@ -8,6 +8,7 @@ import errno
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -68,6 +69,55 @@ def _save_fingerprint(output_path, fingerprint):
|
||||
pass
|
||||
|
||||
|
||||
def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
|
||||
"""Compute a cache key for parsed fragment files.
|
||||
|
||||
Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes,
|
||||
because fragment parsing evaluates `if/elif/else` conditional blocks
|
||||
against sdkconfig at parse time — so a sdkconfig change can change the
|
||||
parsed FragmentFile output even if the fragment files themselves are
|
||||
unchanged.
|
||||
"""
|
||||
hasher = hashlib.md5()
|
||||
paths = [p.name if hasattr(p, 'name') else p for p in fragment_files]
|
||||
paths += [p for p in (config_file, kconfig_file) if p]
|
||||
for path in sorted(paths):
|
||||
try:
|
||||
hasher.update(f'{path}:{os.path.getmtime(path)}'.encode())
|
||||
except OSError:
|
||||
return None
|
||||
return hasher.hexdigest()
|
||||
|
||||
|
||||
def _load_lf_cache(cache_path, key):
|
||||
"""Load parsed FragmentFile list from cache if key matches.
|
||||
|
||||
The lf cache is written and read only by ldgen, in the same build
|
||||
directory where sections.ld, compiled object files, and the rest of
|
||||
the build state already live. The trust boundary matches the build
|
||||
system's trust boundary: anyone who can modify <output>.lfcache can
|
||||
also modify sections.ld, *.o, or the toolchain binaries directly.
|
||||
pickle is safe in this context; it is not exposed to untrusted input.
|
||||
"""
|
||||
try:
|
||||
with open(cache_path, 'rb') as f:
|
||||
data = pickle.load(f)
|
||||
if isinstance(data, dict) and data.get('key') == key:
|
||||
return data.get('fragments')
|
||||
except (OSError, pickle.UnpicklingError, EOFError, AttributeError, ImportError, ValueError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _save_lf_cache(cache_path, key, fragments):
|
||||
"""Save parsed FragmentFile list for next run."""
|
||||
try:
|
||||
with open(cache_path, 'wb') as f:
|
||||
pickle.dump({'key': key, 'fragments': fragments}, f, pickle.HIGHEST_PROTOCOL)
|
||||
except (OSError, pickle.PicklingError):
|
||||
pass
|
||||
|
||||
|
||||
def _update_environment(args):
|
||||
env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
|
||||
for name, value in env:
|
||||
@@ -196,15 +246,34 @@ def main():
|
||||
|
||||
sdkconfig = SDKConfig(kconfig_file, config_file)
|
||||
|
||||
for fragment_file in fragment_files:
|
||||
try:
|
||||
fragment_file = parse_fragment_file(fragment_file, sdkconfig)
|
||||
except (ParseException, ParseFatalException) as e:
|
||||
# ParseException is raised on incorrect grammar
|
||||
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
|
||||
# keys, key unsupported by fragment, unexpected number of values, etc.)
|
||||
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
|
||||
generation_model.add_fragments_from_file(fragment_file)
|
||||
# Try to load parsed fragment files from cache. The lf cache is
|
||||
# complementary to the fingerprint skip above: if we get here, section
|
||||
# names changed, but the fragment files themselves may still be
|
||||
# identical and don't need re-parsing.
|
||||
lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
|
||||
lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
|
||||
parsed_fragments = None
|
||||
if lf_cache_path and lf_cache_key:
|
||||
parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
|
||||
if parsed_fragments is not None:
|
||||
print('Skipping linker fragment parsing, fragment files unchanged')
|
||||
|
||||
if parsed_fragments is None:
|
||||
parsed_fragments = []
|
||||
for fragment_file in fragment_files:
|
||||
try:
|
||||
parsed = parse_fragment_file(fragment_file, sdkconfig)
|
||||
except (ParseException, ParseFatalException) as e:
|
||||
# ParseException is raised on incorrect grammar
|
||||
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
|
||||
# keys, key unsupported by fragment, unexpected number of values, etc.)
|
||||
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
|
||||
parsed_fragments.append(parsed)
|
||||
if lf_cache_path and lf_cache_key:
|
||||
_save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
|
||||
|
||||
for parsed in parsed_fragments:
|
||||
generation_model.add_fragments_from_file(parsed)
|
||||
|
||||
non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
|
||||
mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
|
||||
# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
from typing import Any
|
||||
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
|
||||
fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
|
||||
parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
|
||||
fragment_file = parser.parse_file(path, parse_all=True)[0]
|
||||
fragment_file.path = path
|
||||
# Normalize to a path string — `path` may be a file-like object when
|
||||
# called via the --fragments CLI path, and file objects are not
|
||||
# picklable. Storing the path as a string keeps the parsed FragmentFile
|
||||
# picklable for the lf cache regardless of how ldgen was invoked. The
|
||||
# file object itself is not needed — .path is only read for error
|
||||
# messages, so a plain string identifier is sufficient.
|
||||
path_str = path.name if hasattr(path, 'name') else path
|
||||
fragment_file.path = path_str
|
||||
|
||||
for frag in fragment_file.fragments:
|
||||
frag.path = path
|
||||
frag.path = path_str
|
||||
|
||||
return fragment_file
|
||||
|
||||
@@ -174,6 +174,34 @@ def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> No
|
||||
assert skip_msg not in result.stdout, f'unexpected {skip_msg!r} in build output after fragment change'
|
||||
|
||||
|
||||
def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
|
||||
"""Verify the ldgen lf cache: when section names change but fragment files
|
||||
don't, parsed FragmentFile objects are loaded from cache rather than
|
||||
re-parsed, and an informational message is printed.
|
||||
"""
|
||||
cache_msg = 'Skipping linker fragment parsing, fragment files unchanged'
|
||||
|
||||
logging.info('initial build')
|
||||
idf_py('build')
|
||||
|
||||
logging.info(
|
||||
'adding a new function changes section names but leaves fragment files unchanged - lf cache should hit'
|
||||
)
|
||||
replace_in_file(
|
||||
test_app_copy / 'main' / 'build_test_app.c',
|
||||
'// placeholder_before_main',
|
||||
'void test_ldgen_lf_cache_extra_func(void) {}',
|
||||
)
|
||||
result = idf_py('build')
|
||||
assert cache_msg in result.stdout, f'expected {cache_msg!r} in build output'
|
||||
|
||||
logging.info('touching a fragment file invalidates the lf cache')
|
||||
idf_path = Path(os.environ['IDF_PATH'])
|
||||
(idf_path / 'components/esp_common/common.lf').touch()
|
||||
result = idf_py('build')
|
||||
assert cache_msg not in result.stdout, f'unexpected {cache_msg!r} in build output after fragment change'
|
||||
|
||||
|
||||
@pytest.mark.usefixtures('idf_copy')
|
||||
def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
|
||||
idf_path = Path(os.environ['IDF_PATH'])
|
||||
|
||||
Reference in New Issue
Block a user