Merge branch 'feat/ldgen_skip_generation' into 'master'

feat(ldgen): skip generation when section names unchanged

Closes IDFGH-17454

See merge request espressif/esp-idf!47278
This commit is contained in:
Frantisek Hrbata
2026-04-20 11:06:07 +02:00
3 changed files with 216 additions and 14 deletions
+146 -10
View File
@@ -1,12 +1,15 @@
#!/usr/bin/env python
#
# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
#
import argparse
import errno
import hashlib
import json
import os
import pickle
import re
import subprocess
import sys
import tempfile
@@ -21,6 +24,99 @@ from ldgen.sdkconfig import SDKConfig
from pyparsing import ParseException
from pyparsing import ParseFatalException
_RE_SECTION_NAME = re.compile(r'^\s*\d+\s+(\.\S+)', re.MULTILINE)


def _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file):
    """Compute a fingerprint from section names and mtimes of all inputs.

    The digest covers, in this order:

    * for every archive in ``sections_infos.sections`` (sorted), the section
      names that ``_RE_SECTION_NAME`` finds in that archive's dump content.
      Only the captured names are hashed, so a dump whose other columns
      change while the names stay the same yields the same fingerprint;
    * the path and modification time of every fragment file, followed by
      ``config_file`` and ``kconfig_file`` when they are set.

    Entries of ``fragment_files`` may be path strings or objects exposing a
    ``name`` attribute (e.g. open files).

    Returns the hex digest as a string, or ``None`` when the modification
    time of any input file cannot be read.
    """
    # SHA-256 instead of MD5: hashlib.md5() may be blocked (ValueError) on
    # FIPS-restricted Python builds, and nothing here depends on the digest
    # algorithm - a stored fingerprint from an older run simply mismatches once.
    hasher = hashlib.sha256()

    def _feed(*fields):
        # Each record is JSON-encoded and newline-terminated so that record
        # and field boundaries are unambiguous; plain concatenation would let
        # different inputs produce the same byte stream.
        hasher.update(json.dumps(fields).encode())
        hasher.update(b'\n')

    # Section names from objdump output
    for archive, info in sorted(sections_infos.sections.items()):
        _feed('sections', archive, _RE_SECTION_NAME.findall(info.content))

    # Mtimes of fragment files, sdkconfig, kconfig
    input_files = [p.name if hasattr(p, 'name') else p for p in fragment_files]
    input_files += [p for p in (config_file, kconfig_file) if p]
    for path in input_files:
        try:
            mtime = os.path.getmtime(path)
        except OSError:
            # Without a readable mtime no trustworthy fingerprint exists.
            return None
        _feed('input', str(path), mtime)

    return hasher.hexdigest()
def _can_skip_generation(output_path, fingerprint):
    """Tell whether the fingerprint stored by a previous run equals ``fingerprint``.

    The stored value is read from ``<output_path>.fingerprint`` and compared
    after stripping surrounding whitespace. On a match the access and
    modification times of ``output_path`` are set to the current time
    (presumably so the build system treats the existing output as freshly
    generated) and ``True`` is returned.

    Returns ``False`` when the values differ or when any step raises
    ``OSError`` (fingerprint file unreadable, output file cannot be touched).
    """
    fingerprint_file = output_path + '.fingerprint'
    try:
        with open(fingerprint_file) as cached:
            stored = cached.read().strip()
            if stored != fingerprint:
                return False
            os.utime(output_path, None)
    except OSError:
        return False
    return True
def _save_fingerprint(output_path, fingerprint):
    """Persist ``fingerprint`` to ``<output_path>.fingerprint`` for the next run.

    Writing is best-effort: an ``OSError`` while opening or writing the file
    is ignored and the function returns normally.
    """
    fingerprint_file = output_path + '.fingerprint'
    try:
        with open(fingerprint_file, 'w') as sink:
            sink.write(fingerprint)
    except OSError:
        # Failing to store the fingerprint is deliberately not an error.
        return
def _compute_lf_cache_key(fragment_files, config_file, kconfig_file):
    """Compute a cache key for parsed fragment files.

    Keyed on fragment file paths+mtimes plus sdkconfig and kconfig mtimes,
    because fragment parsing evaluates `if/elif/else` conditional blocks
    against sdkconfig at parse time — so a sdkconfig change can change the
    parsed FragmentFile output even if the fragment files themselves are
    unchanged.

    Entries of ``fragment_files`` may be path strings or objects exposing a
    ``name`` attribute. Paths are sorted before hashing, so the key does not
    depend on the order in which the files are listed.

    Returns the hex digest as a string, or ``None`` when the modification
    time of any input file cannot be read.
    """
    # SHA-256 instead of MD5: hashlib.md5() may be blocked (ValueError) on
    # FIPS-restricted Python builds; a key from an older run simply mismatches.
    hasher = hashlib.sha256()
    paths = [p.name if hasattr(p, 'name') else p for p in fragment_files]
    paths += [p for p in (config_file, kconfig_file) if p]
    for path in sorted(paths):
        try:
            mtime = os.path.getmtime(path)
        except OSError:
            return None
        # One JSON-encoded, newline-terminated record per file keeps the
        # boundaries between path, mtime and the next record unambiguous.
        hasher.update(json.dumps([str(path), mtime]).encode())
        hasher.update(b'\n')
    return hasher.hexdigest()
def _load_lf_cache(cache_path, key):
    """Load parsed FragmentFile list from cache if key matches.

    The lf cache is written and read only by ldgen, in the same build
    directory where sections.ld, compiled object files, and the rest of
    the build state already live. The trust boundary matches the build
    system's trust boundary: anyone who can modify <output>.lfcache can
    also modify sections.ld, *.o, or the toolchain binaries directly.
    pickle is safe in this context; it is not exposed to untrusted input.

    Returns the cached list when the file can be unpickled, holds a dict
    whose ``'key'`` equals ``key`` and whose ``'fragments'`` is a list.
    Returns ``None`` in every other case, so the caller re-parses.
    """
    try:
        with open(cache_path, 'rb') as f:
            data = pickle.load(f)
    except Exception:
        # The cache is purely an optimisation. Unpickling a stale or damaged
        # file can raise far more than UnpicklingError (e.g. TypeError when a
        # pickled class no longer accepts the stored arguments, IndexError or
        # KeyError on malformed streams), so any failure is a cache miss.
        return None

    if not isinstance(data, dict) or data.get('key') != key:
        return None

    fragments = data.get('fragments')
    # Anything other than a list is not something this module wrote.
    return fragments if isinstance(fragments, list) else None
def _save_lf_cache(cache_path, key, fragments):
    """Save parsed FragmentFile list for next run.

    The payload is a pickled dict ``{'key': key, 'fragments': fragments}``
    written to ``cache_path``. Saving is best-effort: if the payload cannot
    be pickled or the file cannot be written, the function returns without
    raising.
    """
    # Serialise in memory first so that a pickling failure neither creates
    # nor truncates the cache file.
    try:
        payload = pickle.dumps({'key': key, 'fragments': fragments}, pickle.HIGHEST_PROTOCOL)
    except Exception:
        # Unpicklable objects surface as TypeError or AttributeError at least
        # as often as PicklingError; none of them should abort generation.
        return

    try:
        with open(cache_path, 'wb') as f:
            f.write(payload)
    except OSError:
        pass
def _update_environment(args):
env = [(name, value) for (name, value) in (e.split('=', 1) for e in args.env)]
@@ -114,6 +210,10 @@ def main():
else:
check_mapping_exceptions = None
no_cache = os.environ.get('LDGEN_NO_CACHE') == '1'
if no_cache:
print('Linker script generation caches disabled by LDGEN_NO_CACHE')
try:
sections_infos = EntityDB()
for library in libraries_file:
@@ -125,6 +225,20 @@ def main():
dump.name = library
sections_infos.add_sections_info(dump)
# Check if we can skip generation entirely — section names and other
# inputs unchanged since last run.
fingerprint = (
None if no_cache else _compute_fingerprint(sections_infos, fragment_files, config_file, kconfig_file)
)
if (
output_path
and fingerprint
and os.path.exists(output_path)
and _can_skip_generation(output_path, fingerprint)
):
print('Skipping linker script generation, section names unchanged')
sys.exit(0)
mutable_libs = [lib.strip() for lib in mutable_libraries_file]
generation_model = Generation(check_mapping, check_mapping_exceptions, mutable_libs, args.debug)
@@ -132,15 +246,34 @@ def main():
sdkconfig = SDKConfig(kconfig_file, config_file)
for fragment_file in fragment_files:
try:
fragment_file = parse_fragment_file(fragment_file, sdkconfig)
except (ParseException, ParseFatalException) as e:
# ParseException is raised on incorrect grammar
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
# keys, key unsupported by fragment, unexpected number of values, etc.)
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
generation_model.add_fragments_from_file(fragment_file)
# Try to load parsed fragment files from cache. The lf cache is
# complementary to the fingerprint skip above: if we get here, section
# names changed, but the fragment files themselves may still be
# identical and don't need re-parsing.
lf_cache_path = None if no_cache or not output_path else output_path + '.lfcache'
lf_cache_key = None if no_cache else _compute_lf_cache_key(fragment_files, config_file, kconfig_file)
parsed_fragments = None
if lf_cache_path and lf_cache_key:
parsed_fragments = _load_lf_cache(lf_cache_path, lf_cache_key)
if parsed_fragments is not None:
print('Skipping linker fragment parsing, fragment files unchanged')
if parsed_fragments is None:
parsed_fragments = []
for fragment_file in fragment_files:
try:
parsed = parse_fragment_file(fragment_file, sdkconfig)
except (ParseException, ParseFatalException) as e:
# ParseException is raised on incorrect grammar
# ParseFatalException is raised on correct grammar, but inconsistent contents (ex. duplicate
# keys, key unsupported by fragment, unexpected number of values, etc.)
raise LdGenFailure(f'failed to parse {fragment_file}\n{e}')
parsed_fragments.append(parsed)
if lf_cache_path and lf_cache_key:
_save_lf_cache(lf_cache_path, lf_cache_key, parsed_fragments)
for parsed in parsed_fragments:
generation_model.add_fragments_from_file(parsed)
non_contiguous_sram = sdkconfig.evaluate_expression('SOC_MEM_NON_CONTIGUOUS_SRAM')
mapping_rules = generation_model.generate(sections_infos, non_contiguous_sram)
@@ -163,6 +296,9 @@ def main():
output_path, 'w', encoding='utf-8'
) as f: # only create output file after generation has succeeded
f.write(output.read())
if output_path and fingerprint:
_save_fingerprint(output_path, fingerprint)
except LdGenFailure as e:
print(f'linker script generation failed for {input_file.name}\nERROR: {e}')
sys.exit(1)
+10 -3
View File
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: 2021-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-FileCopyrightText: 2021-2026 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
#
from typing import Any
@@ -475,9 +475,16 @@ def parse_fragment_file(path, sdkconfig):
fragment = section | scheme | mapping | get_conditional_stmt(section | scheme | mapping)
parser = ZeroOrMore(fragment).ignore(comment).set_parse_action(parse)
fragment_file = parser.parse_file(path, parse_all=True)[0]
fragment_file.path = path
# Normalize to a path string — `path` may be a file-like object when
# called via the --fragments CLI path, and file objects are not
# picklable. Storing the path as a string keeps the parsed FragmentFile
# picklable for the lf cache regardless of how ldgen was invoked. The
# file object itself is not needed — .path is only read for error
# messages, so a plain string identifier is sufficient.
path_str = path.name if hasattr(path, 'name') else path
fragment_file.path = path_str
for frag in fragment_file.fragments:
frag.path = path
frag.path = path_str
return fragment_file
+60 -1
View File
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
# SPDX-FileCopyrightText: 2022-2026 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
# These tests check whether the build system rebuilds some files or not
# depending on the changes to the project.
@@ -143,6 +143,65 @@ def test_rebuild_linker(idf_py: IdfPyFunc) -> None:
rebuild_and_check(idf_py, APP_BINS, BOOTLOADER_BINS + PARTITION_BIN)
def test_rebuild_ldgen_fingerprint(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
    """Exercise the ldgen fingerprint short-circuit across three builds.

    After an initial build, a source edit that is expected to leave section
    names untouched must make the next build print the skip notice. Touching
    a linker fragment file must then make the notice disappear again.
    """
    expected_notice = 'Skipping linker script generation, section names unchanged'
    main_source = test_app_copy / 'main' / 'build_test_app.c'

    # Put a printf into app_main before the first build so the string literal
    # (and its .rodata section) already exists. The later edit only swaps the
    # arithmetic constant, which is meant to change code contents while
    # keeping the set of section names the same.
    replace_in_file(main_source, '// placeholder_inside_main', 'printf("value = %d\\n", 1 + 2);')

    logging.info('initial build')
    idf_py('build')

    logging.info(
        'changing the printed calculation modifies .text.app_main '
        'but keeps all section names - fingerprint should hit'
    )
    replace_in_file(main_source, '1 + 2', '3 + 4')
    second_build = idf_py('build')
    assert expected_notice in second_build.stdout, f'expected {expected_notice!r} in build output'

    logging.info('touching a fragment file invalidates the fingerprint')
    # NOTE(review): this updates the mtime of a file under $IDF_PATH itself;
    # confirm that is acceptable for the tree this test runs against.
    Path(os.environ['IDF_PATH'], 'components/esp_common/common.lf').touch()
    third_build = idf_py('build')
    assert expected_notice not in third_build.stdout, (
        f'unexpected {expected_notice!r} in build output after fragment change'
    )
def test_rebuild_ldgen_lf_cache(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
    """Exercise the ldgen parsed-fragment cache across three builds.

    After an initial build, adding a function to the app (expected to change
    section names while leaving fragment files alone) must make the next
    build print the cache-hit notice. Touching a linker fragment file must
    then make the notice disappear again.
    """
    expected_notice = 'Skipping linker fragment parsing, fragment files unchanged'
    main_source = test_app_copy / 'main' / 'build_test_app.c'

    logging.info('initial build')
    idf_py('build')

    logging.info(
        'adding a new function changes section names '
        'but leaves fragment files unchanged - lf cache should hit'
    )
    replace_in_file(
        main_source,
        '// placeholder_before_main',
        'void test_ldgen_lf_cache_extra_func(void) {}',
    )
    second_build = idf_py('build')
    assert expected_notice in second_build.stdout, f'expected {expected_notice!r} in build output'

    logging.info('touching a fragment file invalidates the lf cache')
    # NOTE(review): this updates the mtime of a file under $IDF_PATH itself;
    # confirm that is acceptable for the tree this test runs against.
    Path(os.environ['IDF_PATH'], 'components/esp_common/common.lf').touch()
    third_build = idf_py('build')
    assert expected_notice not in third_build.stdout, (
        f'unexpected {expected_notice!r} in build output after fragment change'
    )
@pytest.mark.usefixtures('idf_copy')
def test_rebuild_version_change(idf_py: IdfPyFunc, test_app_copy: Path) -> None:
idf_path = Path(os.environ['IDF_PATH'])