From 4d7f13c88bd9f7fc0e4085444b4381d5bf62a4ce Mon Sep 17 00:00:00 2001 From: Trevor Elkins Date: Fri, 30 Jan 2026 14:32:33 -0500 Subject: [PATCH] fix: check segment name when matching symbol neighbors --- .../size/symbols/macho_symbol_sizes.py | 108 +++++++++--------- 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/src/launchpad/size/symbols/macho_symbol_sizes.py b/src/launchpad/size/symbols/macho_symbol_sizes.py index 150ef4d2..173db7a9 100644 --- a/src/launchpad/size/symbols/macho_symbol_sizes.py +++ b/src/launchpad/size/symbols/macho_symbol_sizes.py @@ -3,12 +3,18 @@ from typing import NamedTuple import lief +import sentry_sdk from launchpad.utils.logging import get_logger logger = get_logger(__name__) +def _decode_name(raw_name: str | bytes) -> str: + """Decode a name from bytes if necessary.""" + return raw_name.decode("utf-8", errors="replace") if isinstance(raw_name, bytes) else str(raw_name) + + class _SymbolSizeData(NamedTuple): name: str section_name: str | None @@ -34,19 +40,16 @@ def __init__(self, binary: lief.MachO.Binary) -> None: def get_symbol_sizes(self) -> list[SymbolSize]: """Get the symbol sizes.""" - symbol_data = list(self._symbol_sizes(self.binary)) - - symbol_sizes: list[SymbolSize] = [] - for data in symbol_data: - symbol_sizes.append( - SymbolSize( - mangled_name=data.name, - section_name=data.section_name, - segment_name=data.segment_name, - address=data.address, - size=data.size, - ) + symbol_sizes = [ + SymbolSize( + mangled_name=data.name, + section_name=data.section_name, + segment_name=data.segment_name, + address=data.address, + size=data.size, ) + for data in self._symbol_sizes(self.binary) + ] logger.debug(f"Found {len(symbol_sizes)} symbol sizes") symbol_sizes.sort(key=lambda x: x.size, reverse=True) @@ -78,65 +81,60 @@ def _symbol_sizes(self, bin: lief.MachO.Binary) -> Generator[_SymbolSizeData]: # sort symbols by their address so we can calculate the distance between them syms = sorted((s for s in bin.symbols if self._is_measurable(s)), key=lambda s: s.value) + num_syms = len(syms) + + cached_section = None + cached_section_va = None for idx, sym in enumerate(syms): start = sym.value - section = bin.section_from_virtual_address(start) - if section: - max_section_addr = section.virtual_address + section.size - raw_name = section.name - section_name = ( - raw_name.decode("utf-8", errors="replace") if isinstance(raw_name, bytes) else str(raw_name) - ) - - if section.segment: - raw_seg_name = section.segment.name - segment_name = ( - raw_seg_name.decode("utf-8", errors="replace") - if isinstance(raw_seg_name, bytes) - else str(raw_seg_name) - ) - else: - segment_name = None + if cached_section_va == start: + section = cached_section else: - max_section_addr = None - section_name = None - segment_name = None + section = bin.section_from_virtual_address(start) + + if not section: logger.warning("size.macho.symbol_not_found_in_section", extra={"symbol": sym.name}) + cached_section = None + cached_section_va = None continue - # Only calculate the distance between symbols in the same section - if max_section_addr: - if idx + 1 < len(syms): - next_sym = syms[idx + 1] - next_sym_section = bin.section_from_virtual_address(next_sym.value) - end = ( - next_sym.value - if next_sym_section and next_sym_section.name == section.name - else max_section_addr - ) - else: - end = max_section_addr + max_section_addr = section.virtual_address + section.size + section_name = _decode_name(section.name) + segment_name = _decode_name(section.segment.name) if section.segment else None + + if idx + 1 < num_syms: + next_sym = syms[idx + 1] + next_sym_section = bin.section_from_virtual_address(next_sym.value) + cached_section = next_sym_section + cached_section_va = next_sym.value + same_section = ( + next_sym_section + and next_sym_section.segment + and next_sym_section.segment.name == section.segment.name + and next_sym_section.name == section.name + ) + end = next_sym.value if same_section else max_section_addr else: - end = syms[idx + 1].value + end = max_section_addr + cached_section = None + cached_section_va = None - # Convert virtual addresses to file offsets to calculate the disk size offset_end = bin.virtual_address_to_offset(end) offset_start = bin.virtual_address_to_offset(start) size = 0 if not isinstance(offset_end, lief.lief_errors) and not isinstance(offset_start, lief.lief_errors): raw_size = offset_end - offset_start if raw_size < 0: - logger.warning( - "size.macho.negative_symbol_size", - extra={ - "symbol": sym.name, - "offset_start": offset_start, - "offset_end": offset_end, - "section": section_name, - }, - ) + error_context = { + "symbol": sym.name, + "offset_start": offset_start, + "offset_end": offset_end, + "section": section_name, + } + logger.warning("size.macho.negative_symbol_size", extra=error_context) + sentry_sdk.capture_message("size.macho.negative_symbol_size", level="error", extras=error_context) size = max(0, raw_size) else: logger.warning(f"Failed to calculate size for symbol {sym.name}")