linker_diff/
lib.rs

//! This crate finds differences between two ELF files. Its intended use is where the files were
2//! produced by different linkers, or different versions of the same linker. So the input files
3//! should be the same except for where the linkers make different decisions such as layout.
4//!
5//! Because the intended use is verifying the correct functioning of linkers, the focus is on
6//! avoiding false positives rather than avoiding false negatives. i.e. we'd much rather fail to
7//! report a difference than report a difference that doesn't matter. Ideally a reported difference
8//! should indicate a bug or missing feature of the linker.
9//!
10//! Right now, performance of this library is not a priority, so there's quite a bit of heap
11//! allocation going on that with a little work could be avoided. If we end up using this library as
12//! part of a fuzzer this may need to be optimised.
13
14#![allow(clippy::too_many_arguments)]
15
16use anyhow::Context as _;
17use anyhow::bail;
18use asm_diff::AddressIndex;
19use clap::Parser;
20use clap::ValueEnum;
21use itertools::Itertools as _;
22#[allow(clippy::wildcard_imports)]
23use linker_utils::elf::secnames::*;
24use object::LittleEndian;
25use object::Object as _;
26use object::ObjectSection;
27use object::ObjectSymbol as _;
28use object::read::elf::ElfSection64;
29use section_map::IndexedLayout;
30use section_map::LayoutAndFiles;
31use std::collections::HashMap;
32use std::fmt::Display;
33use std::path::Path;
34use std::path::PathBuf;
35
36mod aarch64;
37mod arch;
38mod asm_diff;
39mod debug_info_diff;
40mod diagnostics;
41mod eh_frame_diff;
42mod gnu_hash;
43mod header_diff;
44mod init_order;
45pub(crate) mod section_map;
46mod segment;
47mod symbol_diff;
48mod symtab;
49mod trace;
50mod version_diff;
51mod x86_64;
52
/// Crate-wide result alias. The success type defaults to `()` and the error type defaults to
/// [`anyhow::Error`], so `Result` on its own means "succeeds with nothing or fails with an anyhow
/// error".
type Result<T = (), E = anyhow::Error> = core::result::Result<T, E>;
/// An ELF file as parsed by the `object` crate, fixed to little-endian byte order.
type ElfFile64<'data> = object::read::elf::ElfFile64<'data, LittleEndian>;
/// A symbol read from an [`ElfFile64`], fixed to little-endian byte order.
type ElfSymbol64<'data, 'file> = object::read::elf::ElfSymbol64<'data, 'file, LittleEndian>;
56
57use arch::Arch;
58use arch::ArchKind;
59use colored::Colorize;
60pub use diagnostics::enable_diagnostics;
61use section_map::InputSectionId;
62use section_map::OwnedFileIdentifier;
63
// Command-line configuration for a diff run.
//
// NOTE(review): the notes added here use plain `//` comments on purpose. This struct derives
// clap's `Parser`, which is understood to surface `///` doc comments as `--help` text, so new doc
// comments would change the CLI output - confirm before converting any of these to `///`.
#[non_exhaustive]
#[derive(Parser, Default, Clone)]
pub struct Config {
    /// Keys to ignore.
    // An entry ending in `*` matches any key that starts with the text before the `*`; any other
    // entry must equal the key exactly. Not consulted at all when `only` is non-empty.
    #[arg(long, value_delimiter = ',')]
    pub ignore: Vec<String>,

    /// Show only the specified keys.
    // Uses the same trailing-`*` prefix matching as `ignore`. When non-empty, a diff is kept only
    // if its key matches one of these entries.
    #[arg(long, value_delimiter = ',')]
    pub only: Vec<String>,

    /// Treat the sections with the specified names as equivalent. e.g. ".got.plt=.got"
    // Each value is split at the first `=` by `parse_string_equality`.
    #[arg(long, value_delimiter = ',', value_parser = parse_string_equality)]
    pub equiv: Vec<(String, String)>,

    /// Apply defaults for things that should be ignored currently for Wild. These defaults are
    /// subject to change as Wild changes.
    #[arg(long)]
    pub wild_defaults: bool,

    /// Print information about what sections did and didn't get diffed.
    #[arg(long)]
    pub coverage: bool,

    /// Display names for input files.
    // When supplied, the number of names must equal the number of files (primary file plus
    // references), otherwise building the report fails.
    #[arg(long, value_delimiter = ',', value_name = "NAME,NAME...")]
    pub display_names: Vec<String>,

    /// Files to compare against
    #[arg(long = "ref", value_name = "FILE")]
    pub references: Vec<PathBuf>,

    // Whether output is coloured. Also accepted on the command line as `--color`.
    #[arg(long, alias = "color", default_value = "auto")]
    pub colour: Colour,

    /// Primary file that we're validating against the reference file(s)
    // Always treated as the first file; the references follow it.
    pub file: PathBuf,
}
102
// Colour mode for report output. `Report::from_config` translates the chosen variant into a call
// on `colored::control`. Plain `//` comments are used because this enum derives clap's
// `ValueEnum` - NOTE(review): doc comments here may show up in `--help`; confirm before
// converting.
#[derive(ValueEnum, Copy, Clone, Default)]
pub enum Colour {
    // Clears any colour override, leaving the decision to the `colored` crate.
    #[default]
    Auto,
    // Forces colour off.
    Never,
    // Forces colour on.
    Always,
}
110
/// An output binary such as an executable or shared object.
pub struct Binary<'data> {
    /// Display name for this binary. This is what the `Display` implementation prints.
    name: String,
    /// Path that the binary was loaded from.
    path: PathBuf,
    /// The parsed ELF file.
    elf_file: &'data ElfFile64<'data>,
    /// Address-based index built from `elf_file`.
    address_index: AddressIndex<'data>,
    /// Symbol indexes keyed by symbol name.
    name_index: NameIndex<'data>,
    /// Indexed layout information. `None` when no layout was supplied for this binary.
    indexed_layout: Option<IndexedLayout<'data>>,
    /// Trace data obtained via `trace::Trace::for_path` for `path`.
    trace: trace::Trace,
    /// Index and size of each section, keyed by the section's name bytes.
    sections_by_name: HashMap<&'data [u8], SectionInfo>,
}
122
/// The per-section details that `Binary` records for lookup by section name.
#[derive(Clone, Copy)]
struct SectionInfo {
    /// Index of the section within the ELF file.
    index: object::SectionIndex,
    /// Size of the section as reported by the `object` crate.
    size: u64,
}
128
/// Symbol indexes grouped by name. A name can map to several symbols, so every entry is a list.
struct NameIndex<'data> {
    /// Symbols from the regular symbol table that are global. Only symbols that have a section are
    /// indexed, `.L`-prefixed names are skipped and any `@version` suffix is removed from the key.
    globals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Symbols from the regular symbol table that are not global, filtered and keyed the same way
    /// as `globals_by_name`.
    locals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Symbols from the dynamic symbol table, keyed by their unmodified name.
    dynamic_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
}
134
135impl Config {
136    #[must_use]
137    pub fn from_env() -> Self {
138        Self::parse()
139    }
140
141    fn apply_wild_defaults(&mut self, arch: ArchKind) {
142        self.ignore.extend(
143            [
144                // We don't currently support allocating space except in sections, so we have sections
145                // to hold the section and program headers. We then need to ignore them because GNU ld
146                // doesn't define such sections.
147                "section.shdr",
148                "section.phdr",
149                // We don't yet support these sections.
150                "section.data.rel.ro",
151                "section.stapsdt.base",
152                "section.note.gnu.build-id",
153                "section.note.gnu.property",
154                "section.note.stapsdt",
155                "section.hash",
156                // We set this to 8. GNU ld sometimes does too, but sometimes to 0.
157                "section.got.entsize",
158                "section.plt.got.entsize",
159                "section.plt.entsize",
160                // GNU ld sometimes sets this differently that we do.
161                "section.plt",
162                "section.plt.alignment",
163                "section.bss.alignment",
164                "section.gnu.build.attributes",
165                "section.annobin.notes.entsize",
166                // We don't yet group .lrodata sections separately.
167                "section.lrodata",
168                // We sometimes eliminate __tls_get_addr where GNU ld doesn't. This can mean that we
169                // have no versioned symbols for ld-linux-x86-64.so.2 or equivalent, which means we
170                // end up with one less version record.
171                ".dynamic.DT_VERNEEDNUM",
172                // We currently handle these dynamic tags differently
173                ".dynamic.DT_JMPREL",
174                ".dynamic.DT_PLTGOT",
175                ".dynamic.DT_PLTREL",
176                // We currently produce a .got.plt whenever we produce .plt, but GNU ld doesn't
177                "section.got.plt",
178                GOT_PLT_SECTION_NAME_STR,
179                // We don't currently produce a separate .plt.sec section.
180                "section.plt.sec",
181                // We don't yet write this.
182                ".dynamic.DT_HASH",
183                // aarch64-linux-gnu-ld on arch linux emits DT_BIND_NOW instead of DT_FLAGS.BIND_NOW
184                ".dynamic.DT_BIND_NOW",
185                ".dynamic.DT_FLAGS.BIND_NOW",
186                // TODO: Implement proper ordering of .init .ctors etc
187                "init_array",
188                "fini_array",
189                // When GNU ld encounters a GOT-forming reference to an ifunc, it generates a
190                // canonical PLT entry and points the GOT at that. This means that it ends up with
191                // GOT->PLT->GOT. We don't as yet support doing this.
192                "rel.missing-got-plt-got",
193                // We do support this. TODO: Should definitely look into why we're seeing this missing
194                // in our output.
195                "section.rela.plt",
196                // We currently write 10 byte PLT entries in some cases where GNU ld writes 8 byte ones.
197                "section.plt.got.alignment",
198                // GNU ld sometimes makes this writable sometimes not. Presumably this depends on
199                // whether there are relocations or some flags.
200                "section.eh_frame.flags",
201                // A package note section used by Ubuntu: https://systemd.io/ELF_PACKAGE_METADATA/
202                "section.note.package",
203                // TLSDESC relaxations aren't yet implemented.
204                "rel.match_failed.R_X86_64_GOTPC32_TLSDESC",
205                "rel.missing-opt.R_X86_64_TLSDESC_CALL.SkipTlsDescCall.*",
206                // Wild eliminates GOTPCRELX in statically linked executables even for undefined
207                // symbols, whereas other linkers don't. This is a valid optimisation that other
208                // linkers don't currently do.
209                "rel.extra-opt.R_X86_64_GOTPCRELX.CallIndirectToRelative.static-*",
210                // We don't yet support emitting warnings.
211                "section.gnu.warning",
212                // GNU ld sometimes applies relaxations that we don't yet.
213                "rel.match_failed.R_AARCH64_TLSDESC_LD64_LO12",
214                "rel.match_failed.R_AARCH64_TLSGD_ADD_LO12_NC",
215                "rel.missing-opt.R_X86_64_TLSGD.TlsGdToInitialExec.shared-object",
216                // GNU ld sometimes relaxes an adrp instruction to an adr instruction when the
217                // address is known and within +/-1MB. We don't as yet.
218                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.AdrpToAdr.*",
219                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.AdrpToAdr.*",
220                // The other linkers set properties on sections if all input sections have that
221                // property. For sections like .rodata, this seems like an unimportant behaviour to
222                // replicate.
223                "section.rodata.entsize",
224                "section.rodata.flags",
225                // We emit dynamic relocations for direct references to undefined weak symbols that
226                // might be provided at runtime as well as GOT entries for indirect references. GNU
227                // ld and lld only emit the GOT entries and leave direct references as null. Our
228                // behaviour seems more consistent with the description of
229                // `-zdynamic-undefined-weak`.
230                "rel.undefined-weak.dynamic.R_X86_64_64",
231                "rel.undefined-weak.dynamic.R_AARCH64_ABS64",
232                // On aarch64, GNU ld, at least sometimes, converts R_AARCH64_ABS64 to a PLT-forming
233                // relocation. We at present, don't.
234                "rel.dynamic-plt-bypass",
235                // If we don't optimise a TLS access, then we'll have references to __tls_get_addr,
236                // when GNU ld doesn't.
237                "dynsym.__tls_get_addr.*",
238                // GNU ld emits two segments, whereas wild emits only a single segment.
239                "segment.LOAD.R.*",
240                // We haven't provided an implementation that is compatible with existing linkers.
241                "segment.PT_NOTE.*",
242                "segment.PT_INTERP.*",
243                "segment.PT_PHDR.*",
244                "segment.PT_GNU_RELRO.*",
245                "segment.PT_GNU_STACK.*",
246                "segment.PT_GNU_PROPERTY.*",
247            ]
248            .into_iter()
249            .map(ToOwned::to_owned),
250        );
251
252        if arch == ArchKind::Aarch64 {
253            self.ignore.extend(
254                [
255                    // Other linkers have a bigger initial PLT entry, thus the entsize is set to zero:
256                    // https://sourceware.org/bugzilla/show_bug.cgi?id=26312
257                    "section.plt.entsize",
258                ]
259                .into_iter()
260                .map(ToOwned::to_owned),
261            );
262        }
263
264        self.equiv.push((
265            GOT_SECTION_NAME_STR.to_owned(),
266            GOT_PLT_SECTION_NAME_STR.to_owned(),
267        ));
268        // We don't currently define .plt.got and .plt.sec, we just put everything into .plt.
269        self.equiv.push((
270            PLT_SECTION_NAME_STR.to_owned(),
271            PLT_GOT_SECTION_NAME_STR.to_owned(),
272        ));
273        self.equiv.push((
274            PLT_SECTION_NAME_STR.to_owned(),
275            PLT_SEC_SECTION_NAME_STR.to_owned(),
276        ));
277    }
278
279    #[must_use]
280    pub fn to_arg_string(&self) -> String {
281        let mut out = String::new();
282        if self.wild_defaults {
283            out.push_str("--wild-defaults ");
284        }
285        if !self.ignore.is_empty() {
286            out.push_str("--ignore '");
287            out.push_str(&self.ignore.join(","));
288            out.push_str("' ");
289        }
290        if !self.equiv.is_empty() {
291            out.push_str("--equiv '");
292            let parts = self
293                .equiv
294                .iter()
295                .map(|(k, v)| format!("{k}={v}"))
296                .collect_vec();
297            out.push_str(&parts.join(","));
298            out.push_str("' ");
299        }
300        if !self.display_names.is_empty() {
301            out.push_str("--display-names ");
302            out.push_str(&self.display_names.join(","));
303            out.push(' ');
304        }
305        for file in &self.references {
306            out.push_str("--ref ");
307            out.push_str(&file.to_string_lossy());
308            out.push(' ');
309        }
310        out.push_str(&self.file.to_string_lossy());
311        out
312    }
313
314    fn filenames(&self) -> impl Iterator<Item = &PathBuf> {
315        // We always put our file first, since it makes it easier to treat it differently. e.g. when
316        // we compare a value from our file against each of the values from the other files.
317        std::iter::once(&self.file).chain(&self.references)
318    }
319}
320
321impl<'data> Binary<'data> {
322    pub(crate) fn new(
323        elf_file: &'data ElfFile64<'data>,
324        name: String,
325        path: PathBuf,
326        layout_and_files: Option<&'data LayoutAndFiles>,
327    ) -> Result<Self> {
328        let address_index = AddressIndex::new(elf_file);
329        let indexed_layout = layout_and_files.map(IndexedLayout::new).transpose()?;
330        let trace = trace::Trace::for_path(&path)?;
331
332        let sections_by_name = elf_file
333            .sections()
334            .map(|section| {
335                Ok((
336                    section.name_bytes()?,
337                    SectionInfo {
338                        index: section.index(),
339                        size: section.size(),
340                    },
341                ))
342            })
343            .collect::<Result<HashMap<&[u8], SectionInfo>>>()?;
344
345        Ok(Self {
346            name,
347            elf_file,
348            path,
349            address_index,
350            name_index: NameIndex::new(elf_file),
351            indexed_layout,
352            trace,
353            sections_by_name,
354        })
355    }
356
357    /// Looks up a symbol, first trying to get a global, or failing that a local. If multiple
358    /// symbols have the same name, then `hint_address` is used to select which one to return.
359    pub(crate) fn symbol_by_name(&self, name: &[u8], hint_address: u64) -> NameLookupResult {
360        match self.lookup_symbol(&self.name_index.globals_by_name, name, hint_address) {
361            NameLookupResult::Undefined => {
362                self.lookup_symbol(&self.name_index.locals_by_name, name, hint_address)
363            }
364            other => other,
365        }
366    }
367
368    fn lookup_symbol(
369        &self,
370        symbol_map: &HashMap<&[u8], Vec<object::SymbolIndex>>,
371        name: &[u8],
372        hint_address: u64,
373    ) -> NameLookupResult {
374        let indexes = symbol_map.get(name).map(Vec::as_slice).unwrap_or_default();
375
376        if indexes.len() >= 2 {
377            for sym_index in indexes {
378                if let Ok(sym) = self.elf_file.symbol_by_index(*sym_index) {
379                    if sym.address() == hint_address {
380                        return NameLookupResult::Defined(sym);
381                    }
382                }
383            }
384
385            // We didn't find a symbol with exactly the address hinted at.
386            return NameLookupResult::Duplicate;
387        }
388
389        if let Some(symbol_index) = indexes.first() {
390            if let Ok(sym) = self.elf_file.symbol_by_index(*symbol_index) {
391                NameLookupResult::Defined(sym)
392            } else {
393                NameLookupResult::Undefined
394            }
395        } else {
396            NameLookupResult::Undefined
397        }
398    }
399
400    fn section_by_name(&self, name: &str) -> Option<ElfSection64<LittleEndian>> {
401        self.section_by_name_bytes(name.as_bytes())
402    }
403
404    fn section_by_name_bytes(&self, name: &[u8]) -> Option<ElfSection64<LittleEndian>> {
405        let index = self.sections_by_name.get(name)?.index;
406        self.elf_file.section_by_index(index).ok()
407    }
408
409    fn section_containing_address(&self, address: u64) -> Option<ElfSection64<LittleEndian>> {
410        self.elf_file
411            .sections()
412            .find(|sec| (sec.address()..sec.address() + sec.size()).contains(&address))
413    }
414
415    /// Returns the name of the section that contains the supplied address. Does a linear scan, so
416    /// should only be used for error reporting.
417    fn section_name_containing_address(&self, address: u64) -> Option<&str> {
418        self.section_containing_address(address)
419            .and_then(|sec| sec.name().ok())
420    }
421}
422
/// The outcome of looking up a symbol by name in a [`Binary`].
#[derive(Debug)]
enum NameLookupResult<'data, 'file> {
    /// No symbol with the requested name was indexed, or the indexed symbol could not be read back
    /// from the file.
    Undefined,
    /// Several symbols share the name and none of them has the hinted address.
    Duplicate,
    /// The symbol that was found.
    Defined(ElfSymbol64<'data, 'file>),
}
429
430fn validate_objects(
431    report: &mut Report,
432    objects: &[Binary],
433    validation_name: &str,
434    validation_fn: impl Fn(&Binary) -> Result,
435) {
436    let values = objects
437        .iter()
438        .map(|obj| match validation_fn(obj) {
439            Ok(_) => "OK".to_owned(),
440            Err(e) => e.to_string(),
441        })
442        .collect_vec();
443    if first_equals_any(values.iter()) {
444        return;
445    }
446    report.add_diff(Diff {
447        key: validation_name.to_owned(),
448        values: DiffValues::PerObject(values),
449    });
450}
451
/// The result of comparing the primary binary against the reference binaries. Displaying a
/// report prints the name and path of each binary followed by every recorded difference.
pub struct Report {
    /// The names of each of our binaries. These should be short, not a full path, since we often
    /// prefix lines with these names.
    names: Vec<String>,

    /// The full path of each of our binaries.
    paths: Vec<PathBuf>,

    /// The differences that were detected.
    diffs: Vec<Diff>,

    /// The configuration that was used.
    config: Config,

    /// Section coverage information. Only `Some` when coverage was requested in the
    /// configuration.
    pub coverage: Option<Coverage>,
}
468
/// Records which input sections were diffed. Displaying it lists every section followed by a
/// summary of how many section bytes were diffed.
#[derive(Default)]
pub struct Coverage {
    /// Coverage details for each input section, keyed by the section's ID.
    sections: HashMap<InputSectionId, SectionCoverage>,
}
473
/// Coverage details for a single input section.
struct SectionCoverage {
    /// The original input file from which the section came.
    original_file: OwnedFileIdentifier,

    /// The name of the section.
    name: String,

    /// Whether we diffed this section at all.
    diffed: bool,

    /// The size of the section in bytes.
    num_bytes: u64,
}
487
488impl Report {
489    pub fn from_config(mut config: Config) -> Result<Report> {
490        // This changes mutable global state, which isn't an ideal thing to be doing from a library.
491        // It's expedient though, and we don't really expect linker-diff to get used as a library
492        // anywhere except the linker-diff binary and wild's integration tests, so this probably
493        // isn't a big deal.
494        match config.colour {
495            Colour::Auto => colored::control::unset_override(),
496            Colour::Never => colored::control::set_override(false),
497            Colour::Always => colored::control::set_override(true),
498        }
499
500        let display_names = short_file_display_names(&config)?;
501
502        let file_bytes = config
503            .filenames()
504            .map(|filename| -> Result<Vec<u8>> {
505                let bytes = std::fs::read(filename)
506                    .with_context(|| format!("Failed to read `{}`", filename.display()))?;
507                Ok(bytes)
508            })
509            .collect::<Result<Vec<Vec<u8>>>>()?;
510
511        let elf_files = file_bytes
512            .iter()
513            .map(|bytes| -> Result<ElfFile64> { Ok(ElfFile64::parse(bytes.as_slice())?) })
514            .collect::<Result<Vec<_>>>()?;
515
516        let layouts = config
517            .filenames()
518            .map(|p| LayoutAndFiles::from_base_path(p))
519            .collect::<Result<Vec<_>>>()?;
520
521        let objects = elf_files
522            .iter()
523            .zip(display_names)
524            .zip(config.filenames())
525            .zip(&layouts)
526            .map(|(((elf_file, name), path), layout)| -> Result<Binary> {
527                Binary::new(elf_file, name, path.clone(), layout.as_ref())
528            })
529            .collect::<Result<Vec<_>>>()?;
530
531        let arch = ArchKind::from_objects(&objects)?;
532
533        if config.wild_defaults {
534            config.apply_wild_defaults(arch);
535        }
536
537        let mut report = Report {
538            names: objects.iter().map(|o| o.name.clone()).collect(),
539            paths: objects.iter().map(|o| o.path.clone()).collect(),
540            diffs: Default::default(),
541            coverage: config.coverage.then(Coverage::default),
542            config,
543        };
544
545        report.run_on_objects(&objects, arch);
546
547        Ok(report)
548    }
549
550    fn run_on_objects(&mut self, objects: &[Binary], arch: ArchKind) {
551        validate_objects(
552            self,
553            objects,
554            GNU_HASH_SECTION_NAME_STR,
555            gnu_hash::check_object,
556        );
557        validate_objects(self, objects, "index", asm_diff::validate_indexes);
558        validate_objects(
559            self,
560            objects,
561            GOT_PLT_SECTION_NAME_STR,
562            asm_diff::validate_got_plt,
563        );
564        validate_objects(
565            self,
566            objects,
567            SYMTAB_SECTION_NAME_STR,
568            symtab::validate_debug,
569        );
570        validate_objects(
571            self,
572            objects,
573            DYNSYM_SECTION_NAME_STR,
574            symtab::validate_dynamic,
575        );
576        header_diff::check_dynamic_headers(self, objects);
577        header_diff::check_file_headers(self, objects);
578        header_diff::report_section_diffs(self, objects);
579        eh_frame_diff::report_diffs(self, objects);
580        version_diff::report_diffs(self, objects);
581        debug_info_diff::check_debug_info(self, objects);
582        symbol_diff::report_diffs(self, objects);
583        segment::report_diffs(self, objects);
584
585        match arch {
586            ArchKind::X86_64 => {
587                self.report_arch_specific_diffs::<crate::x86_64::X86_64>(objects);
588            }
589            ArchKind::Aarch64 => {
590                self.report_arch_specific_diffs::<crate::aarch64::AArch64>(objects);
591            }
592        }
593    }
594
595    fn report_arch_specific_diffs<A: Arch>(&mut self, binaries: &[Binary]) {
596        asm_diff::report_section_diffs::<A>(self, binaries);
597        init_order::report_diffs::<A>(self, binaries);
598    }
599
600    fn add_diff(&mut self, diff: Diff) {
601        if self.should_ignore(&diff.key) {
602            return;
603        }
604        self.diffs.push(diff);
605    }
606
607    fn add_diffs(&mut self, new_diffs: Vec<Diff>) {
608        for diff in new_diffs {
609            self.add_diff(diff);
610        }
611    }
612
613    #[must_use]
614    pub fn has_problems(&self) -> bool {
615        !self.diffs.is_empty()
616    }
617
618    fn should_ignore(&self, key: &str) -> bool {
619        if !self.config.only.is_empty() {
620            return !self.config.only.iter().any(|i| {
621                if let Some(prefix) = i.strip_suffix('*') {
622                    key.starts_with(prefix)
623                } else {
624                    key == *i
625                }
626            });
627        }
628        self.config.ignore.iter().any(|i| {
629            if let Some(prefix) = i.strip_suffix('*') {
630                key.starts_with(prefix)
631            } else {
632                key == *i
633            }
634        })
635    }
636
637    fn add_error(&mut self, error: impl Into<String>) {
638        self.diffs.push(Diff {
639            key: "error".to_owned(),
640            values: DiffValues::PreFormatted(error.into()),
641        });
642    }
643}
644
/// A single reported difference.
struct Diff {
    /// Identifies the kind of difference. This is what the `ignore` and `only` patterns are
    /// matched against, and it's printed as the heading of the diff.
    key: String,
    /// The details that are printed beneath the key.
    values: DiffValues,
}
649
/// The details of a [`Diff`].
enum DiffValues {
    /// One value per binary. When printed, each value is paired with the name of the binary at
    /// the same position.
    PerObject(Vec<String>),
    /// Text that has already been formatted. When printed, each line is indented by two spaces.
    PreFormatted(String),
}
654
655impl Display for Report {
656    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
657        for (name, path) in self.names.iter().zip(&self.paths) {
658            writeln!(f, "{name}: {}", path.display())?;
659        }
660
661        for diff in &self.diffs {
662            writeln!(f, "{}", diff.key)?;
663
664            match &diff.values {
665                DiffValues::PerObject(values) => {
666                    for (filename, result) in self.names.iter().zip(values) {
667                        writeln!(f, "  {filename} {result}")?;
668                    }
669                }
670                DiffValues::PreFormatted(values) => {
671                    for line in values.lines() {
672                        writeln!(f, "  {line}")?;
673                    }
674                }
675            }
676
677            writeln!(f)?;
678        }
679
680        Ok(())
681    }
682}
683
684impl Display for Binary<'_> {
685    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
686        self.name.fmt(f)
687    }
688}
689
690impl Display for Coverage {
691    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
692        writeln!(f, "Diffed sections:")?;
693
694        let mut total_bytes = 0;
695        let mut total_diffed = 0;
696
697        for sec in self.sections.values() {
698            writeln!(
699                f,
700                "  {} {}: {}",
701                sec.original_file,
702                sec.name,
703                if sec.diffed {
704                    "true".green()
705                } else {
706                    "false".red()
707                }
708            )?;
709
710            if sec.diffed {
711                total_diffed += sec.num_bytes;
712            }
713
714            total_bytes += sec.num_bytes;
715        }
716
717        writeln!(
718            f,
719            "Diffed {total_diffed} of {total_bytes} section bytes ({}%)",
720            total_diffed * 100 / total_bytes
721        )?;
722
723        Ok(())
724    }
725}
726
727fn short_file_display_names(config: &Config) -> Result<Vec<String>> {
728    let paths: Vec<&PathBuf> = config.filenames().collect();
729    if !config.display_names.is_empty() {
730        if config.display_names.len() != paths.len() {
731            bail!(
732                "--display-names has {} names, but {} filenames were provided",
733                config.display_names.len(),
734                paths.len()
735            );
736        }
737        return Ok(config.display_names.clone());
738    }
739    if paths.is_empty() {
740        return Ok(vec![]);
741    }
742    let mut names = paths
743        .iter()
744        .map(|p| p.to_string_lossy().into_owned())
745        .collect_vec();
746    if names.iter().all(|name| {
747        Path::new(name)
748            .extension()
749            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
750    }) {
751        names = names
752            .into_iter()
753            .map(|n| n.strip_suffix(".so").unwrap().to_owned())
754            .collect();
755    }
756
757    if names.len() > 1 {
758        // This is not quite right, since we might split in the middle of a multibyte character.
759        // But this is a dev tool, so we'll punt on that for now.
760        let mut iterators = names.iter().map(|n| n.bytes()).collect_vec();
761        let mut n = 0;
762        while first_equals_all(iterators.iter_mut().map(Iterator::next)) {
763            n += 1;
764        }
765        names = names
766            .iter()
767            .map(|name| String::from_utf8_lossy(&name.bytes().skip(n).collect_vec()).into_owned())
768            .collect_vec();
769    }
770    Ok(names)
771}
772
/// Returns whether every remaining input is equal to the first input. Returns true if there are
/// no inputs or only one.
fn first_equals_all<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => !inputs.any(|next| next != first),
    }
}
784
/// Returns whether the first input is equal to at least one of the remaining values. With no
/// inputs at all the result is true; with exactly one input it is false, since there is nothing
/// for it to be equal to.
fn first_equals_any<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => inputs.any(|next| next == first),
    }
}
797
798impl<'data> NameIndex<'data> {
799    fn new(elf_file: &ElfFile64<'data>) -> NameIndex<'data> {
800        let mut globals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
801        let mut locals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
802        let mut dynamic_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
803
804        for sym in elf_file.symbols() {
805            // We only index symbols that have a section. Note this is different than the object
806            // crate's `is_defined`, which imposes additional requirements that we don't want.
807            if sym.section_index().is_none() {
808                continue;
809            }
810
811            if let Ok(mut name) = sym.name_bytes() {
812                // Wild doesn't emit local symbols that start with ".L". The other linkers mostly do
813                // the same. However, GNU ld and lld, if they encounter a GOT-forming relocation to
814                // such a symbol, even if they then optimise away the GOT-forming relocation, will
815                // emit the symbol. This behaviour seems weird and not worth replicating, so we just
816                // ignore all just symbols.
817                if name.starts_with(b".L") {
818                    continue;
819                }
820
821                // GNU ld sometimes emits symbols that contain the symbol version. This causes
822                // problems when we go to look those symbols up, since they no longer match the name
823                // of the symbol in the original input file. So for now at least, we get rid of the
824                // version.
825                if let Some(at_pos) = name.iter().position(|b| *b == b'@') {
826                    name = &name[..at_pos];
827                }
828
829                if sym.is_global() {
830                    globals_by_name.entry(name).or_default().push(sym.index());
831                } else {
832                    locals_by_name.entry(name).or_default().push(sym.index());
833                }
834            }
835        }
836
837        for sym in elf_file.dynamic_symbols() {
838            if let Ok(name) = sym.name_bytes() {
839                dynamic_by_name.entry(name).or_default().push(sym.index());
840            }
841        }
842
843        NameIndex {
844            globals_by_name,
845            locals_by_name,
846            dynamic_by_name,
847        }
848    }
849}
850
851fn slice_from_all_bytes<T: object::Pod>(data: &[u8]) -> &[T] {
852    object::slice_from_bytes(data, data.len() / size_of::<T>())
853        .unwrap()
854        .0
855}
856
/// Parses a `key=value` argument into its two halves, splitting at the first `=`. Either half may
/// be empty and the value may itself contain `=`.
///
/// # Errors
///
/// Returns an error if `s` contains no `=`.
fn parse_string_equality(
    s: &str,
) -> Result<(String, String), Box<dyn std::error::Error + Send + Sync + 'static>> {
    match s.split_once('=') {
        Some((left, right)) => Ok((left.to_owned(), right.to_owned())),
        None => Err(format!("invalid key-value pair. No '=' found in `{s}`").into()),
    }
}
865
866fn get_r_type<R: arch::RType>(rel: &object::Relocation) -> R {
867    let object::RelocationFlags::Elf { r_type } = rel.flags() else {
868        panic!("Unsupported object type (relocation flags)");
869    };
870    R::from_raw(r_type)
871}