linker_diff/
lib.rs

//! This crate finds differences between two ELF files. Its intended use is where the files were
2//! produced by different linkers, or different versions of the same linker. So the input files
3//! should be the same except for where the linkers make different decisions such as layout.
4//!
5//! Because the intended use is verifying the correct functioning of linkers, the focus is on
6//! avoiding false positives rather than avoiding false negatives. i.e. we'd much rather fail to
7//! report a difference than report a difference that doesn't matter. Ideally a reported difference
8//! should indicate a bug or missing feature of the linker.
9//!
10//! Right now, performance of this library is not a priority, so there's quite a bit of heap
11//! allocation going on that with a little work could be avoided. If we end up using this library as
12//! part of a fuzzer this may need to be optimised.
13
14#![allow(clippy::too_many_arguments)]
15
16use anyhow::Context as _;
17use anyhow::bail;
18use asm_diff::AddressIndex;
19use clap::Parser;
20use clap::ValueEnum;
21use hashbrown::HashMap;
22use itertools::Itertools as _;
23#[allow(clippy::wildcard_imports)]
24use linker_utils::elf::secnames::*;
25use object::LittleEndian;
26use object::Object as _;
27use object::ObjectSection;
28use object::ObjectSymbol as _;
29use object::read::elf::ElfSection64;
30use section_map::IndexedLayout;
31use section_map::LayoutAndFiles;
32use std::fmt::Display;
33use std::path::Path;
34use std::path::PathBuf;
35
36mod aarch64;
37mod arch;
38mod asm_diff;
39mod debug_info_diff;
40mod diagnostics;
41mod eh_frame_diff;
42mod gnu_hash;
43mod header_diff;
44mod init_order;
45mod loongarch64;
46mod riscv64;
47pub(crate) mod section_map;
48mod segment;
49mod symbol_diff;
50mod symtab;
51mod sysv_hash;
52mod trace;
53mod utils;
54mod version_diff;
55mod x86_64;
56
/// Crate-wide `Result` alias. The success type defaults to `()` and the error type defaults to
/// `anyhow::Error`.
type Result<T = (), E = anyhow::Error> = core::result::Result<T, E>;
/// A 64-bit ELF file, read as little-endian.
type ElfFile64<'data> = object::read::elf::ElfFile64<'data, LittleEndian>;
/// A symbol belonging to a 64-bit, little-endian ELF file.
type ElfSymbol64<'data, 'file> = object::read::elf::ElfSymbol64<'data, 'file, LittleEndian>;
60
61use arch::Arch;
62use arch::ArchKind;
63use colored::Colorize;
64pub use diagnostics::enable_diagnostics;
65use section_map::InputSectionId;
66use section_map::OwnedFileIdentifier;
67
// Command-line configuration for a diff run.
//
// NOTE: this struct derives `clap::Parser`, and clap uses `///` doc comments on the fields as
// `--help` text. Notes that are not meant for users are therefore written as plain `//` comments.
#[non_exhaustive]
#[derive(Parser, Default, Clone)]
pub struct Config {
    /// Keys to ignore.
    // A trailing `*` turns the entry into a prefix match (see `Report::should_ignore`).
    #[arg(long, value_delimiter = ',')]
    pub ignore: Vec<String>,

    /// Show only the specified keys.
    // When non-empty, this takes precedence over `ignore` (see `Report::should_ignore`).
    #[arg(long, value_delimiter = ',')]
    pub only: Vec<String>,

    /// Treat the sections with the specified names as equivalent. e.g. ".got.plt=.got"
    #[arg(long, value_delimiter = ',', value_parser = parse_string_equality)]
    pub equiv: Vec<(String, String)>,

    /// Apply defaults for things that should be ignored currently for Wild. These defaults are
    /// subject to change as Wild changes.
    #[arg(long)]
    pub wild_defaults: bool,

    /// Print information about what sections did and didn't get diffed.
    #[arg(long)]
    pub coverage: bool,

    /// Display names for input files.
    // If supplied, there must be exactly one name per file (primary file first, then each
    // reference); otherwise `short_file_display_names` reports an error.
    #[arg(long, value_delimiter = ',', value_name = "NAME,NAME...")]
    pub display_names: Vec<String>,

    /// Files to compare against
    #[arg(long = "ref", value_name = "FILE")]
    pub references: Vec<PathBuf>,

    // Whether output should be coloured. Also accepted as `--color`; defaults to `auto`.
    #[arg(long, alias = "color", default_value = "auto")]
    pub colour: Colour,

    /// Primary file that we're validating against the reference file(s)
    pub file: PathBuf,
}
106
// When to colourise output. Plain `//` comments are used because this enum derives
// `clap::ValueEnum`, which uses `///` doc comments as help text for the values.
#[derive(ValueEnum, Copy, Clone, Default)]
pub enum Colour {
    // Clears any colour override (`Report::from_config` calls `unset_override`).
    #[default]
    Auto,
    // Forces colour off.
    Never,
    // Forces colour on.
    Always,
}
114
/// An output binary such as an executable or shared object.
pub struct Binary<'data> {
    /// Display name of this binary. This is what `Display` prints.
    name: String,
    /// Path that the binary was loaded from.
    path: PathBuf,
    /// The parsed ELF file.
    elf_file: &'data ElfFile64<'data>,
    /// Index built from `elf_file` by `AddressIndex::new`.
    address_index: AddressIndex<'data>,
    /// Symbol indexes keyed by symbol name.
    name_index: NameIndex<'data>,
    /// Indexed layout information. `None` when no layout was supplied for this binary.
    indexed_layout: Option<IndexedLayout<'data>>,
    /// Trace data obtained via `trace::Trace::for_path` for `path`.
    trace: trace::Trace,
    /// Index and size of each section, keyed by section name. If several sections share a name,
    /// only one of them is retained.
    sections_by_name: HashMap<&'data [u8], SectionInfo>,
}
126
/// Summary of a section, as recorded in `Binary::sections_by_name`.
#[derive(Clone, Copy)]
struct SectionInfo {
    /// Index of the section within the ELF file.
    index: object::SectionIndex,
    /// Size of the section as reported by the `object` crate.
    size: u64,
}
132
/// Lookup tables from symbol name to the indexes of the symbols that have that name. Each entry
/// is a list because several symbols can share a name.
struct NameIndex<'data> {
    /// Consulted first by `Binary::symbol_by_name`.
    globals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Consulted by `Binary::symbol_by_name` when no global matched.
    locals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// NOTE(review): presumably symbols from the dynamic symbol table - confirm against
    /// `NameIndex::new`.
    dynamic_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
}
138
139impl Config {
140    #[must_use]
141    pub fn from_env() -> Self {
142        Self::parse()
143    }
144
145    fn apply_wild_defaults(&mut self, arch: ArchKind) {
146        self.ignore.extend(
147            [
148                // We don't currently support allocating space except in sections, so we have
149                // sections to hold the section and program headers. We then need
150                // to ignore them because GNU ld doesn't define such sections.
151                "section.shdr",
152                "section.phdr",
153                // We don't yet support these sections.
154                "section.data.rel.ro",
155                // We set this to 8. GNU ld sometimes does too, but sometimes to 0.
156                "section.got.entsize",
157                "section.plt.got.entsize",
158                "section.plt.entsize",
159                // GNU ld sometimes sets this differently that we do.
160                "section.plt",
161                "section.plt.alignment",
162                "section.bss.alignment",
163                "section.gnu.build.attributes",
164                "section.annobin.notes.entsize",
165                // We don't yet group .lrodata sections separately.
166                "section.lrodata",
167                // We sometimes eliminate __tls_get_addr where GNU ld doesn't. This can mean that
168                // we have no versioned symbols for ld-linux-x86-64.so.2 or
169                // equivalent, which means we end up with one less version record.
170                ".dynamic.DT_VERNEEDNUM",
171                // We currently handle these dynamic tags differently
172                ".dynamic.DT_JMPREL",
173                ".dynamic.DT_PLTGOT",
174                ".dynamic.DT_PLTREL",
175                // We currently produce a .got.plt whenever we produce .plt, but GNU ld doesn't
176                "section.got.plt",
177                GOT_PLT_SECTION_NAME_STR,
178                // We don't currently produce a separate .plt.sec section.
179                "section.plt.sec",
180                // Different hash values due to different implementations.
181                ".dynamic.DT_HASH",
182                // Different hash values due to different implementations.
183                ".hash",
184                "section.hash.alignment",
185                "section.hash.entsize",
186                // Some other linkers seem to generate a `.hash` section even when there are no
187                // dynamic symbols.
188                "section.hash",
189                // aarch64-linux-gnu-ld on arch linux emits DT_BIND_NOW instead of
190                // DT_FLAGS.BIND_NOW
191                ".dynamic.DT_BIND_NOW",
192                ".dynamic.DT_FLAGS.BIND_NOW",
193                // When GNU ld encounters a GOT-forming reference to an ifunc, it generates a
194                // canonical PLT entry and points the GOT at that. This means that it ends up with
195                // GOT->PLT->GOT. We don't as yet support doing this.
196                "rel.missing-got-plt-got",
197                // We do support this. TODO: Should definitely look into why we're seeing this
198                // missing in our output.
199                "section.rela.plt",
200                // We currently write 10 byte PLT entries in some cases where GNU ld writes 8 byte
201                // ones.
202                "section.plt.got.alignment",
203                // GNU ld sometimes makes this writable sometimes not. Presumably this depends on
204                // whether there are relocations or some flags.
205                "section.eh_frame.flags",
206                // TLSDESC relaxations aren't yet implemented.
207                "rel.match_failed.R_X86_64_GOTPC32_TLSDESC",
208                "rel.missing-opt.R_X86_64_TLSDESC_CALL.SkipTlsDescCall.*",
209                // Wild eliminates GOTPCRELX in statically linked executables even for undefined
210                // symbols, whereas other linkers don't. This is a valid optimisation that other
211                // linkers don't currently do.
212                "rel.extra-opt.R_X86_64_GOTPCRELX.CallIndirectToRelative.static-*",
213                // We don't yet support emitting warnings.
214                "section.gnu.warning",
215                // GNU ld sometimes applies relaxations that we don't yet.
216                "rel.match_failed.R_AARCH64_TLSDESC_LD64_LO12",
217                "rel.match_failed.R_AARCH64_TLSGD_ADD_LO12_NC",
218                "rel.missing-opt.R_X86_64_TLSGD.TlsGdToInitialExec.shared-object",
219                // GNU ld sometimes relaxes an adrp instruction to an adr instruction when the
220                // address is known and within +/-1MB. We don't as yet.
221                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.AdrpToAdr.*",
222                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.AdrpToAdr.*",
223                "rel.extra-opt.R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21.MovzXnLsl16.*",
224                // The other linkers set properties on sections if all input sections have that
225                // property. For sections like .rodata, this seems like an unimportant behaviour to
226                // replicate.
227                "section.rodata.entsize",
228                "section.rodata.flags",
229                // We emit dynamic relocations for direct references to undefined weak symbols that
230                // might be provided at runtime as well as GOT entries for indirect references. GNU
231                // ld and lld only emit the GOT entries and leave direct references as null. Our
232                // behaviour seems more consistent with the description of
233                // `-zdynamic-undefined-weak`.
234                "rel.undefined-weak.dynamic.R_X86_64_64",
235                "rel.undefined-weak.dynamic.R_AARCH64_ABS64",
236                // On aarch64, GNU ld, at least sometimes, converts R_AARCH64_ABS64 to a
237                // PLT-forming relocation. We at present, don't.
238                "rel.dynamic-plt-bypass",
239                // If we don't optimise a TLS access, then we'll have references to __tls_get_addr,
240                // when GNU ld doesn't.
241                "dynsym.__tls_get_addr.*",
242                // GNU ld emits two segments, whereas wild emits only a single segment.
243                "segment.LOAD.R.*",
244                // We haven't provided an implementation that is compatible with existing linkers.
245                "segment.PHDR.*",
246                "segment.GNU_RELRO.*",
247                "segment.GNU_STACK.*",
248                // Wild currently generates PT_NOTE even for non-alloc note sections, while the
249                // other linkers don't.
250                "segment.NOTE.*",
251                // TODO: RISC-V
252                "segment.LOAD.RW.alignment",
253                // TODO: Latest lld sometimes doesn’t create a .note.gnu.property section even when
254                // Wild does.
255                "segment.GNU_PROPERTY.alignment",
256                "segment.GNU_PROPERTY.flags",
257                // GNU ld and lld sometimes don’t generate .sframe sections in cases where we do.
258                // TODO: Figure out why this is happening.
259                "segment.GNU_SFRAME.alignment",
260                "segment.GNU_SFRAME.flags",
261            ]
262            .into_iter()
263            .map(ToOwned::to_owned),
264        );
265
266        match arch {
267            ArchKind::Aarch64 => self.ignore.extend(
268                [
269                    // Other linkers have a bigger initial PLT entry, thus the entsize is set to
270                    // zero: https://sourceware.org/bugzilla/show_bug.cgi?id=26312
271                    "section.plt.entsize",
272                    // On Alpine Linux, aarch64, GNU ld seems to emit the _DYNAMIC symbol without a
273                    // section index instead of pointing it at the .dynamic section.
274                    "rel.extra-symbol._DYNAMIC",
275                    // Also on Alpine Linux, aarch64, it seems that GNU ld is emitting an
276                    // unnecessary GLOB_DAT relocation in a GOT entry.
277                    "rel.missing-got-dynamic.executable",
278                ]
279                .into_iter()
280                .map(ToOwned::to_owned),
281            ),
282            ArchKind::RISCV64 => self.ignore.extend(
283                [
284                    // TODO: for some reason, main is put into .dynsym
285                    "dynsym.main.section",
286                    // GOT entries may differ due to unimplemented relaxations
287                    "section.got.*",
288                    // Dynamic relocations may differ
289                    "rel.dynamic.*",
290                    "rel.undefined-weak.*",
291                    // Symbol address inconsistencies due to different optimizations
292                    "error.*",
293                    "section-diff-failed*",
294                    // .relro_padding is showing up on risc-v.
295                    "section.relro_padding",
296                ]
297                .into_iter()
298                .map(ToOwned::to_owned),
299            ),
300            ArchKind::X86_64 => {}
301            ArchKind::LoongArch64 => self.ignore.extend(
302                [
303                    "section.sdata",
304                    "section.iplt",
305                    "rel.unknown_failure*",
306                    "literal-byte-mismatch*",
307                    "error.*",
308                    "section-diff-failed*",
309                ]
310                .into_iter()
311                .map(ToOwned::to_owned),
312            ),
313        }
314
315        self.equiv.push((
316            GOT_SECTION_NAME_STR.to_owned(),
317            GOT_PLT_SECTION_NAME_STR.to_owned(),
318        ));
319        // We don't currently define .plt.got and .plt.sec, we just put everything into .plt.
320        self.equiv.push((
321            PLT_SECTION_NAME_STR.to_owned(),
322            PLT_GOT_SECTION_NAME_STR.to_owned(),
323        ));
324        self.equiv.push((
325            PLT_SECTION_NAME_STR.to_owned(),
326            PLT_SEC_SECTION_NAME_STR.to_owned(),
327        ));
328    }
329
330    #[must_use]
331    pub fn to_arg_string(&self) -> String {
332        let mut out = String::new();
333        if self.wild_defaults {
334            out.push_str("--wild-defaults ");
335        }
336        if !self.ignore.is_empty() {
337            out.push_str("--ignore '");
338            out.push_str(&self.ignore.join(","));
339            out.push_str("' ");
340        }
341        if !self.equiv.is_empty() {
342            out.push_str("--equiv '");
343            let parts = self
344                .equiv
345                .iter()
346                .map(|(k, v)| format!("{k}={v}"))
347                .collect_vec();
348            out.push_str(&parts.join(","));
349            out.push_str("' ");
350        }
351        if !self.display_names.is_empty() {
352            out.push_str("--display-names ");
353            out.push_str(&self.display_names.join(","));
354            out.push(' ');
355        }
356        for file in &self.references {
357            out.push_str("--ref ");
358            out.push_str(&file.to_string_lossy());
359            out.push(' ');
360        }
361        out.push_str(&self.file.to_string_lossy());
362        out
363    }
364
365    fn filenames(&self) -> impl Iterator<Item = &PathBuf> {
366        // We always put our file first, since it makes it easier to treat it differently. e.g. when
367        // we compare a value from our file against each of the values from the other files.
368        std::iter::once(&self.file).chain(&self.references)
369    }
370}
371
372impl<'data> Binary<'data> {
373    pub(crate) fn new(
374        elf_file: &'data ElfFile64<'data>,
375        name: String,
376        path: PathBuf,
377        layout_and_files: Option<&'data LayoutAndFiles>,
378    ) -> Result<Self> {
379        let address_index = AddressIndex::new(elf_file);
380        let indexed_layout = layout_and_files.map(IndexedLayout::new).transpose()?;
381        let trace = trace::Trace::for_path(&path)?;
382
383        let sections_by_name = elf_file
384            .sections()
385            .map(|section| {
386                Ok((
387                    section.name_bytes()?,
388                    SectionInfo {
389                        index: section.index(),
390                        size: section.size(),
391                    },
392                ))
393            })
394            .collect::<Result<HashMap<&[u8], SectionInfo>>>()?;
395
396        Ok(Self {
397            name,
398            elf_file,
399            path,
400            address_index,
401            name_index: NameIndex::new(elf_file),
402            indexed_layout,
403            trace,
404            sections_by_name,
405        })
406    }
407
408    /// Looks up a symbol, first trying to get a global, or failing that a local. If multiple
409    /// symbols have the same name, then `hint_address` is used to select which one to return.
410    pub(crate) fn symbol_by_name<'file: 'data>(
411        &'file self,
412        name: &[u8],
413        hint_address: u64,
414    ) -> NameLookupResult<'data, 'file> {
415        match self.lookup_symbol(&self.name_index.globals_by_name, name, hint_address) {
416            NameLookupResult::Undefined => {
417                self.lookup_symbol(&self.name_index.locals_by_name, name, hint_address)
418            }
419            other => other,
420        }
421    }
422
423    fn lookup_symbol<'file: 'data>(
424        &'file self,
425        symbol_map: &HashMap<&[u8], Vec<object::SymbolIndex>>,
426        name: &[u8],
427        hint_address: u64,
428    ) -> NameLookupResult<'data, 'file> {
429        let indexes = symbol_map.get(name).map(Vec::as_slice).unwrap_or_default();
430
431        if indexes.len() >= 2 {
432            for sym_index in indexes {
433                if let Ok(sym) = self.elf_file.symbol_by_index(*sym_index)
434                    && sym.address() == hint_address
435                {
436                    return NameLookupResult::Defined(sym);
437                }
438            }
439
440            // We didn't find a symbol with exactly the address hinted at.
441            return NameLookupResult::Duplicate;
442        }
443
444        if let Some(symbol_index) = indexes.first() {
445            if let Ok(sym) = self.elf_file.symbol_by_index(*symbol_index) {
446                NameLookupResult::Defined(sym)
447            } else {
448                NameLookupResult::Undefined
449            }
450        } else {
451            NameLookupResult::Undefined
452        }
453    }
454
455    fn section_by_name<'file: 'data>(
456        &'file self,
457        name: &str,
458    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
459        self.section_by_name_bytes(name.as_bytes())
460    }
461
462    fn section_by_name_bytes<'file: 'data>(
463        &'file self,
464        name: &[u8],
465    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
466        let index = self.sections_by_name.get(name)?.index;
467        self.elf_file.section_by_index(index).ok()
468    }
469
470    fn section_containing_address<'file: 'data>(
471        &'file self,
472        address: u64,
473    ) -> Option<ElfSection64<'file, 'data, LittleEndian>> {
474        self.elf_file
475            .sections()
476            .find(|sec| (sec.address()..sec.address() + sec.size()).contains(&address))
477    }
478
479    /// Returns the name of the section that contains the supplied address. Does a linear scan, so
480    /// should only be used for error reporting.
481    fn section_name_containing_address(&self, address: u64) -> Option<&str> {
482        self.section_containing_address(address)
483            .and_then(|sec| sec.name().ok())
484    }
485}
486
/// Outcome of looking up a symbol by name.
#[derive(Debug)]
enum NameLookupResult<'data, 'file> {
    /// No usable symbol with the requested name was found.
    Undefined,
    /// Several symbols have the requested name and none of them is at the hinted address.
    Duplicate,
    /// The symbol that was selected.
    Defined(ElfSymbol64<'data, 'file>),
}
493
494fn validate_objects(
495    report: &mut Report,
496    objects: &[Binary],
497    validation_name: &str,
498    validation_fn: impl Fn(&Binary) -> Result,
499) {
500    let values = objects
501        .iter()
502        .map(|obj| match validation_fn(obj) {
503            Ok(_) => "OK".to_owned(),
504            Err(e) => e.to_string(),
505        })
506        .collect_vec();
507    if first_equals_any(values.iter()) {
508        return;
509    }
510    report.add_diff(Diff {
511        key: validation_name.to_owned(),
512        values: DiffValues::PerObject(values),
513    });
514}
515
/// The result of comparing a primary binary against one or more reference binaries. Formatting
/// it with `Display` prints each binary's name and path followed by every recorded difference.
pub struct Report {
    /// The names of each of our binaries. These should be short, not a full path, since we often
    /// prefix lines with these names.
    names: Vec<String>,

    /// The full path of each of our binaries.
    paths: Vec<PathBuf>,

    /// The differences that were detected.
    diffs: Vec<Diff>,

    /// The configuration that was used.
    config: Config,

    /// Section coverage information. `Some` only when `Config::coverage` was set.
    pub coverage: Option<Coverage>,
}
532
/// Records, for each input section, whether it was diffed. Formatting it with `Display` lists
/// every section and a byte-level summary.
#[derive(Default)]
pub struct Coverage {
    /// Coverage record for each input section, keyed by the section's ID.
    sections: HashMap<InputSectionId, SectionCoverage>,
}
537
/// Coverage record for a single input section.
struct SectionCoverage {
    /// The original input file from which the section came.
    original_file: OwnedFileIdentifier,

    /// The name of the section.
    name: String,

    /// Whether we diffed this section at all.
    diffed: bool,

    /// The size of the section in bytes.
    num_bytes: u64,
}
551
552impl Report {
553    pub fn from_config(mut config: Config) -> Result<Report> {
554        // This changes mutable global state, which isn't an ideal thing to be doing from a library.
555        // It's expedient though, and we don't really expect linker-diff to get used as a library
556        // anywhere except the linker-diff binary and wild's integration tests, so this probably
557        // isn't a big deal.
558        match config.colour {
559            Colour::Auto => colored::control::unset_override(),
560            Colour::Never => colored::control::set_override(false),
561            Colour::Always => colored::control::set_override(true),
562        }
563
564        let display_names = short_file_display_names(&config)?;
565
566        let file_bytes = config
567            .filenames()
568            .map(|filename| -> Result<Vec<u8>> {
569                let bytes = std::fs::read(filename)
570                    .with_context(|| format!("Failed to read `{}`", filename.display()))?;
571                Ok(bytes)
572            })
573            .collect::<Result<Vec<Vec<u8>>>>()?;
574
575        let elf_files = file_bytes
576            .iter()
577            .map(|bytes| -> Result<ElfFile64> { Ok(ElfFile64::parse(bytes.as_slice())?) })
578            .collect::<Result<Vec<_>>>()?;
579
580        let layouts = config
581            .filenames()
582            .map(|p| LayoutAndFiles::from_base_path(p))
583            .collect::<Result<Vec<_>>>()?;
584
585        let objects = elf_files
586            .iter()
587            .zip(display_names)
588            .zip(config.filenames())
589            .zip(&layouts)
590            .map(|(((elf_file, name), path), layout)| -> Result<Binary> {
591                Binary::new(elf_file, name, path.clone(), layout.as_ref())
592            })
593            .collect::<Result<Vec<_>>>()?;
594
595        if objects.len() < 2 {
596            bail!("At least two files must be provided for comparison");
597        }
598
599        let arch = ArchKind::from_objects(&objects)?;
600
601        if config.wild_defaults {
602            config.apply_wild_defaults(arch);
603        }
604
605        let mut report = Report {
606            names: objects.iter().map(|o| o.name.clone()).collect(),
607            paths: objects.iter().map(|o| o.path.clone()).collect(),
608            diffs: Default::default(),
609            coverage: config.coverage.then(Coverage::default),
610            config,
611        };
612
613        report.run_on_objects(&objects, arch);
614
615        Ok(report)
616    }
617
    /// Runs every check against `objects`, recording any differences in this report. The
    /// architecture-independent checks run first, followed by the checks that are specialised
    /// for `arch`. Diffs are appended in the order in which the checks run.
    fn run_on_objects(&mut self, objects: &[Binary], arch: ArchKind) {
        validate_objects(
            self,
            objects,
            GNU_HASH_SECTION_NAME_STR,
            gnu_hash::check_object,
        );
        validate_objects(
            self,
            objects,
            HASH_SECTION_NAME_STR,
            sysv_hash::check_object,
        );
        validate_objects(self, objects, "index", asm_diff::validate_indexes);
        validate_objects(
            self,
            objects,
            GOT_PLT_SECTION_NAME_STR,
            asm_diff::validate_got_plt,
        );
        validate_objects(
            self,
            objects,
            SYMTAB_SECTION_NAME_STR,
            symtab::validate_debug,
        );
        validate_objects(
            self,
            objects,
            DYNSYM_SECTION_NAME_STR,
            symtab::validate_dynamic,
        );
        header_diff::check_dynamic_headers(self, objects);
        header_diff::check_file_headers(self, objects);
        header_diff::report_section_diffs(self, objects);
        eh_frame_diff::report_diffs(self, objects);
        version_diff::report_diffs(self, objects);
        debug_info_diff::check_debug_info(self, objects);
        symbol_diff::report_diffs(self, objects);
        segment::report_diffs(self, objects);

        // Dispatch to the checks that are generic over the architecture.
        match arch {
            ArchKind::X86_64 => {
                self.report_arch_specific_diffs::<crate::x86_64::X86_64>(objects);
            }
            ArchKind::Aarch64 => {
                self.report_arch_specific_diffs::<crate::aarch64::AArch64>(objects);
            }

            ArchKind::RISCV64 => {
                self.report_arch_specific_diffs::<crate::riscv64::RiscV64>(objects);
            }
            ArchKind::LoongArch64 => {
                self.report_arch_specific_diffs::<crate::loongarch64::LoongArch64>(objects);
            }
        }
    }
675
    /// Runs the checks that are generic over the architecture `A`: first the section diffs from
    /// `asm_diff`, then the diffs from `init_order`.
    fn report_arch_specific_diffs<A: Arch>(&mut self, binaries: &[Binary]) {
        asm_diff::report_section_diffs::<A>(self, binaries);
        init_order::report_diffs::<A>(self, binaries);
    }
680
681    fn add_diff(&mut self, diff: Diff) {
682        if self.should_ignore(&diff.key) {
683            return;
684        }
685        self.diffs.push(diff);
686    }
687
688    fn add_diffs(&mut self, new_diffs: Vec<Diff>) {
689        for diff in new_diffs {
690            self.add_diff(diff);
691        }
692    }
693
694    #[must_use]
695    pub fn has_problems(&self) -> bool {
696        !self.diffs.is_empty()
697    }
698
699    #[must_use]
700    pub fn should_ignore(&self, key: &str) -> bool {
701        if !self.config.only.is_empty() {
702            return !self.config.only.iter().any(|i| {
703                if let Some(prefix) = i.strip_suffix('*') {
704                    key.starts_with(prefix)
705                } else {
706                    key == *i
707                }
708            });
709        }
710        self.config.ignore.iter().any(|i| {
711            if let Some(prefix) = i.strip_suffix('*') {
712                key.starts_with(prefix)
713            } else {
714                key == *i
715            }
716        })
717    }
718
719    fn add_error(&mut self, error: impl Into<String>) {
720        self.diffs.push(Diff {
721            key: "error".to_owned(),
722            values: DiffValues::PreFormatted(error.into()),
723        });
724    }
725}
726
/// A single reported difference.
struct Diff {
    /// Identifies the kind of difference. This is what the `ignore` and `only` patterns are
    /// matched against, and it is printed as the heading of the diff.
    key: String,
    /// The details that are printed beneath the key.
    values: DiffValues,
}
731
/// The details of a `Diff`.
enum DiffValues {
    /// One value per binary. When printed, values are paired up with the report's binary names
    /// in order.
    PerObject(Vec<String>),
    /// Free-form text, printed line by line with indentation.
    PreFormatted(String),
}
736
737impl Display for Report {
738    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
739        for (name, path) in self.names.iter().zip(&self.paths) {
740            writeln!(f, "{name}: {}", path.display())?;
741        }
742
743        for diff in &self.diffs {
744            writeln!(f, "{}", diff.key)?;
745
746            match &diff.values {
747                DiffValues::PerObject(values) => {
748                    for (filename, result) in self.names.iter().zip(values) {
749                        writeln!(f, "  {filename} {result}")?;
750                    }
751                }
752                DiffValues::PreFormatted(values) => {
753                    for line in values.lines() {
754                        writeln!(f, "  {line}")?;
755                    }
756                }
757            }
758
759            writeln!(f)?;
760        }
761
762        Ok(())
763    }
764}
765
766impl Display for Binary<'_> {
767    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
768        self.name.fmt(f)
769    }
770}
771
772impl Display for Coverage {
773    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
774        writeln!(f, "Diffed sections:")?;
775
776        let mut total_bytes = 0;
777        let mut total_diffed = 0;
778
779        for sec in self.sections.values() {
780            writeln!(
781                f,
782                "  {} {}: {}",
783                sec.original_file,
784                sec.name,
785                if sec.diffed {
786                    "true".green()
787                } else {
788                    "false".red()
789                }
790            )?;
791
792            if sec.diffed {
793                total_diffed += sec.num_bytes;
794            }
795
796            total_bytes += sec.num_bytes;
797        }
798
799        writeln!(
800            f,
801            "Diffed {total_diffed} of {total_bytes} section bytes ({}%)",
802            total_diffed * 100 / total_bytes
803        )?;
804
805        Ok(())
806    }
807}
808
809fn short_file_display_names(config: &Config) -> Result<Vec<String>> {
810    let paths: Vec<&PathBuf> = config.filenames().collect();
811    if !config.display_names.is_empty() {
812        if config.display_names.len() != paths.len() {
813            bail!(
814                "--display-names has {} names, but {} filenames were provided",
815                config.display_names.len(),
816                paths.len()
817            );
818        }
819        return Ok(config.display_names.clone());
820    }
821    if paths.is_empty() {
822        return Ok(vec![]);
823    }
824    let mut names = paths
825        .iter()
826        .map(|p| p.to_string_lossy().into_owned())
827        .collect_vec();
828    if names.iter().all(|name| {
829        Path::new(name)
830            .extension()
831            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
832    }) {
833        names = names
834            .into_iter()
835            .map(|n| n.strip_suffix(".so").unwrap().to_owned())
836            .collect();
837    }
838
839    if names.len() > 1 {
840        // This is not quite right, since we might split in the middle of a multibyte character.
841        // But this is a dev tool, so we'll punt on that for now.
842        let mut iterators = names.iter().map(|n| n.bytes()).collect_vec();
843        let mut n = 0;
844        while first_equals_all(iterators.iter_mut().map(Iterator::next)) {
845            n += 1;
846        }
847        names = names
848            .iter()
849            .map(|name| String::from_utf8_lossy(&name.bytes().skip(n).collect_vec()).into_owned())
850            .collect_vec();
851    }
852    Ok(names)
853}
854
/// Returns whether every remaining input is equal to the first one. An empty input, or an input
/// with only a single value, yields `true`.
fn first_equals_all<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        // Stops at the first value that differs from the reference value.
        Some(reference) => !inputs.any(|candidate| candidate != reference),
        None => true,
    }
}
866
/// Returns whether the first input is equal to at least one of the values that follow it. An
/// empty input yields `true`, while an input with only a single value yields `false`.
fn first_equals_any<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        // Stops at the first value that matches the reference value.
        Some(reference) => inputs.any(|candidate| candidate == reference),
        None => true,
    }
}
879
880impl<'data> NameIndex<'data> {
881    fn new(elf_file: &ElfFile64<'data>) -> NameIndex<'data> {
882        let mut globals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
883        let mut locals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
884        let mut dynamic_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
885
886        for sym in elf_file.symbols() {
887            // We only index symbols that have a section. Note this is different than the object
888            // crate's `is_defined`, which imposes additional requirements that we don't want.
889            if sym.section_index().is_none() {
890                continue;
891            }
892
893            if let Ok(mut name) = sym.name_bytes() {
894                // Wild doesn't emit local symbols that start with ".L". The other linkers mostly do
895                // the same. However, GNU ld and lld, if they encounter a GOT-forming relocation to
896                // such a symbol, even if they then optimise away the GOT-forming relocation, will
897                // emit the symbol. This behaviour seems weird and not worth replicating, so we just
898                // ignore all just symbols.
899                if name.starts_with(b".L") {
900                    continue;
901                }
902
903                // GNU ld sometimes emits symbols that contain the symbol version. This causes
904                // problems when we go to look those symbols up, since they no longer match the name
905                // of the symbol in the original input file. So for now at least, we get rid of the
906                // version.
907                if let Some(at_pos) = name.iter().position(|b| *b == b'@') {
908                    name = &name[..at_pos];
909                }
910
911                if sym.is_global() {
912                    globals_by_name.entry(name).or_default().push(sym.index());
913                } else {
914                    locals_by_name.entry(name).or_default().push(sym.index());
915                }
916            }
917        }
918
919        for sym in elf_file.dynamic_symbols() {
920            if let Ok(name) = sym.name_bytes() {
921                dynamic_by_name.entry(name).or_default().push(sym.index());
922            }
923        }
924
925        NameIndex {
926            globals_by_name,
927            locals_by_name,
928            dynamic_by_name,
929        }
930    }
931}
932
933fn slice_from_all_bytes<T: object::Pod>(data: &[u8]) -> &[T] {
934    object::slice_from_bytes(data, data.len() / size_of::<T>())
935        .unwrap()
936        .0
937}
938
/// Parses a `key=value` string into an owned `(key, value)` pair, splitting at the first `=`.
///
/// # Errors
///
/// Returns an error if `s` contains no `=`.
fn parse_string_equality(
    s: &str,
) -> Result<(String, String), Box<dyn std::error::Error + Send + Sync + 'static>> {
    match s.split_once('=') {
        Some((key, value)) => Ok((key.to_owned(), value.to_owned())),
        None => Err(format!("invalid key-value pair. No '=' found in `{s}`").into()),
    }
}
947
948fn get_r_type<R: arch::RType>(rel: &object::Relocation) -> R {
949    let object::RelocationFlags::Elf { r_type } = rel.flags() else {
950        panic!("Unsupported object type (relocation flags)");
951    };
952    R::from_raw(r_type)
953}