Skip to main content

linker_diff/
lib.rs

//! This crate finds differences between two ELF files. Its intended use is where the files were
//! produced by different linkers, or different versions of the same linker. So the input files
//! should be the same except for where the linkers make different decisions such as layout.
//!
//! Because the intended use is verifying the correct functioning of linkers, the focus is on
//! avoiding false positives rather than avoiding false negatives. i.e. we'd much rather fail to
//! report a difference than report a difference that doesn't matter. Ideally a reported difference
//! should indicate a bug or missing feature of the linker.
//!
//! Right now, performance of this library is not a priority, so there's quite a bit of heap
//! allocation going on that with a little work could be avoided. If we end up using this library as
//! part of a fuzzer this may need to be optimised.
13
14#![allow(clippy::too_many_arguments)]
15
16use anyhow::Context as _;
17use anyhow::bail;
18use asm_diff::AddressIndex;
19use clap::Parser;
20use clap::ValueEnum;
21use hashbrown::HashMap;
22use itertools::Itertools as _;
23#[allow(clippy::wildcard_imports)]
24use linker_utils::elf::secnames::*;
25use linker_utils::utils::slice_from_all_bytes;
26use object::LittleEndian;
27use object::Object as _;
28use object::ObjectSection;
29use object::ObjectSymbol as _;
30use object::read::elf::ElfSection64;
31use section_map::IndexedLayout;
32use section_map::LayoutAndFiles;
33use std::fmt::Display;
34use std::path::Path;
35use std::path::PathBuf;
36
37mod aarch64;
38mod arch;
39mod asm_diff;
40mod debug_info_diff;
41mod diagnostics;
42mod eh_frame_diff;
43mod gnu_hash;
44mod header_diff;
45mod init_order;
46mod loongarch64;
47mod riscv64;
48mod riscv_attributes;
49pub(crate) mod section_map;
50mod segment;
51mod symbol_diff;
52mod symtab;
53mod sysv_hash;
54mod trace;
55mod utils;
56mod version_diff;
57mod x86_64;
58
59type Result<T = (), E = anyhow::Error> = core::result::Result<T, E>;
60type ElfFile64<'data> = object::read::elf::ElfFile64<'data, LittleEndian>;
61type ElfSymbol64<'data, 'file> = object::read::elf::ElfSymbol64<'data, 'file, LittleEndian>;
62
63use arch::Arch;
64use arch::ArchKind;
65use colored::Colorize;
66pub use diagnostics::enable_diagnostics;
67use section_map::InputSectionId;
68use section_map::OwnedFileIdentifier;
69
// Command-line configuration for a linker-diff run, parsed via clap's `Parser` derive.
// NOTE(review): the notes added here are plain `//` comments rather than `///` because clap
// presumably turns doc comments into help text — confirm before converting them.
#[non_exhaustive]
#[derive(Parser, Default, Clone)]
pub struct Config {
    /// Keys to ignore.
    // An entry ending in `*` is treated as a prefix match (see `Report::should_ignore`).
    #[arg(long, value_delimiter = ',')]
    pub ignore: Vec<String>,

    /// Show only the specified keys.
    // When this is non-empty, `Report::should_ignore` consults only this list; `ignore` is not
    // checked at all.
    #[arg(long, value_delimiter = ',')]
    pub only: Vec<String>,

    /// Treat the sections with the specified names as equivalent. e.g. ".got.plt=.got"
    #[arg(long, value_delimiter = ',', value_parser = parse_string_equality)]
    pub equiv: Vec<(String, String)>,

    /// Apply defaults for things that should be ignored currently for Wild. These defaults are
    /// subject to change as Wild changes.
    // Acted on in `Report::from_config`, once the architecture of the inputs is known.
    #[arg(long)]
    pub wild_defaults: bool,

    /// Print information about what sections did and didn't get diffed.
    // When set, `Report::from_config` initialises `Report::coverage`.
    #[arg(long)]
    pub coverage: bool,

    /// Display names for input files.
    // If supplied, there must be exactly one name per input file (primary file plus references);
    // `short_file_display_names` reports an error otherwise.
    #[arg(long, value_delimiter = ',', value_name = "NAME,NAME...")]
    pub display_names: Vec<String>,

    /// Files to compare against
    #[arg(long = "ref", value_name = "FILE")]
    pub references: Vec<PathBuf>,

    // Colour mode for output. `Report::from_config` maps this onto the `colored` crate's global
    // override. Also accepted on the command line as `--color`.
    #[arg(long, alias = "color", default_value = "auto")]
    pub colour: Colour,

    /// Primary file that we're validating against the reference file(s)
    pub file: PathBuf,
}
108
// Colour mode selected via `--colour` / `--color`.
// NOTE(review): plain `//` comments are used because clap's `ValueEnum` derive presumably picks
// up `///` doc comments as help text for the possible values — confirm before converting.
#[derive(ValueEnum, Copy, Clone, Default)]
pub enum Colour {
    // Clears any colour override (`colored::control::unset_override` in `Report::from_config`).
    #[default]
    Auto,
    // Forces colour off (`set_override(false)`).
    Never,
    // Forces colour on (`set_override(true)`).
    Always,
}
116
/// An output binary such as an executable or shared object.
pub struct Binary<'data> {
    /// Display name of this binary; this is what the `Display` impl prints.
    name: String,
    /// Path supplied to `Binary::new`. It is also the path the trace is looked up for.
    path: PathBuf,
    /// The parsed ELF file that this binary wraps.
    elf_file: &'data ElfFile64<'data>,
    /// Index built by `AddressIndex::new` from `elf_file`.
    address_index: AddressIndex<'data>,
    /// Symbol indexes keyed by symbol name, used by `symbol_by_name`.
    name_index: NameIndex<'data>,
    /// Indexed layout information. `None` when no `LayoutAndFiles` was supplied to `Binary::new`.
    indexed_layout: Option<IndexedLayout<'data>>,
    /// Trace obtained from `trace::Trace::for_path` for `path`.
    trace: trace::Trace,
    /// Index and size of the sections of `elf_file`, keyed by section name. Being a map, it holds
    /// a single entry per distinct name.
    sections_by_name: HashMap<&'data [u8], SectionInfo>,
}
128
/// Summary of one section, as recorded in `Binary::sections_by_name`.
#[derive(Clone, Copy)]
struct SectionInfo {
    /// Index of the section within its ELF file (`section.index()`).
    index: object::SectionIndex,
    /// Size of the section as reported by `section.size()`.
    size: u64,
}
134
/// Maps symbol names to the indexes of the symbols that carry that name. A name can map to
/// several symbols, hence the `Vec` values.
struct NameIndex<'data> {
    /// Consulted first by `Binary::symbol_by_name`.
    /// NOTE(review): presumably holds global symbols from the symbol table — confirm against
    /// `NameIndex::new`.
    globals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Consulted by `Binary::symbol_by_name` when the name has no entry among the globals.
    /// NOTE(review): presumably holds local symbols — confirm against `NameIndex::new`.
    locals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// NOTE(review): presumably holds symbols from the dynamic symbol table — confirm against
    /// `NameIndex::new`.
    dynamic_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
}
140
141impl Config {
142    #[must_use]
143    pub fn from_env() -> Self {
144        Self::parse()
145    }
146
147    fn apply_wild_defaults(&mut self, arch: ArchKind) {
148        self.ignore.extend(
149            [
150                // We don't currently support allocating space except in sections, so we have
151                // sections to hold the section and program headers. We then need
152                // to ignore them because GNU ld doesn't define such sections.
153                "section.shdr",
154                "section.phdr",
155                // We don't yet support these sections.
156                "section.data.rel.ro",
157                // We set this to 8. GNU ld sometimes does too, but sometimes to 0.
158                "section.got.entsize",
159                "section.plt.got.entsize",
160                "section.plt.entsize",
161                // GNU ld sometimes sets this differently that we do.
162                "section.plt",
163                "section.plt.alignment",
164                "section.bss.alignment",
165                "section.gnu.build.attributes",
166                "section.annobin.notes.entsize",
167                // We don't yet group .lrodata sections separately.
168                "section.lrodata",
169                // We sometimes eliminate __tls_get_addr where GNU ld doesn't. This can mean that
170                // we have no versioned symbols for ld-linux-x86-64.so.2 or
171                // equivalent, which means we end up with one less version record.
172                ".dynamic.DT_VERNEEDNUM",
173                // We currently handle these dynamic tags differently
174                ".dynamic.DT_JMPREL",
175                ".dynamic.DT_PLTGOT",
176                ".dynamic.DT_PLTREL",
177                // We currently produce a .got.plt whenever we produce .plt, but GNU ld doesn't
178                "section.got.plt",
179                GOT_PLT_SECTION_NAME_STR,
180                // We don't currently produce a separate .plt.sec section.
181                "section.plt.sec",
182                // Different hash values due to different implementations.
183                ".dynamic.DT_HASH",
184                // Different hash values due to different implementations.
185                ".hash",
186                "section.hash.alignment",
187                "section.hash.entsize",
188                // Some other linkers seem to generate a `.hash` section even when there are no
189                // dynamic symbols.
190                "section.hash",
191                // aarch64-linux-gnu-ld on arch linux emits DT_BIND_NOW instead of
192                // DT_FLAGS.BIND_NOW
193                ".dynamic.DT_BIND_NOW",
194                ".dynamic.DT_FLAGS.BIND_NOW",
195                // When GNU ld encounters a GOT-forming reference to an ifunc, it generates a
196                // canonical PLT entry and points the GOT at that. This means that it ends up with
197                // GOT->PLT->GOT. We don't as yet support doing this.
198                "rel.missing-got-plt-got",
199                // We do support this. TODO: Should definitely look into why we're seeing this
200                // missing in our output.
201                "section.rela.plt",
202                // We currently write 10 byte PLT entries in some cases where GNU ld writes 8 byte
203                // ones.
204                "section.plt.got.alignment",
205                // GNU ld sometimes makes this writable sometimes not. Presumably this depends on
206                // whether there are relocations or some flags.
207                "section.eh_frame.flags",
208                // TLSDESC relaxations aren't yet implemented.
209                "rel.match_failed.R_X86_64_GOTPC32_TLSDESC",
210                "rel.match_failed.R_X86_64_CODE_4_GOTPC32_TLSDESC",
211                "rel.missing-opt.R_X86_64_TLSDESC_CALL.SkipTlsDescCall.*",
212                // Wild eliminates GOTPCRELX in statically linked executables even for undefined
213                // symbols, whereas other linkers don't. This is a valid optimisation that other
214                // linkers don't currently do.
215                "rel.extra-opt.R_X86_64_GOTPCRELX.CallIndirectToRelative.static-*",
216                // Wild applies MovIndirectToLea relaxation to _DYNAMIC symbol in static builds
217                // because it's marked as NON_INTERPOSABLE. GNU ld keeps the GOT-relative access.
218                // Both are correct, but Wild's approach is more optimized.
219                "rel.extra-opt.R_X86_64_REX_GOTPCRELX.MovIndirectToLea.static-*",
220                // We don't yet support emitting warnings.
221                "section.gnu.warning",
222                // GNU ld sometimes applies relaxations that we don't yet.
223                "rel.match_failed.R_AARCH64_TLSDESC_LD64_LO12",
224                "rel.match_failed.R_AARCH64_TLSGD_ADD_LO12_NC",
225                "rel.missing-opt.R_X86_64_TLSGD.TlsGdToInitialExec.shared-object",
226                // GNU ld sometimes relaxes an adrp instruction to an adr instruction when the
227                // address is known and within +/-1MB. We don't as yet.
228                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.AdrpToAdr.*",
229                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.AdrpToAdr.*",
230                "rel.extra-opt.R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21.MovzXnLsl16.*",
231                // LLD does some different relaxations to us
232                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.ReplaceWithNop.*",
233                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.ReplaceWithNop.*",
234                // The other linkers set properties on sections if all input sections have that
235                // property. For sections like .rodata, this seems like an unimportant behaviour to
236                // replicate.
237                "section.rodata.entsize",
238                "section.rodata.flags",
239                // We emit dynamic relocations for direct references to undefined weak symbols that
240                // might be provided at runtime as well as GOT entries for indirect references. GNU
241                // ld and lld only emit the GOT entries and leave direct references as null. Our
242                // behaviour seems more consistent with the description of
243                // `-zdynamic-undefined-weak`.
244                "rel.undefined-weak.dynamic.R_X86_64_64",
245                "rel.undefined-weak.dynamic.R_AARCH64_ABS64",
246                // On aarch64, GNU ld, at least sometimes, converts R_AARCH64_ABS64 to a
247                // PLT-forming relocation. We at present, don't.
248                "rel.dynamic-plt-bypass",
249                // If we don't optimise a TLS access, then we'll have references to __tls_get_addr,
250                // when GNU ld doesn't.
251                "dynsym.__tls_get_addr.*",
252                // GNU ld emits two segments, whereas wild emits only a single segment.
253                "segment.LOAD.R.*",
254                // We haven't provided an implementation that is compatible with existing linkers.
255                "segment.PHDR.*",
256                "segment.GNU_RELRO.*",
257                "segment.GNU_STACK.*",
258                // Wild currently generates PT_NOTE even for non-alloc note sections, while the
259                // other linkers don't.
260                "segment.NOTE.*",
261                // TODO: RISC-V
262                "segment.LOAD.RW.alignment",
263                // TODO: Latest lld sometimes doesn’t create a .note.gnu.property section even when
264                // Wild does.
265                "segment.GNU_PROPERTY.alignment",
266                "segment.GNU_PROPERTY.flags",
267                // TODO: We consider SFrame sections experimental and disabled by default.
268                "segment.GNU_SFRAME.alignment",
269                "segment.GNU_SFRAME.flags",
270                "section.sframe",
271                // Different linkers put the PLT in different locations relative to .text, so
272                // whether range-extension thunks are needed varies.
273                "rel.plt.extra-thunk",
274                "rel.plt.absent-thunk",
275                // On some systems Wild outputs these symbols while GNU ld does not.
276            ]
277            .into_iter()
278            .map(ToOwned::to_owned),
279        );
280
281        match arch {
282            ArchKind::Aarch64 => self.ignore.extend(
283                [
284                    "section.ARM.attributes",
285                    // Other linkers have a bigger initial PLT entry, thus the entsize is set to
286                    // zero: https://sourceware.org/bugzilla/show_bug.cgi?id=26312
287                    "section.plt.entsize",
288                    // On Alpine Linux, aarch64, GNU ld seems to emit the _DYNAMIC symbol without a
289                    // section index instead of pointing it at the .dynamic section.
290                    "rel.extra-symbol._DYNAMIC",
291                    // Also on Alpine Linux, aarch64, it seems that GNU ld is emitting an
292                    // unnecessary GLOB_DAT relocation in a GOT entry.
293                    "rel.missing-got-dynamic.executable",
294                    // GNU ld replaces calls to undefined symbols with nop. Wild instead encodes
295                    // bl 0x0 so that if the call site is reached, it will crash rather than
296                    // silently continuing execution.
297                    "rel.missing-opt.R_AARCH64_CALL26.ReplaceWithNop.*",
298                    "rel.missing-opt.R_AARCH64_JUMP26.ReplaceWithNop.*",
299                ]
300                .into_iter()
301                .map(ToOwned::to_owned),
302            ),
303            ArchKind::RISCV64 => self.ignore.extend(
304                [
305                    // TODO: for some reason, main is put into .dynsym by GNU ld.
306                    "dynsym.main.section",
307                    // GOT entries may differ due to unimplemented relaxations
308                    "section.got.*",
309                    // Dynamic relocations may differ
310                    "rel.dynamic.*",
311                    "rel.undefined-weak.*",
312                    // Symbol address inconsistencies due to different optimizations
313                    "error.*",
314                    "section-diff-failed*",
315                    // .relro_padding is showing up on risc-v.
316                    "section.relro_padding",
317                ]
318                .into_iter()
319                .map(ToOwned::to_owned),
320            ),
321            ArchKind::X86_64 => {}
322            ArchKind::LoongArch64 => self.ignore.extend(
323                [
324                    "section.sdata",
325                    "section.iplt",
326                    "rel.unknown_failure*",
327                    "literal-byte-mismatch*",
328                    "error.*",
329                    "section-diff-failed*",
330                    // GNU ld replaces calls to undefined symbols with nop. Wild instead encodes
331                    // bl 0x0 so that if the call site is reached, it will crash rather than
332                    // silently continuing execution.
333                    "rel.missing-opt.R_LARCH_B26.ReplaceWithNop.*",
334                ]
335                .into_iter()
336                .map(ToOwned::to_owned),
337            ),
338        }
339
340        self.equiv.push((
341            GOT_SECTION_NAME_STR.to_owned(),
342            GOT_PLT_SECTION_NAME_STR.to_owned(),
343        ));
344        // We don't currently define .plt.got and .plt.sec, we just put everything into .plt.
345        self.equiv.push((
346            PLT_SECTION_NAME_STR.to_owned(),
347            PLT_GOT_SECTION_NAME_STR.to_owned(),
348        ));
349        self.equiv.push((
350            PLT_SECTION_NAME_STR.to_owned(),
351            PLT_SEC_SECTION_NAME_STR.to_owned(),
352        ));
353    }
354
355    #[must_use]
356    pub fn to_arg_string(&self) -> String {
357        let mut out = String::new();
358        if self.wild_defaults {
359            out.push_str("--wild-defaults ");
360        }
361        if !self.ignore.is_empty() {
362            out.push_str("--ignore '");
363            out.push_str(&self.ignore.join(","));
364            out.push_str("' ");
365        }
366        if !self.equiv.is_empty() {
367            out.push_str("--equiv '");
368            let parts = self
369                .equiv
370                .iter()
371                .map(|(k, v)| format!("{k}={v}"))
372                .collect_vec();
373            out.push_str(&parts.join(","));
374            out.push_str("' ");
375        }
376        if !self.display_names.is_empty() {
377            out.push_str("--display-names ");
378            out.push_str(&self.display_names.join(","));
379            out.push(' ');
380        }
381        for file in &self.references {
382            out.push_str("--ref ");
383            out.push_str(&file.to_string_lossy());
384            out.push(' ');
385        }
386        out.push_str(&self.file.to_string_lossy());
387        out
388    }
389
390    fn filenames(&self) -> impl Iterator<Item = &PathBuf> {
391        // We always put our file first, since it makes it easier to treat it differently. e.g. when
392        // we compare a value from our file against each of the values from the other files.
393        std::iter::once(&self.file).chain(&self.references)
394    }
395}
396
397impl<'data> Binary<'data> {
398    pub(crate) fn new(
399        elf_file: &'data ElfFile64<'data>,
400        name: String,
401        path: PathBuf,
402        layout_and_files: Option<&'data LayoutAndFiles>,
403    ) -> Result<Self> {
404        let address_index = AddressIndex::new(elf_file);
405        let indexed_layout = layout_and_files.map(IndexedLayout::new).transpose()?;
406        let trace = trace::Trace::for_path(&path)?;
407
408        let sections_by_name = elf_file
409            .sections()
410            .map(|section| {
411                Ok((
412                    section.name_bytes()?,
413                    SectionInfo {
414                        index: section.index(),
415                        size: section.size(),
416                    },
417                ))
418            })
419            .collect::<Result<HashMap<&[u8], SectionInfo>>>()?;
420
421        Ok(Self {
422            name,
423            elf_file,
424            path,
425            address_index,
426            name_index: NameIndex::new(elf_file),
427            indexed_layout,
428            trace,
429            sections_by_name,
430        })
431    }
432
433    /// Looks up a symbol, first trying to get a global, or failing that a local. If multiple
434    /// symbols have the same name, then `hint_address` is used to select which one to return.
435    pub(crate) fn symbol_by_name<'file: 'data>(
436        &'file self,
437        name: &[u8],
438        hint_address: u64,
439    ) -> NameLookupResult<'data, 'file> {
440        match self.lookup_symbol(&self.name_index.globals_by_name, name, hint_address) {
441            NameLookupResult::Undefined => {
442                self.lookup_symbol(&self.name_index.locals_by_name, name, hint_address)
443            }
444            other => other,
445        }
446    }
447
448    fn lookup_symbol<'file: 'data>(
449        &'file self,
450        symbol_map: &HashMap<&[u8], Vec<object::SymbolIndex>>,
451        name: &[u8],
452        hint_address: u64,
453    ) -> NameLookupResult<'data, 'file> {
454        let indexes = symbol_map.get(name).map(Vec::as_slice).unwrap_or_default();
455
456        if indexes.len() >= 2 {
457            for sym_index in indexes {
458                if let Ok(sym) = self.elf_file.symbol_by_index(*sym_index)
459                    && sym.address() == hint_address
460                {
461                    return NameLookupResult::Defined(sym);
462                }
463            }
464
465            // We didn't find a symbol with exactly the address hinted at.
466            return NameLookupResult::Duplicate;
467        }
468
469        if let Some(symbol_index) = indexes.first() {
470            if let Ok(sym) = self.elf_file.symbol_by_index(*symbol_index) {
471                NameLookupResult::Defined(sym)
472            } else {
473                NameLookupResult::Undefined
474            }
475        } else {
476            NameLookupResult::Undefined
477        }
478    }
479
480    fn section_by_name<'file: 'data>(
481        &'file self,
482        name: &str,
483    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
484        self.section_by_name_bytes(name.as_bytes())
485    }
486
487    fn section_by_name_bytes<'file: 'data>(
488        &'file self,
489        name: &[u8],
490    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
491        let index = self.sections_by_name.get(name)?.index;
492        self.elf_file.section_by_index(index).ok()
493    }
494
495    fn section_containing_address<'file: 'data>(
496        &'file self,
497        address: u64,
498    ) -> Option<ElfSection64<'file, 'data, LittleEndian>> {
499        self.elf_file
500            .sections()
501            .find(|sec| (sec.address()..sec.address() + sec.size()).contains(&address))
502    }
503
504    /// Returns the name of the section that contains the supplied address. Does a linear scan, so
505    /// should only be used for error reporting.
506    fn section_name_containing_address(&self, address: u64) -> Option<&str> {
507        self.section_containing_address(address)
508            .and_then(|sec| sec.name().ok())
509    }
510}
511
/// Outcome of looking a symbol up by name (see `Binary::symbol_by_name`).
#[derive(Debug)]
enum NameLookupResult<'data, 'file> {
    /// No symbol with the requested name was found, or the one found couldn't be fetched.
    Undefined,
    /// Several symbols share the name and none of them is at the hinted address.
    Duplicate,
    /// The symbol that was selected.
    Defined(ElfSymbol64<'data, 'file>),
}
518
519fn validate_objects(
520    report: &mut Report,
521    objects: &[Binary],
522    validation_name: &str,
523    validation_fn: impl Fn(&Binary) -> Result,
524) {
525    let values = objects
526        .iter()
527        .map(|obj| match validation_fn(obj) {
528            Ok(_) => "OK".to_owned(),
529            Err(e) => e.to_string(),
530        })
531        .collect_vec();
532    if first_equals_any(values.iter()) {
533        return;
534    }
535    report.add_diff(Diff {
536        key: validation_name.to_owned(),
537        values: DiffValues::PerObject(values),
538    });
539}
540
/// The result of comparing a set of binaries: the differences that were found, together with the
/// inputs and configuration that produced them. Its `Display` impl prints the inputs followed by
/// each difference.
pub struct Report {
    /// The names of each of our binaries. These should be short, not a full path, since we often
    /// prefix lines with these names.
    names: Vec<String>,

    /// The full path of each of our binaries.
    paths: Vec<PathBuf>,

    /// The differences that were detected.
    diffs: Vec<Diff>,

    /// The configuration that was used.
    config: Config,

    /// Section coverage information. `Some` only when `Config::coverage` was set when the report
    /// was created (see `from_config`).
    pub coverage: Option<Coverage>,
}
557
/// Records, per input section, whether the section was diffed. Its `Display` impl lists every
/// section and prints a byte-weighted total.
#[derive(Default)]
pub struct Coverage {
    /// Coverage record for each input section, keyed by the section's ID.
    sections: HashMap<InputSectionId, SectionCoverage>,
}
562
/// Coverage information for a single input section.
struct SectionCoverage {
    /// The original input file from which the section came.
    original_file: OwnedFileIdentifier,

    /// The name of the section.
    name: String,

    /// Whether we diffed this section at all.
    diffed: bool,

    /// The size of the section in bytes. Used to weight the totals printed by
    /// `Display for Coverage`.
    num_bytes: u64,
}
576
577impl Report {
578    pub fn from_config(mut config: Config) -> Result<Report> {
579        // This changes mutable global state, which isn't an ideal thing to be doing from a library.
580        // It's expedient though, and we don't really expect linker-diff to get used as a library
581        // anywhere except the linker-diff binary and wild's integration tests, so this probably
582        // isn't a big deal.
583        match config.colour {
584            Colour::Auto => colored::control::unset_override(),
585            Colour::Never => colored::control::set_override(false),
586            Colour::Always => colored::control::set_override(true),
587        }
588
589        let display_names = short_file_display_names(&config)?;
590
591        let file_bytes = config
592            .filenames()
593            .map(|filename| -> Result<Vec<u8>> {
594                let bytes = std::fs::read(filename)
595                    .with_context(|| format!("Failed to read `{}`", filename.display()))?;
596                Ok(bytes)
597            })
598            .collect::<Result<Vec<Vec<u8>>>>()?;
599
600        let elf_files = file_bytes
601            .iter()
602            .map(|bytes| -> Result<ElfFile64> { Ok(ElfFile64::parse(bytes.as_slice())?) })
603            .collect::<Result<Vec<_>>>()?;
604
605        let layouts = config
606            .filenames()
607            .map(|p| LayoutAndFiles::from_base_path(p))
608            .collect::<Result<Vec<_>>>()?;
609
610        let objects = elf_files
611            .iter()
612            .zip(display_names)
613            .zip(config.filenames())
614            .zip(&layouts)
615            .map(|(((elf_file, name), path), layout)| -> Result<Binary> {
616                Binary::new(elf_file, name, path.clone(), layout.as_ref())
617            })
618            .collect::<Result<Vec<_>>>()?;
619
620        if objects.len() < 2 {
621            bail!("At least two files must be provided for comparison");
622        }
623
624        let arch = ArchKind::from_objects(&objects)?;
625
626        if config.wild_defaults {
627            config.apply_wild_defaults(arch);
628        }
629
630        let mut report = Report {
631            names: objects.iter().map(|o| o.name.clone()).collect(),
632            paths: objects.iter().map(|o| o.path.clone()).collect(),
633            diffs: Default::default(),
634            coverage: config.coverage.then(Coverage::default),
635            config,
636        };
637
638        report.run_on_objects(&objects, arch);
639
640        Ok(report)
641    }
642
    /// Runs every check against `objects`, accumulating the differences in `self`. The
    /// architecture-independent checks run first, followed by those selected by `arch`.
    fn run_on_objects(&mut self, objects: &[Binary], arch: ArchKind) {
        // Per-object validations. Each one records a diff, keyed by the name given here, when
        // the outcomes for the objects disagree (see `validate_objects`).
        validate_objects(
            self,
            objects,
            GNU_HASH_SECTION_NAME_STR,
            gnu_hash::check_object,
        );
        validate_objects(
            self,
            objects,
            HASH_SECTION_NAME_STR,
            sysv_hash::check_object,
        );
        validate_objects(self, objects, "index", asm_diff::validate_indexes);
        validate_objects(
            self,
            objects,
            GOT_PLT_SECTION_NAME_STR,
            asm_diff::validate_got_plt,
        );
        validate_objects(
            self,
            objects,
            SYMTAB_SECTION_NAME_STR,
            symtab::validate_debug,
        );
        validate_objects(
            self,
            objects,
            DYNSYM_SECTION_NAME_STR,
            symtab::validate_dynamic,
        );
        // Comparisons that add their findings to the report themselves.
        header_diff::check_dynamic_headers(self, objects);
        header_diff::check_file_headers(self, objects);
        header_diff::report_section_diffs(self, objects);
        eh_frame_diff::report_diffs(self, objects);
        version_diff::report_diffs(self, objects);
        debug_info_diff::check_debug_info(self, objects);
        symbol_diff::report_diffs(self, objects);
        segment::report_diffs(self, objects);

        // Architecture-specific comparisons. RISC-V additionally gets its attributes compared.
        match arch {
            ArchKind::X86_64 => {
                self.report_arch_specific_diffs::<crate::x86_64::X86_64>(objects);
            }
            ArchKind::Aarch64 => {
                self.report_arch_specific_diffs::<crate::aarch64::AArch64>(objects);
            }

            ArchKind::RISCV64 => {
                self.report_arch_specific_diffs::<crate::riscv64::RiscV64>(objects);
                riscv_attributes::report_diffs(self, objects);
            }
            ArchKind::LoongArch64 => {
                self.report_arch_specific_diffs::<crate::loongarch64::LoongArch64>(objects);
            }
        }
    }
701
    /// Runs the comparisons that are generic over the architecture `A`: the section diffs from
    /// `asm_diff`, followed by the `init_order` diffs.
    fn report_arch_specific_diffs<A: Arch>(&mut self, binaries: &[Binary]) {
        asm_diff::report_section_diffs::<A>(self, binaries);
        init_order::report_diffs::<A>(self, binaries);
    }
706
707    fn add_diff(&mut self, diff: Diff) {
708        if self.should_ignore(&diff.key) {
709            return;
710        }
711        self.diffs.push(diff);
712    }
713
714    fn add_diffs(&mut self, new_diffs: Vec<Diff>) {
715        for diff in new_diffs {
716            self.add_diff(diff);
717        }
718    }
719
720    #[must_use]
721    pub fn has_problems(&self) -> bool {
722        !self.diffs.is_empty()
723    }
724
725    #[must_use]
726    pub fn should_ignore(&self, key: &str) -> bool {
727        if !self.config.only.is_empty() {
728            return !self.config.only.iter().any(|i| {
729                if let Some(prefix) = i.strip_suffix('*') {
730                    key.starts_with(prefix)
731                } else {
732                    key == *i
733                }
734            });
735        }
736        self.config.ignore.iter().any(|i| {
737            if let Some(prefix) = i.strip_suffix('*') {
738                key.starts_with(prefix)
739            } else {
740                key == *i
741            }
742        })
743    }
744
745    fn add_error(&mut self, error: impl Into<String>) {
746        self.diffs.push(Diff {
747            key: "error".to_owned(),
748            values: DiffValues::PreFormatted(error.into()),
749        });
750    }
751}
752
/// A single reported difference.
struct Diff {
    /// Identifies the kind of difference. This is what `Report::should_ignore` matches the
    /// ignore/only entries against, and it is printed as the heading for the diff.
    key: String,
    /// The details that are printed beneath the key.
    values: DiffValues,
}
757
/// The details of a `Diff`, in one of two shapes.
enum DiffValues {
    /// One value per binary. When displayed, each value is printed next to the name of the
    /// corresponding binary.
    PerObject(Vec<String>),
    /// Already-formatted text. When displayed, each of its lines is printed indented.
    PreFormatted(String),
}
762
763impl Display for Report {
764    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
765        for (name, path) in self.names.iter().zip(&self.paths) {
766            writeln!(f, "{name}: {}", path.display())?;
767        }
768
769        for diff in &self.diffs {
770            writeln!(f, "{}", diff.key)?;
771
772            match &diff.values {
773                DiffValues::PerObject(values) => {
774                    for (filename, result) in self.names.iter().zip(values) {
775                        writeln!(f, "  {filename} {result}")?;
776                    }
777                }
778                DiffValues::PreFormatted(values) => {
779                    for line in values.lines() {
780                        writeln!(f, "  {line}")?;
781                    }
782                }
783            }
784
785            writeln!(f)?;
786        }
787
788        Ok(())
789    }
790}
791
792impl Display for Binary<'_> {
793    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
794        self.name.fmt(f)
795    }
796}
797
798impl Display for Coverage {
799    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
800        writeln!(f, "Diffed sections:")?;
801
802        let mut total_bytes = 0;
803        let mut total_diffed = 0;
804
805        for sec in self.sections.values() {
806            writeln!(
807                f,
808                "  {} {}: {}",
809                sec.original_file,
810                sec.name,
811                if sec.diffed {
812                    "true".green()
813                } else {
814                    "false".red()
815                }
816            )?;
817
818            if sec.diffed {
819                total_diffed += sec.num_bytes;
820            }
821
822            total_bytes += sec.num_bytes;
823        }
824
825        writeln!(
826            f,
827            "Diffed {total_diffed} of {total_bytes} section bytes ({}%)",
828            total_diffed * 100 / total_bytes
829        )?;
830
831        Ok(())
832    }
833}
834
835fn short_file_display_names(config: &Config) -> Result<Vec<String>> {
836    let paths = config.filenames().collect_vec();
837    if !config.display_names.is_empty() {
838        if config.display_names.len() != paths.len() {
839            bail!(
840                "--display-names has {} names, but {} filenames were provided",
841                config.display_names.len(),
842                paths.len()
843            );
844        }
845        return Ok(config.display_names.clone());
846    }
847    if paths.is_empty() {
848        return Ok(vec![]);
849    }
850    let mut names = paths
851        .iter()
852        .map(|p| p.to_string_lossy().into_owned())
853        .collect_vec();
854    if names.iter().all(|name| {
855        Path::new(name)
856            .extension()
857            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
858    }) {
859        names = names
860            .into_iter()
861            .map(|n| n.strip_suffix(".so").unwrap().to_owned())
862            .collect();
863    }
864
865    if names.len() > 1 {
866        // We'll stop when we get to the length of the first name. This check is here to avoid
867        // infinitely looping if all the names are equal.
868        let first_len = names.first().map_or(0, |n| n.len());
869
870        // This is not quite right, since we might split in the middle of a multibyte character.
871        // But this is a dev tool, so we'll punt on that for now.
872        let mut iterators = names.iter().map(|n| n.bytes()).collect_vec();
873        let mut n = 0;
874        while first_equals_all(iterators.iter_mut().map(Iterator::next)) && n < first_len {
875            n += 1;
876        }
877        names = names
878            .iter()
879            .map(|name| String::from_utf8_lossy(&name.bytes().skip(n).collect_vec()).into_owned())
880            .collect_vec();
881    }
882    Ok(names)
883}
884
/// Returns whether every remaining input is equal to the first one. An empty input, or one with
/// only a single value, yields `true`. Consumption of `inputs` stops at the first mismatch.
fn first_equals_all<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => !inputs.any(|candidate| candidate != first),
    }
}
896
/// Returns whether the first input is equal to at least one of the remaining values. An empty
/// input yields `true`; a single value with nothing to compare against yields `false`.
/// Consumption of `inputs` stops at the first match.
fn first_equals_any<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => inputs.any(|candidate| candidate == first),
    }
}
909
910impl<'data> NameIndex<'data> {
911    fn new(elf_file: &ElfFile64<'data>) -> NameIndex<'data> {
912        let mut globals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
913        let mut locals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
914        let mut dynamic_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
915
916        for sym in elf_file.symbols() {
917            // We only index symbols that have a section. Note this is different than the object
918            // crate's `is_defined`, which imposes additional requirements that we don't want.
919            if sym.section_index().is_none() {
920                continue;
921            }
922
923            if let Ok(mut name) = sym.name_bytes() {
924                // Wild doesn't emit local symbols that start with ".L". The other linkers mostly do
925                // the same. However, GNU ld and lld, if they encounter a GOT-forming relocation to
926                // such a symbol, even if they then optimise away the GOT-forming relocation, will
927                // emit the symbol. This behaviour seems weird and not worth replicating, so we just
928                // ignore all just symbols.
929                if name.starts_with(b".L") {
930                    continue;
931                }
932
933                // GNU ld sometimes emits symbols that contain the symbol version. This causes
934                // problems when we go to look those symbols up, since they no longer match the name
935                // of the symbol in the original input file. So for now at least, we get rid of the
936                // version.
937                if let Some(at_pos) = name.iter().position(|b| *b == b'@') {
938                    name = &name[..at_pos];
939                }
940
941                if sym.is_global() {
942                    globals_by_name.entry(name).or_default().push(sym.index());
943                } else {
944                    locals_by_name.entry(name).or_default().push(sym.index());
945                }
946            }
947        }
948
949        for sym in elf_file.dynamic_symbols() {
950            if let Ok(name) = sym.name_bytes() {
951                dynamic_by_name.entry(name).or_default().push(sym.index());
952            }
953        }
954
955        NameIndex {
956            globals_by_name,
957            locals_by_name,
958            dynamic_by_name,
959        }
960    }
961}
962
/// Parses a `key=value` argument into its two halves, splitting at the first `=`. Either half may
/// be empty, and the value may itself contain further `=` characters.
///
/// # Errors
///
/// Returns an error if `s` contains no `=`.
fn parse_string_equality(
    s: &str,
) -> Result<(String, String), Box<dyn std::error::Error + Send + Sync + 'static>> {
    match s.split_once('=') {
        Some((key, value)) => Ok((key.to_owned(), value.to_owned())),
        None => Err(format!("invalid key-value pair. No '=' found in `{s}`").into()),
    }
}
971
972fn get_r_type<R: arch::RType>(rel: &object::Relocation) -> R {
973    let object::RelocationFlags::Elf { r_type } = rel.flags() else {
974        panic!("Unsupported object type (relocation flags)");
975    };
976    R::from_raw(r_type)
977}