linker_diff/
lib.rs

//! This crate finds differences between two ELF files. Its intended use is where the files were
//! produced by different linkers, or different versions of the same linker. So the input files
//! should be the same except for where the linkers make different decisions such as layout.
//!
//! Because the intended use is verifying the correct functioning of linkers, the focus is on
//! avoiding false positives rather than avoiding false negatives. i.e. we'd much rather fail to
//! report a difference than report a difference that doesn't matter. Ideally a reported difference
//! should indicate a bug or missing feature of the linker.
//!
//! Right now, performance of this library is not a priority, so there's quite a bit of heap
//! allocation going on that with a little work could be avoided. If we end up using this library as
//! part of a fuzzer this may need to be optimised.
13
14#![allow(clippy::too_many_arguments)]
15
16use anyhow::Context as _;
17use anyhow::bail;
18use asm_diff::AddressIndex;
19use clap::Parser;
20use clap::ValueEnum;
21use hashbrown::HashMap;
22use itertools::Itertools as _;
23#[allow(clippy::wildcard_imports)]
24use linker_utils::elf::secnames::*;
25use object::LittleEndian;
26use object::Object as _;
27use object::ObjectSection;
28use object::ObjectSymbol as _;
29use object::read::elf::ElfSection64;
30use section_map::IndexedLayout;
31use section_map::LayoutAndFiles;
32use std::fmt::Display;
33use std::path::Path;
34use std::path::PathBuf;
35
36mod aarch64;
37mod arch;
38mod asm_diff;
39mod debug_info_diff;
40mod diagnostics;
41mod eh_frame_diff;
42mod gnu_hash;
43mod header_diff;
44mod init_order;
45pub(crate) mod section_map;
46mod segment;
47mod symbol_diff;
48mod symtab;
49mod trace;
50mod version_diff;
51mod x86_64;
52
/// Crate-wide result alias. Both type parameters have defaults, so a bare `Result` means
/// `Result<(), anyhow::Error>` and `Result<T>` means `Result<T, anyhow::Error>`.
type Result<T = (), E = anyhow::Error> = core::result::Result<T, E>;
/// A 64-bit ELF file read with little-endian byte order.
type ElfFile64<'data> = object::read::elf::ElfFile64<'data, LittleEndian>;
/// A symbol belonging to an [`ElfFile64`].
type ElfSymbol64<'data, 'file> = object::read::elf::ElfSymbol64<'data, 'file, LittleEndian>;
56
57use arch::Arch;
58use arch::ArchKind;
59use colored::Colorize;
60pub use diagnostics::enable_diagnostics;
61use section_map::InputSectionId;
62use section_map::OwnedFileIdentifier;
63
// Command-line configuration for a comparison run.
//
// NOTE: The explanatory comments added to this struct deliberately use `//` rather than `///`.
// The struct derives `clap::Parser`, which turns doc comments into user-visible help text, so
// adding or rewording `///` comments here would change the program's output.
#[non_exhaustive]
#[derive(Parser, Default, Clone)]
pub struct Config {
    /// Keys to ignore.
    // Comma-separated. An entry that ends with `*` is treated as a prefix match when diffs are
    // filtered.
    #[arg(long, value_delimiter = ',')]
    pub ignore: Vec<String>,

    /// Show only the specified keys.
    // Comma-separated. When non-empty, this list is consulted instead of `ignore`.
    #[arg(long, value_delimiter = ',')]
    pub only: Vec<String>,

    /// Treat the sections with the specified names as equivalent. e.g. ".got.plt=.got"
    // Each comma-separated entry is split at its first `=` by `parse_string_equality`.
    #[arg(long, value_delimiter = ',', value_parser = parse_string_equality)]
    pub equiv: Vec<(String, String)>,

    /// Apply defaults for things that should be ignored currently for Wild. These defaults are
    /// subject to change as Wild changes.
    #[arg(long)]
    pub wild_defaults: bool,

    /// Print information about what sections did and didn't get diffed.
    #[arg(long)]
    pub coverage: bool,

    /// Display names for input files.
    // When supplied, there must be exactly one name per input file (primary file first).
    #[arg(long, value_delimiter = ',', value_name = "NAME,NAME...")]
    pub display_names: Vec<String>,

    /// Files to compare against
    // Passed as `--ref FILE`.
    #[arg(long = "ref", value_name = "FILE")]
    pub references: Vec<PathBuf>,

    // Whether output should be coloured. Accepted as `--colour` or `--color`; defaults to `auto`.
    #[arg(long, alias = "color", default_value = "auto")]
    pub colour: Colour,

    /// Primary file that we're validating against the reference file(s)
    pub file: PathBuf,
}
102
// Colour policy selected on the command line.
//
// NOTE: `//` comments are used on purpose. This enum derives `clap::ValueEnum`, which may surface
// `///` comments as help text for the individual values.
#[derive(ValueEnum, Copy, Clone, Default)]
pub enum Colour {
    // Clears any colour override, leaving the decision to the `colored` crate.
    #[default]
    Auto,
    // Forces coloured output off.
    Never,
    // Forces coloured output on.
    Always,
}
110
/// An output binary such as an executable or shared object.
pub struct Binary<'data> {
    /// Display name for this binary. This is what the `Display` implementation prints.
    name: String,
    /// Path that was supplied for this binary.
    path: PathBuf,
    /// The parsed ELF file.
    elf_file: &'data ElfFile64<'data>,
    /// Index built from `elf_file` by `AddressIndex::new`.
    address_index: AddressIndex<'data>,
    /// Symbol indexes keyed by symbol name.
    name_index: NameIndex<'data>,
    /// Indexed layout information. `None` when no layout was supplied for this binary.
    indexed_layout: Option<IndexedLayout<'data>>,
    /// Trace data obtained via `trace::Trace::for_path` for `path`.
    trace: trace::Trace,
    /// Index and size of each section, keyed by the section's name.
    sections_by_name: HashMap<&'data [u8], SectionInfo>,
}
122
/// The subset of a section's header that we cache for lookups by section name.
#[derive(Clone, Copy)]
struct SectionInfo {
    /// Index of the section within the ELF file.
    index: object::SectionIndex,
    /// Size of the section as reported by the `object` crate.
    size: u64,
}
128
/// Maps symbol names to the indexes of the symbols that have that name. A name can map to several
/// symbols, which is why each value is a `Vec`.
struct NameIndex<'data> {
    /// Global symbols from the regular symbol table.
    globals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Non-global symbols from the regular symbol table.
    locals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Symbols from the dynamic symbol table.
    dynamic_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
}
134
135impl Config {
136    #[must_use]
137    pub fn from_env() -> Self {
138        Self::parse()
139    }
140
141    fn apply_wild_defaults(&mut self, arch: ArchKind) {
142        self.ignore.extend(
143            [
144                // We don't currently support allocating space except in sections, so we have sections
145                // to hold the section and program headers. We then need to ignore them because GNU ld
146                // doesn't define such sections.
147                "section.shdr",
148                "section.phdr",
149                // We don't yet support these sections.
150                "section.data.rel.ro",
151                "section.stapsdt.base",
152                "section.note.gnu.build-id",
153                "section.note.gnu.property",
154                "section.note.stapsdt",
155                "section.hash",
156                "section.sframe",
157                // We set this to 8. GNU ld sometimes does too, but sometimes to 0.
158                "section.got.entsize",
159                "section.plt.got.entsize",
160                "section.plt.entsize",
161                // GNU ld sometimes sets this differently that we do.
162                "section.plt",
163                "section.plt.alignment",
164                "section.bss.alignment",
165                "section.gnu.build.attributes",
166                "section.annobin.notes.entsize",
167                // We don't yet group .lrodata sections separately.
168                "section.lrodata",
169                // We sometimes eliminate __tls_get_addr where GNU ld doesn't. This can mean that we
170                // have no versioned symbols for ld-linux-x86-64.so.2 or equivalent, which means we
171                // end up with one less version record.
172                ".dynamic.DT_VERNEEDNUM",
173                // We currently handle these dynamic tags differently
174                ".dynamic.DT_JMPREL",
175                ".dynamic.DT_PLTGOT",
176                ".dynamic.DT_PLTREL",
177                // We currently produce a .got.plt whenever we produce .plt, but GNU ld doesn't
178                "section.got.plt",
179                GOT_PLT_SECTION_NAME_STR,
180                // We don't currently produce a separate .plt.sec section.
181                "section.plt.sec",
182                // We don't yet write this.
183                ".dynamic.DT_HASH",
184                // aarch64-linux-gnu-ld on arch linux emits DT_BIND_NOW instead of DT_FLAGS.BIND_NOW
185                ".dynamic.DT_BIND_NOW",
186                ".dynamic.DT_FLAGS.BIND_NOW",
187                // TODO: Implement proper ordering of .init .ctors etc
188                "init_array",
189                "fini_array",
190                // When GNU ld encounters a GOT-forming reference to an ifunc, it generates a
191                // canonical PLT entry and points the GOT at that. This means that it ends up with
192                // GOT->PLT->GOT. We don't as yet support doing this.
193                "rel.missing-got-plt-got",
194                // We do support this. TODO: Should definitely look into why we're seeing this missing
195                // in our output.
196                "section.rela.plt",
197                // We currently write 10 byte PLT entries in some cases where GNU ld writes 8 byte ones.
198                "section.plt.got.alignment",
199                // GNU ld sometimes makes this writable sometimes not. Presumably this depends on
200                // whether there are relocations or some flags.
201                "section.eh_frame.flags",
202                // A package note section used by Ubuntu: https://systemd.io/ELF_PACKAGE_METADATA/
203                "section.note.package",
204                // TLSDESC relaxations aren't yet implemented.
205                "rel.match_failed.R_X86_64_GOTPC32_TLSDESC",
206                "rel.missing-opt.R_X86_64_TLSDESC_CALL.SkipTlsDescCall.*",
207                // Wild eliminates GOTPCRELX in statically linked executables even for undefined
208                // symbols, whereas other linkers don't. This is a valid optimisation that other
209                // linkers don't currently do.
210                "rel.extra-opt.R_X86_64_GOTPCRELX.CallIndirectToRelative.static-*",
211                // We don't yet support emitting warnings.
212                "section.gnu.warning",
213                // GNU ld sometimes applies relaxations that we don't yet.
214                "rel.match_failed.R_AARCH64_TLSDESC_LD64_LO12",
215                "rel.match_failed.R_AARCH64_TLSGD_ADD_LO12_NC",
216                "rel.missing-opt.R_X86_64_TLSGD.TlsGdToInitialExec.shared-object",
217                // GNU ld sometimes relaxes an adrp instruction to an adr instruction when the
218                // address is known and within +/-1MB. We don't as yet.
219                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.AdrpToAdr.*",
220                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.AdrpToAdr.*",
221                "rel.extra-opt.R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21.MovzXnLsl16.*",
222                // The other linkers set properties on sections if all input sections have that
223                // property. For sections like .rodata, this seems like an unimportant behaviour to
224                // replicate.
225                "section.rodata.entsize",
226                "section.rodata.flags",
227                // We emit dynamic relocations for direct references to undefined weak symbols that
228                // might be provided at runtime as well as GOT entries for indirect references. GNU
229                // ld and lld only emit the GOT entries and leave direct references as null. Our
230                // behaviour seems more consistent with the description of
231                // `-zdynamic-undefined-weak`.
232                "rel.undefined-weak.dynamic.R_X86_64_64",
233                "rel.undefined-weak.dynamic.R_AARCH64_ABS64",
234                // On aarch64, GNU ld, at least sometimes, converts R_AARCH64_ABS64 to a PLT-forming
235                // relocation. We at present, don't.
236                "rel.dynamic-plt-bypass",
237                // If we don't optimise a TLS access, then we'll have references to __tls_get_addr,
238                // when GNU ld doesn't.
239                "dynsym.__tls_get_addr.*",
240                // GNU ld emits two segments, whereas wild emits only a single segment.
241                "segment.LOAD.R.*",
242                // We haven't provided an implementation that is compatible with existing linkers.
243                "segment.PT_NOTE.*",
244                "segment.PT_INTERP.*",
245                "segment.PT_PHDR.*",
246                "segment.PT_GNU_RELRO.*",
247                "segment.PT_GNU_STACK.*",
248                "segment.PT_GNU_PROPERTY.*",
249                "segment.PT_GNU_SFRAME.*",
250                // TODO: RISC-v
251                "segment.SHT_RISCV_ATTRIBUTES.*",
252                "segment.LOAD.RW.alignment",
253            ]
254            .into_iter()
255            .map(ToOwned::to_owned),
256        );
257
258        match arch {
259            ArchKind::Aarch64 => self.ignore.extend(
260                [
261                    // Other linkers have a bigger initial PLT entry, thus the entsize is set to zero:
262                    // https://sourceware.org/bugzilla/show_bug.cgi?id=26312
263                    "section.plt.entsize",
264                    // On Alpine Linux, aarch64, GNU ld seems to emit the _DYNAMIC symbol without a
265                    // section index instead of pointing it at the .dynamic section.
266                    "rel.extra-symbol._DYNAMIC",
267                    // Also on Alpine Linux, aarch64, it seems that GNU ld is emitting an
268                    // unnecessary GLOB_DAT relocation in a GOT entry.
269                    "rel.missing-got-dynamic.executable",
270                ]
271                .into_iter()
272                .map(ToOwned::to_owned),
273            ),
274            ArchKind::RISCV64 => self.ignore.extend(
275                [
276                    // TODO: for some reason, main is put into .dynsym
277                    "dynsym.main.section",
278                    // #701
279                    "file-header.flags",
280                ]
281                .into_iter()
282                .map(ToOwned::to_owned),
283            ),
284            ArchKind::X86_64 => {}
285        }
286
287        self.equiv.push((
288            GOT_SECTION_NAME_STR.to_owned(),
289            GOT_PLT_SECTION_NAME_STR.to_owned(),
290        ));
291        // We don't currently define .plt.got and .plt.sec, we just put everything into .plt.
292        self.equiv.push((
293            PLT_SECTION_NAME_STR.to_owned(),
294            PLT_GOT_SECTION_NAME_STR.to_owned(),
295        ));
296        self.equiv.push((
297            PLT_SECTION_NAME_STR.to_owned(),
298            PLT_SEC_SECTION_NAME_STR.to_owned(),
299        ));
300    }
301
302    #[must_use]
303    pub fn to_arg_string(&self) -> String {
304        let mut out = String::new();
305        if self.wild_defaults {
306            out.push_str("--wild-defaults ");
307        }
308        if !self.ignore.is_empty() {
309            out.push_str("--ignore '");
310            out.push_str(&self.ignore.join(","));
311            out.push_str("' ");
312        }
313        if !self.equiv.is_empty() {
314            out.push_str("--equiv '");
315            let parts = self
316                .equiv
317                .iter()
318                .map(|(k, v)| format!("{k}={v}"))
319                .collect_vec();
320            out.push_str(&parts.join(","));
321            out.push_str("' ");
322        }
323        if !self.display_names.is_empty() {
324            out.push_str("--display-names ");
325            out.push_str(&self.display_names.join(","));
326            out.push(' ');
327        }
328        for file in &self.references {
329            out.push_str("--ref ");
330            out.push_str(&file.to_string_lossy());
331            out.push(' ');
332        }
333        out.push_str(&self.file.to_string_lossy());
334        out
335    }
336
337    fn filenames(&self) -> impl Iterator<Item = &PathBuf> {
338        // We always put our file first, since it makes it easier to treat it differently. e.g. when
339        // we compare a value from our file against each of the values from the other files.
340        std::iter::once(&self.file).chain(&self.references)
341    }
342}
343
344impl<'data> Binary<'data> {
345    pub(crate) fn new(
346        elf_file: &'data ElfFile64<'data>,
347        name: String,
348        path: PathBuf,
349        layout_and_files: Option<&'data LayoutAndFiles>,
350    ) -> Result<Self> {
351        let address_index = AddressIndex::new(elf_file);
352        let indexed_layout = layout_and_files.map(IndexedLayout::new).transpose()?;
353        let trace = trace::Trace::for_path(&path)?;
354
355        let sections_by_name = elf_file
356            .sections()
357            .map(|section| {
358                Ok((
359                    section.name_bytes()?,
360                    SectionInfo {
361                        index: section.index(),
362                        size: section.size(),
363                    },
364                ))
365            })
366            .collect::<Result<HashMap<&[u8], SectionInfo>>>()?;
367
368        Ok(Self {
369            name,
370            elf_file,
371            path,
372            address_index,
373            name_index: NameIndex::new(elf_file),
374            indexed_layout,
375            trace,
376            sections_by_name,
377        })
378    }
379
380    /// Looks up a symbol, first trying to get a global, or failing that a local. If multiple
381    /// symbols have the same name, then `hint_address` is used to select which one to return.
382    pub(crate) fn symbol_by_name<'file: 'data>(
383        &'file self,
384        name: &[u8],
385        hint_address: u64,
386    ) -> NameLookupResult<'data, 'file> {
387        match self.lookup_symbol(&self.name_index.globals_by_name, name, hint_address) {
388            NameLookupResult::Undefined => {
389                self.lookup_symbol(&self.name_index.locals_by_name, name, hint_address)
390            }
391            other => other,
392        }
393    }
394
395    fn lookup_symbol<'file: 'data>(
396        &'file self,
397        symbol_map: &HashMap<&[u8], Vec<object::SymbolIndex>>,
398        name: &[u8],
399        hint_address: u64,
400    ) -> NameLookupResult<'data, 'file> {
401        let indexes = symbol_map.get(name).map(Vec::as_slice).unwrap_or_default();
402
403        if indexes.len() >= 2 {
404            for sym_index in indexes {
405                if let Ok(sym) = self.elf_file.symbol_by_index(*sym_index)
406                    && sym.address() == hint_address
407                {
408                    return NameLookupResult::Defined(sym);
409                }
410            }
411
412            // We didn't find a symbol with exactly the address hinted at.
413            return NameLookupResult::Duplicate;
414        }
415
416        if let Some(symbol_index) = indexes.first() {
417            if let Ok(sym) = self.elf_file.symbol_by_index(*symbol_index) {
418                NameLookupResult::Defined(sym)
419            } else {
420                NameLookupResult::Undefined
421            }
422        } else {
423            NameLookupResult::Undefined
424        }
425    }
426
427    fn section_by_name<'file: 'data>(
428        &'file self,
429        name: &str,
430    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
431        self.section_by_name_bytes(name.as_bytes())
432    }
433
434    fn section_by_name_bytes<'file: 'data>(
435        &'file self,
436        name: &[u8],
437    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
438        let index = self.sections_by_name.get(name)?.index;
439        self.elf_file.section_by_index(index).ok()
440    }
441
442    fn section_containing_address<'file: 'data>(
443        &'file self,
444        address: u64,
445    ) -> Option<ElfSection64<'file, 'data, LittleEndian>> {
446        self.elf_file
447            .sections()
448            .find(|sec| (sec.address()..sec.address() + sec.size()).contains(&address))
449    }
450
451    /// Returns the name of the section that contains the supplied address. Does a linear scan, so
452    /// should only be used for error reporting.
453    fn section_name_containing_address(&self, address: u64) -> Option<&str> {
454        self.section_containing_address(address)
455            .and_then(|sec| sec.name().ok())
456    }
457}
458
/// The outcome of looking up a symbol by name.
#[derive(Debug)]
enum NameLookupResult<'data, 'file> {
    /// No usable symbol with the requested name was found.
    Undefined,
    /// Several symbols have the requested name and none of them is at the hinted address.
    Duplicate,
    /// The symbol that was selected.
    Defined(ElfSymbol64<'data, 'file>),
}
465
466fn validate_objects(
467    report: &mut Report,
468    objects: &[Binary],
469    validation_name: &str,
470    validation_fn: impl Fn(&Binary) -> Result,
471) {
472    let values = objects
473        .iter()
474        .map(|obj| match validation_fn(obj) {
475            Ok(_) => "OK".to_owned(),
476            Err(e) => e.to_string(),
477        })
478        .collect_vec();
479    if first_equals_any(values.iter()) {
480        return;
481    }
482    report.add_diff(Diff {
483        key: validation_name.to_owned(),
484        values: DiffValues::PerObject(values),
485    });
486}
487
/// The result of comparing a primary binary against one or more reference binaries. Formatting
/// the report with `Display` lists the inputs followed by each recorded difference.
pub struct Report {
    /// The names of each of our binaries. These should be short, not a full path, since we often
    /// prefix lines with these names.
    names: Vec<String>,

    /// The full path of each of our binaries.
    paths: Vec<PathBuf>,

    /// The differences that were detected.
    diffs: Vec<Diff>,

    /// The configuration that was used.
    config: Config,

    /// Section coverage information. Only `Some` when coverage was requested in the
    /// configuration.
    pub coverage: Option<Coverage>,
}
504
/// Records, per input section, whether that section was diffed. Formatting with `Display` prints
/// one line per section followed by a byte-count summary.
#[derive(Default)]
pub struct Coverage {
    /// Coverage information keyed by input section.
    sections: HashMap<InputSectionId, SectionCoverage>,
}
509
/// Coverage information for a single input section.
struct SectionCoverage {
    /// The original input file from which the section came.
    original_file: OwnedFileIdentifier,

    /// The name of the section.
    name: String,

    /// Whether we diffed this section at all.
    diffed: bool,

    /// The size of the section in bytes.
    num_bytes: u64,
}
523
524impl Report {
525    pub fn from_config(mut config: Config) -> Result<Report> {
526        // This changes mutable global state, which isn't an ideal thing to be doing from a library.
527        // It's expedient though, and we don't really expect linker-diff to get used as a library
528        // anywhere except the linker-diff binary and wild's integration tests, so this probably
529        // isn't a big deal.
530        match config.colour {
531            Colour::Auto => colored::control::unset_override(),
532            Colour::Never => colored::control::set_override(false),
533            Colour::Always => colored::control::set_override(true),
534        }
535
536        let display_names = short_file_display_names(&config)?;
537
538        let file_bytes = config
539            .filenames()
540            .map(|filename| -> Result<Vec<u8>> {
541                let bytes = std::fs::read(filename)
542                    .with_context(|| format!("Failed to read `{}`", filename.display()))?;
543                Ok(bytes)
544            })
545            .collect::<Result<Vec<Vec<u8>>>>()?;
546
547        let elf_files = file_bytes
548            .iter()
549            .map(|bytes| -> Result<ElfFile64> { Ok(ElfFile64::parse(bytes.as_slice())?) })
550            .collect::<Result<Vec<_>>>()?;
551
552        let layouts = config
553            .filenames()
554            .map(|p| LayoutAndFiles::from_base_path(p))
555            .collect::<Result<Vec<_>>>()?;
556
557        let objects = elf_files
558            .iter()
559            .zip(display_names)
560            .zip(config.filenames())
561            .zip(&layouts)
562            .map(|(((elf_file, name), path), layout)| -> Result<Binary> {
563                Binary::new(elf_file, name, path.clone(), layout.as_ref())
564            })
565            .collect::<Result<Vec<_>>>()?;
566
567        if objects.len() < 2 {
568            bail!("At least two files must be provided for comparison");
569        }
570
571        let arch = ArchKind::from_objects(&objects)?;
572
573        if config.wild_defaults {
574            config.apply_wild_defaults(arch);
575        }
576
577        let mut report = Report {
578            names: objects.iter().map(|o| o.name.clone()).collect(),
579            paths: objects.iter().map(|o| o.path.clone()).collect(),
580            diffs: Default::default(),
581            coverage: config.coverage.then(Coverage::default),
582            config,
583        };
584
585        report.run_on_objects(&objects, arch);
586
587        Ok(report)
588    }
589
590    fn run_on_objects(&mut self, objects: &[Binary], arch: ArchKind) {
591        validate_objects(
592            self,
593            objects,
594            GNU_HASH_SECTION_NAME_STR,
595            gnu_hash::check_object,
596        );
597        validate_objects(self, objects, "index", asm_diff::validate_indexes);
598        validate_objects(
599            self,
600            objects,
601            GOT_PLT_SECTION_NAME_STR,
602            asm_diff::validate_got_plt,
603        );
604        validate_objects(
605            self,
606            objects,
607            SYMTAB_SECTION_NAME_STR,
608            symtab::validate_debug,
609        );
610        validate_objects(
611            self,
612            objects,
613            DYNSYM_SECTION_NAME_STR,
614            symtab::validate_dynamic,
615        );
616        header_diff::check_dynamic_headers(self, objects);
617        header_diff::check_file_headers(self, objects);
618        header_diff::report_section_diffs(self, objects);
619        eh_frame_diff::report_diffs(self, objects);
620        version_diff::report_diffs(self, objects);
621        debug_info_diff::check_debug_info(self, objects);
622        symbol_diff::report_diffs(self, objects);
623        segment::report_diffs(self, objects);
624
625        match arch {
626            ArchKind::X86_64 => {
627                self.report_arch_specific_diffs::<crate::x86_64::X86_64>(objects);
628            }
629            ArchKind::Aarch64 => {
630                self.report_arch_specific_diffs::<crate::aarch64::AArch64>(objects);
631            }
632
633            ArchKind::RISCV64 => {
634                // TODO
635            }
636        }
637    }
638
639    fn report_arch_specific_diffs<A: Arch>(&mut self, binaries: &[Binary]) {
640        asm_diff::report_section_diffs::<A>(self, binaries);
641        init_order::report_diffs::<A>(self, binaries);
642    }
643
644    fn add_diff(&mut self, diff: Diff) {
645        if self.should_ignore(&diff.key) {
646            return;
647        }
648        self.diffs.push(diff);
649    }
650
651    fn add_diffs(&mut self, new_diffs: Vec<Diff>) {
652        for diff in new_diffs {
653            self.add_diff(diff);
654        }
655    }
656
657    #[must_use]
658    pub fn has_problems(&self) -> bool {
659        !self.diffs.is_empty()
660    }
661
662    fn should_ignore(&self, key: &str) -> bool {
663        if !self.config.only.is_empty() {
664            return !self.config.only.iter().any(|i| {
665                if let Some(prefix) = i.strip_suffix('*') {
666                    key.starts_with(prefix)
667                } else {
668                    key == *i
669                }
670            });
671        }
672        self.config.ignore.iter().any(|i| {
673            if let Some(prefix) = i.strip_suffix('*') {
674                key.starts_with(prefix)
675            } else {
676                key == *i
677            }
678        })
679    }
680
681    fn add_error(&mut self, error: impl Into<String>) {
682        self.diffs.push(Diff {
683            key: "error".to_owned(),
684            values: DiffValues::PreFormatted(error.into()),
685        });
686    }
687}
688
/// A single reported difference.
struct Diff {
    /// Identifies what differed. This is what the `ignore` and `only` filters are matched
    /// against.
    key: String,
    /// The details of the difference.
    values: DiffValues,
}
693
/// The payload of a [`Diff`].
enum DiffValues {
    /// One value per binary. When displayed, values are paired with the binary names in order.
    PerObject(Vec<String>),
    /// Text that is displayed line by line, with each line indented.
    PreFormatted(String),
}
698
699impl Display for Report {
700    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
701        for (name, path) in self.names.iter().zip(&self.paths) {
702            writeln!(f, "{name}: {}", path.display())?;
703        }
704
705        for diff in &self.diffs {
706            writeln!(f, "{}", diff.key)?;
707
708            match &diff.values {
709                DiffValues::PerObject(values) => {
710                    for (filename, result) in self.names.iter().zip(values) {
711                        writeln!(f, "  {filename} {result}")?;
712                    }
713                }
714                DiffValues::PreFormatted(values) => {
715                    for line in values.lines() {
716                        writeln!(f, "  {line}")?;
717                    }
718                }
719            }
720
721            writeln!(f)?;
722        }
723
724        Ok(())
725    }
726}
727
728impl Display for Binary<'_> {
729    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
730        self.name.fmt(f)
731    }
732}
733
734impl Display for Coverage {
735    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
736        writeln!(f, "Diffed sections:")?;
737
738        let mut total_bytes = 0;
739        let mut total_diffed = 0;
740
741        for sec in self.sections.values() {
742            writeln!(
743                f,
744                "  {} {}: {}",
745                sec.original_file,
746                sec.name,
747                if sec.diffed {
748                    "true".green()
749                } else {
750                    "false".red()
751                }
752            )?;
753
754            if sec.diffed {
755                total_diffed += sec.num_bytes;
756            }
757
758            total_bytes += sec.num_bytes;
759        }
760
761        writeln!(
762            f,
763            "Diffed {total_diffed} of {total_bytes} section bytes ({}%)",
764            total_diffed * 100 / total_bytes
765        )?;
766
767        Ok(())
768    }
769}
770
771fn short_file_display_names(config: &Config) -> Result<Vec<String>> {
772    let paths: Vec<&PathBuf> = config.filenames().collect();
773    if !config.display_names.is_empty() {
774        if config.display_names.len() != paths.len() {
775            bail!(
776                "--display-names has {} names, but {} filenames were provided",
777                config.display_names.len(),
778                paths.len()
779            );
780        }
781        return Ok(config.display_names.clone());
782    }
783    if paths.is_empty() {
784        return Ok(vec![]);
785    }
786    let mut names = paths
787        .iter()
788        .map(|p| p.to_string_lossy().into_owned())
789        .collect_vec();
790    if names.iter().all(|name| {
791        Path::new(name)
792            .extension()
793            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
794    }) {
795        names = names
796            .into_iter()
797            .map(|n| n.strip_suffix(".so").unwrap().to_owned())
798            .collect();
799    }
800
801    if names.len() > 1 {
802        // This is not quite right, since we might split in the middle of a multibyte character.
803        // But this is a dev tool, so we'll punt on that for now.
804        let mut iterators = names.iter().map(|n| n.bytes()).collect_vec();
805        let mut n = 0;
806        while first_equals_all(iterators.iter_mut().map(Iterator::next)) {
807            n += 1;
808        }
809        names = names
810            .iter()
811            .map(|name| String::from_utf8_lossy(&name.bytes().skip(n).collect_vec()).into_owned())
812            .collect_vec();
813    }
814    Ok(names)
815}
816
/// Returns whether every remaining input is equal to the first one. Empty input counts as
/// all-equal. Stops consuming `inputs` at the first value that differs.
fn first_equals_all<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => !inputs.any(|candidate| candidate != first),
    }
}
828
/// Returns whether the first input is equal to at least one of the remaining values. Empty input
/// yields `true`; a single value with nothing to compare against yields `false`.
fn first_equals_any<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        None => true,
        Some(first) => inputs.any(|candidate| candidate == first),
    }
}
841
842impl<'data> NameIndex<'data> {
843    fn new(elf_file: &ElfFile64<'data>) -> NameIndex<'data> {
844        let mut globals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
845        let mut locals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
846        let mut dynamic_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
847
848        for sym in elf_file.symbols() {
849            // We only index symbols that have a section. Note this is different than the object
850            // crate's `is_defined`, which imposes additional requirements that we don't want.
851            if sym.section_index().is_none() {
852                continue;
853            }
854
855            if let Ok(mut name) = sym.name_bytes() {
856                // Wild doesn't emit local symbols that start with ".L". The other linkers mostly do
857                // the same. However, GNU ld and lld, if they encounter a GOT-forming relocation to
858                // such a symbol, even if they then optimise away the GOT-forming relocation, will
859                // emit the symbol. This behaviour seems weird and not worth replicating, so we just
860                // ignore all just symbols.
861                if name.starts_with(b".L") {
862                    continue;
863                }
864
865                // GNU ld sometimes emits symbols that contain the symbol version. This causes
866                // problems when we go to look those symbols up, since they no longer match the name
867                // of the symbol in the original input file. So for now at least, we get rid of the
868                // version.
869                if let Some(at_pos) = name.iter().position(|b| *b == b'@') {
870                    name = &name[..at_pos];
871                }
872
873                if sym.is_global() {
874                    globals_by_name.entry(name).or_default().push(sym.index());
875                } else {
876                    locals_by_name.entry(name).or_default().push(sym.index());
877                }
878            }
879        }
880
881        for sym in elf_file.dynamic_symbols() {
882            if let Ok(name) = sym.name_bytes() {
883                dynamic_by_name.entry(name).or_default().push(sym.index());
884            }
885        }
886
887        NameIndex {
888            globals_by_name,
889            locals_by_name,
890            dynamic_by_name,
891        }
892    }
893}
894
895fn slice_from_all_bytes<T: object::Pod>(data: &[u8]) -> &[T] {
896    object::slice_from_bytes(data, data.len() / size_of::<T>())
897        .unwrap()
898        .0
899}
900
/// Parses a `left=right` argument into its two halves, splitting at the first `=`. Either half
/// may be empty. Returns an error message if `s` contains no `=`.
fn parse_string_equality(
    s: &str,
) -> Result<(String, String), Box<dyn std::error::Error + Send + Sync + 'static>> {
    match s.split_once('=') {
        Some((left, right)) => Ok((left.to_owned(), right.to_owned())),
        None => Err(format!("invalid key-value pair. No '=' found in `{s}`").into()),
    }
}
909
910fn get_r_type<R: arch::RType>(rel: &object::Relocation) -> R {
911    let object::RelocationFlags::Elf { r_type } = rel.flags() else {
912        panic!("Unsupported object type (relocation flags)");
913    };
914    R::from_raw(r_type)
915}