linker_diff/
lib.rs

//! This crate finds differences between two ELF files. Its intended use is where the files were
2//! produced by different linkers, or different versions of the same linker. So the input files
3//! should be the same except for where the linkers make different decisions such as layout.
4//!
5//! Because the intended use is verifying the correct functioning of linkers, the focus is on
6//! avoiding false positives rather than avoiding false negatives. i.e. we'd much rather fail to
7//! report a difference than report a difference that doesn't matter. Ideally a reported difference
8//! should indicate a bug or missing feature of the linker.
9//!
10//! Right now, performance of this library is not a priority, so there's quite a bit of heap
11//! allocation going on that with a little work could be avoided. If we end up using this library as
12//! part of a fuzzer this may need to be optimised.
13
14#![allow(clippy::too_many_arguments)]
15
16use anyhow::Context as _;
17use anyhow::bail;
18use asm_diff::AddressIndex;
19use clap::Parser;
20use clap::ValueEnum;
21use hashbrown::HashMap;
22use itertools::Itertools as _;
23#[allow(clippy::wildcard_imports)]
24use linker_utils::elf::secnames::*;
25use object::LittleEndian;
26use object::Object as _;
27use object::ObjectSection;
28use object::ObjectSymbol as _;
29use object::read::elf::ElfSection64;
30use section_map::IndexedLayout;
31use section_map::LayoutAndFiles;
32use std::fmt::Display;
33use std::path::Path;
34use std::path::PathBuf;
35
36mod aarch64;
37mod arch;
38mod asm_diff;
39mod debug_info_diff;
40mod diagnostics;
41mod eh_frame_diff;
42mod gnu_hash;
43mod header_diff;
44mod init_order;
45mod riscv64;
46pub(crate) mod section_map;
47mod segment;
48mod symbol_diff;
49mod symtab;
50mod trace;
51mod utils;
52mod version_diff;
53mod x86_64;
54
/// Crate-wide result alias. The success type defaults to `()` and the error type defaults to
/// `anyhow::Error`, so `Result` on its own means "fallible, returns nothing".
type Result<T = (), E = anyhow::Error> = core::result::Result<T, E>;
/// The `object` crate's 64-bit ELF file reader, fixed to little-endian.
type ElfFile64<'data> = object::read::elf::ElfFile64<'data, LittleEndian>;
/// The `object` crate's 64-bit ELF symbol type, fixed to little-endian.
type ElfSymbol64<'data, 'file> = object::read::elf::ElfSymbol64<'data, 'file, LittleEndian>;
58
59use arch::Arch;
60use arch::ArchKind;
61use colored::Colorize;
62pub use diagnostics::enable_diagnostics;
63use section_map::InputSectionId;
64use section_map::OwnedFileIdentifier;
65
// Configuration for a diff run. It derives clap's `Parser`, so it can be built from the command
// line (see `Config::from_env`) or constructed programmatically via `Default`.
//
// NOTE: maintainer notes in this struct use `//` rather than `///` because clap's derive turns
// doc comments into `--help` text, which would change what the program prints.
#[non_exhaustive]
#[derive(Parser, Default, Clone)]
pub struct Config {
    /// Keys to ignore.
    // A trailing `*` makes the entry a prefix match (see `Report::should_ignore`).
    #[arg(long, value_delimiter = ',')]
    pub ignore: Vec<String>,

    /// Show only the specified keys.
    // When non-empty this takes precedence over `ignore` (see `Report::should_ignore`).
    #[arg(long, value_delimiter = ',')]
    pub only: Vec<String>,

    /// Treat the sections with the specified names as equivalent. e.g. ".got.plt=.got"
    #[arg(long, value_delimiter = ',', value_parser = parse_string_equality)]
    pub equiv: Vec<(String, String)>,

    /// Apply defaults for things that should be ignored currently for Wild. These defaults are
    /// subject to change as Wild changes.
    #[arg(long)]
    pub wild_defaults: bool,

    /// Print information about what sections did and didn't get diffed.
    #[arg(long)]
    pub coverage: bool,

    /// Display names for input files.
    // If supplied, there must be exactly one name per input file, primary file first (checked in
    // `short_file_display_names`).
    #[arg(long, value_delimiter = ',', value_name = "NAME,NAME...")]
    pub display_names: Vec<String>,

    /// Files to compare against
    #[arg(long = "ref", value_name = "FILE")]
    pub references: Vec<PathBuf>,

    // Colour mode for output; `--color` is accepted as an alias and the default is `auto`.
    #[arg(long, alias = "color", default_value = "auto")]
    pub colour: Colour,

    /// Primary file that we're validating against the reference file(s)
    pub file: PathBuf,
}
104
// When to colourise output. `Report::from_config` maps these onto the `colored` crate's global
// override: `Auto` clears the override, `Never` forces colour off and `Always` forces it on.
// (`//` rather than `///` is used so that clap's generated help text is unchanged.)
#[derive(ValueEnum, Copy, Clone, Default)]
pub enum Colour {
    #[default]
    Auto,
    Never,
    Always,
}
112
/// An output binary such as an executable or shared object.
pub struct Binary<'data> {
    /// Display name of this binary. It is what `Display for Binary` prints.
    name: String,
    /// Path the binary was loaded from.
    path: PathBuf,
    /// The parsed ELF file.
    elf_file: &'data ElfFile64<'data>,
    /// Index built by `AddressIndex::new` from `elf_file`.
    address_index: AddressIndex<'data>,
    /// Symbol indexes keyed by symbol name.
    name_index: NameIndex<'data>,
    /// Layout information, present only when a `LayoutAndFiles` was supplied to `Binary::new`.
    indexed_layout: Option<IndexedLayout<'data>>,
    /// Trace data obtained via `trace::Trace::for_path` for `path`.
    trace: trace::Trace,
    /// Index and size of each section, keyed by section name. If several sections share a name,
    /// only one of them is kept.
    sections_by_name: HashMap<&'data [u8], SectionInfo>,
}
124
/// The per-section data stored in `Binary::sections_by_name`.
#[derive(Clone, Copy)]
struct SectionInfo {
    /// The section's index within the ELF file.
    index: object::SectionIndex,
    /// The section's size as reported by the `object` crate.
    size: u64,
}
130
/// Symbol indexes keyed by symbol name. Built by `NameIndex::new`.
struct NameIndex<'data> {
    /// Symbols from the regular symbol table for which `is_global()` is true. Only symbols with
    /// a section index are included, and any `@version` suffix is removed from the key.
    globals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// As for `globals_by_name`, but for symbols for which `is_global()` is false.
    locals_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
    /// Symbols from the dynamic symbol table, keyed by their unmodified name.
    dynamic_by_name: HashMap<&'data [u8], Vec<object::SymbolIndex>>,
}
136
137impl Config {
138    #[must_use]
139    pub fn from_env() -> Self {
140        Self::parse()
141    }
142
    /// Extends `ignore` and `equiv` with the defaults used when validating Wild's output against
    /// other linkers.
    ///
    /// A common list of ignore keys is added first, followed by keys specific to `arch`
    /// (nothing extra for x86-64). Finally `.got`/`.got.plt`, `.plt`/`.plt.got` and
    /// `.plt`/`.plt.sec` are registered as equivalent section pairs. Existing entries in
    /// `ignore` and `equiv` are kept; the defaults are appended.
    fn apply_wild_defaults(&mut self, arch: ArchKind) {
        self.ignore.extend(
            [
                // We don't currently support allocating space except in sections, so we have sections
                // to hold the section and program headers. We then need to ignore them because GNU ld
                // doesn't define such sections.
                "section.shdr",
                "section.phdr",
                // We don't yet support these sections.
                "section.data.rel.ro",
                "section.hash",
                "section.sframe",
                // We set this to 8. GNU ld sometimes does too, but sometimes to 0.
                "section.got.entsize",
                "section.plt.got.entsize",
                "section.plt.entsize",
                // GNU ld sometimes sets this differently than we do.
                "section.plt",
                "section.plt.alignment",
                "section.bss.alignment",
                "section.gnu.build.attributes",
                "section.annobin.notes.entsize",
                // We don't yet group .lrodata sections separately.
                "section.lrodata",
                // We sometimes eliminate __tls_get_addr where GNU ld doesn't. This can mean that we
                // have no versioned symbols for ld-linux-x86-64.so.2 or equivalent, which means we
                // end up with one less version record.
                ".dynamic.DT_VERNEEDNUM",
                // We currently handle these dynamic tags differently
                ".dynamic.DT_JMPREL",
                ".dynamic.DT_PLTGOT",
                ".dynamic.DT_PLTREL",
                // We currently produce a .got.plt whenever we produce .plt, but GNU ld doesn't
                "section.got.plt",
                GOT_PLT_SECTION_NAME_STR,
                // We don't currently produce a separate .plt.sec section.
                "section.plt.sec",
                // We don't yet write this.
                ".dynamic.DT_HASH",
                // aarch64-linux-gnu-ld on arch linux emits DT_BIND_NOW instead of DT_FLAGS.BIND_NOW
                ".dynamic.DT_BIND_NOW",
                ".dynamic.DT_FLAGS.BIND_NOW",
                // TODO: Implement proper ordering of .init .ctors etc
                "init_array",
                "fini_array",
                // When GNU ld encounters a GOT-forming reference to an ifunc, it generates a
                // canonical PLT entry and points the GOT at that. This means that it ends up with
                // GOT->PLT->GOT. We don't as yet support doing this.
                "rel.missing-got-plt-got",
                // We do support this. TODO: Should definitely look into why we're seeing this missing
                // in our output.
                "section.rela.plt",
                // We currently write 10 byte PLT entries in some cases where GNU ld writes 8 byte ones.
                "section.plt.got.alignment",
                // GNU ld sometimes makes this writable sometimes not. Presumably this depends on
                // whether there are relocations or some flags.
                "section.eh_frame.flags",
                // TLSDESC relaxations aren't yet implemented.
                "rel.match_failed.R_X86_64_GOTPC32_TLSDESC",
                "rel.missing-opt.R_X86_64_TLSDESC_CALL.SkipTlsDescCall.*",
                // Wild eliminates GOTPCRELX in statically linked executables even for undefined
                // symbols, whereas other linkers don't. This is a valid optimisation that other
                // linkers don't currently do.
                "rel.extra-opt.R_X86_64_GOTPCRELX.CallIndirectToRelative.static-*",
                // We don't yet support emitting warnings.
                "section.gnu.warning",
                // GNU ld sometimes applies relaxations that we don't yet.
                "rel.match_failed.R_AARCH64_TLSDESC_LD64_LO12",
                "rel.match_failed.R_AARCH64_TLSGD_ADD_LO12_NC",
                "rel.missing-opt.R_X86_64_TLSGD.TlsGdToInitialExec.shared-object",
                // GNU ld sometimes relaxes an adrp instruction to an adr instruction when the
                // address is known and within +/-1MB. We don't as yet.
                "rel.missing-opt.R_AARCH64_ADR_GOT_PAGE.AdrpToAdr.*",
                "rel.missing-opt.R_AARCH64_ADR_PREL_PG_HI21.AdrpToAdr.*",
                "rel.extra-opt.R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21.MovzXnLsl16.*",
                // The other linkers set properties on sections if all input sections have that
                // property. For sections like .rodata, this seems like an unimportant behaviour to
                // replicate.
                "section.rodata.entsize",
                "section.rodata.flags",
                // We emit dynamic relocations for direct references to undefined weak symbols that
                // might be provided at runtime as well as GOT entries for indirect references. GNU
                // ld and lld only emit the GOT entries and leave direct references as null. Our
                // behaviour seems more consistent with the description of
                // `-zdynamic-undefined-weak`.
                "rel.undefined-weak.dynamic.R_X86_64_64",
                "rel.undefined-weak.dynamic.R_AARCH64_ABS64",
                // On aarch64, GNU ld, at least sometimes, converts R_AARCH64_ABS64 to a PLT-forming
                // relocation. We at present, don't.
                "rel.dynamic-plt-bypass",
                // If we don't optimise a TLS access, then we'll have references to __tls_get_addr,
                // when GNU ld doesn't.
                "dynsym.__tls_get_addr.*",
                // GNU ld emits two segments, whereas wild emits only a single segment.
                "segment.LOAD.R.*",
                // We haven't provided an implementation that is compatible with existing linkers.
                "segment.PHDR.*",
                "segment.GNU_RELRO.*",
                "segment.GNU_STACK.*",
                "segment.GNU_PROPERTY.*",
                "segment.GNU_SFRAME.*",
                // Wild currently generates PT_NOTE even for non-alloc note sections, while the
                // other linkers don't.
                "segment.NOTE.*",
                // TODO: RISC-V
                "segment.LOAD.RW.alignment",
            ]
            .into_iter()
            .map(ToOwned::to_owned),
        );

        // Architecture-specific additions on top of the common list above.
        match arch {
            ArchKind::Aarch64 => self.ignore.extend(
                [
                    // Other linkers have a bigger initial PLT entry, thus the entsize is set to zero:
                    // https://sourceware.org/bugzilla/show_bug.cgi?id=26312
                    "section.plt.entsize",
                    // On Alpine Linux, aarch64, GNU ld seems to emit the _DYNAMIC symbol without a
                    // section index instead of pointing it at the .dynamic section.
                    "rel.extra-symbol._DYNAMIC",
                    // Also on Alpine Linux, aarch64, it seems that GNU ld is emitting an
                    // unnecessary GLOB_DAT relocation in a GOT entry.
                    "rel.missing-got-dynamic.executable",
                ]
                .into_iter()
                .map(ToOwned::to_owned),
            ),
            ArchKind::RISCV64 => self.ignore.extend(
                [
                    // TODO: for some reason, main is put into .dynsym
                    "dynsym.main.section",
                    // GOT entries may differ due to unimplemented relaxations
                    "section.got.*",
                    // Dynamic relocations may differ
                    "rel.dynamic.*",
                    "rel.undefined-weak.*",
                    // Symbol address inconsistencies due to different optimizations
                    "error.*",
                    "section-diff-failed*",
                    // .relro_padding is showing up on risc-v.
                    "section.relro_padding",
                ]
                .into_iter()
                .map(ToOwned::to_owned),
            ),
            ArchKind::X86_64 => {}
        }

        self.equiv.push((
            GOT_SECTION_NAME_STR.to_owned(),
            GOT_PLT_SECTION_NAME_STR.to_owned(),
        ));
        // We don't currently define .plt.got and .plt.sec, we just put everything into .plt.
        self.equiv.push((
            PLT_SECTION_NAME_STR.to_owned(),
            PLT_GOT_SECTION_NAME_STR.to_owned(),
        ));
        self.equiv.push((
            PLT_SECTION_NAME_STR.to_owned(),
            PLT_SEC_SECTION_NAME_STR.to_owned(),
        ));
    }
305
306    #[must_use]
307    pub fn to_arg_string(&self) -> String {
308        let mut out = String::new();
309        if self.wild_defaults {
310            out.push_str("--wild-defaults ");
311        }
312        if !self.ignore.is_empty() {
313            out.push_str("--ignore '");
314            out.push_str(&self.ignore.join(","));
315            out.push_str("' ");
316        }
317        if !self.equiv.is_empty() {
318            out.push_str("--equiv '");
319            let parts = self
320                .equiv
321                .iter()
322                .map(|(k, v)| format!("{k}={v}"))
323                .collect_vec();
324            out.push_str(&parts.join(","));
325            out.push_str("' ");
326        }
327        if !self.display_names.is_empty() {
328            out.push_str("--display-names ");
329            out.push_str(&self.display_names.join(","));
330            out.push(' ');
331        }
332        for file in &self.references {
333            out.push_str("--ref ");
334            out.push_str(&file.to_string_lossy());
335            out.push(' ');
336        }
337        out.push_str(&self.file.to_string_lossy());
338        out
339    }
340
341    fn filenames(&self) -> impl Iterator<Item = &PathBuf> {
342        // We always put our file first, since it makes it easier to treat it differently. e.g. when
343        // we compare a value from our file against each of the values from the other files.
344        std::iter::once(&self.file).chain(&self.references)
345    }
346}
347
348impl<'data> Binary<'data> {
349    pub(crate) fn new(
350        elf_file: &'data ElfFile64<'data>,
351        name: String,
352        path: PathBuf,
353        layout_and_files: Option<&'data LayoutAndFiles>,
354    ) -> Result<Self> {
355        let address_index = AddressIndex::new(elf_file);
356        let indexed_layout = layout_and_files.map(IndexedLayout::new).transpose()?;
357        let trace = trace::Trace::for_path(&path)?;
358
359        let sections_by_name = elf_file
360            .sections()
361            .map(|section| {
362                Ok((
363                    section.name_bytes()?,
364                    SectionInfo {
365                        index: section.index(),
366                        size: section.size(),
367                    },
368                ))
369            })
370            .collect::<Result<HashMap<&[u8], SectionInfo>>>()?;
371
372        Ok(Self {
373            name,
374            elf_file,
375            path,
376            address_index,
377            name_index: NameIndex::new(elf_file),
378            indexed_layout,
379            trace,
380            sections_by_name,
381        })
382    }
383
384    /// Looks up a symbol, first trying to get a global, or failing that a local. If multiple
385    /// symbols have the same name, then `hint_address` is used to select which one to return.
386    pub(crate) fn symbol_by_name<'file: 'data>(
387        &'file self,
388        name: &[u8],
389        hint_address: u64,
390    ) -> NameLookupResult<'data, 'file> {
391        match self.lookup_symbol(&self.name_index.globals_by_name, name, hint_address) {
392            NameLookupResult::Undefined => {
393                self.lookup_symbol(&self.name_index.locals_by_name, name, hint_address)
394            }
395            other => other,
396        }
397    }
398
399    fn lookup_symbol<'file: 'data>(
400        &'file self,
401        symbol_map: &HashMap<&[u8], Vec<object::SymbolIndex>>,
402        name: &[u8],
403        hint_address: u64,
404    ) -> NameLookupResult<'data, 'file> {
405        let indexes = symbol_map.get(name).map(Vec::as_slice).unwrap_or_default();
406
407        if indexes.len() >= 2 {
408            for sym_index in indexes {
409                if let Ok(sym) = self.elf_file.symbol_by_index(*sym_index)
410                    && sym.address() == hint_address
411                {
412                    return NameLookupResult::Defined(sym);
413                }
414            }
415
416            // We didn't find a symbol with exactly the address hinted at.
417            return NameLookupResult::Duplicate;
418        }
419
420        if let Some(symbol_index) = indexes.first() {
421            if let Ok(sym) = self.elf_file.symbol_by_index(*symbol_index) {
422                NameLookupResult::Defined(sym)
423            } else {
424                NameLookupResult::Undefined
425            }
426        } else {
427            NameLookupResult::Undefined
428        }
429    }
430
431    fn section_by_name<'file: 'data>(
432        &'file self,
433        name: &str,
434    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
435        self.section_by_name_bytes(name.as_bytes())
436    }
437
438    fn section_by_name_bytes<'file: 'data>(
439        &'file self,
440        name: &[u8],
441    ) -> Option<ElfSection64<'data, 'file, LittleEndian>> {
442        let index = self.sections_by_name.get(name)?.index;
443        self.elf_file.section_by_index(index).ok()
444    }
445
446    fn section_containing_address<'file: 'data>(
447        &'file self,
448        address: u64,
449    ) -> Option<ElfSection64<'file, 'data, LittleEndian>> {
450        self.elf_file
451            .sections()
452            .find(|sec| (sec.address()..sec.address() + sec.size()).contains(&address))
453    }
454
455    /// Returns the name of the section that contains the supplied address. Does a linear scan, so
456    /// should only be used for error reporting.
457    fn section_name_containing_address(&self, address: u64) -> Option<&str> {
458        self.section_containing_address(address)
459            .and_then(|sec| sec.name().ok())
460    }
461}
462
/// The outcome of looking a symbol up by name (see `Binary::symbol_by_name`).
#[derive(Debug)]
enum NameLookupResult<'data, 'file> {
    /// No symbol with the requested name was found.
    Undefined,
    /// Several symbols have the requested name and none of them is at the hinted address.
    Duplicate,
    /// The symbol that was found.
    Defined(ElfSymbol64<'data, 'file>),
}
469
470fn validate_objects(
471    report: &mut Report,
472    objects: &[Binary],
473    validation_name: &str,
474    validation_fn: impl Fn(&Binary) -> Result,
475) {
476    let values = objects
477        .iter()
478        .map(|obj| match validation_fn(obj) {
479            Ok(_) => "OK".to_owned(),
480            Err(e) => e.to_string(),
481        })
482        .collect_vec();
483    if first_equals_any(values.iter()) {
484        return;
485    }
486    report.add_diff(Diff {
487        key: validation_name.to_owned(),
488        values: DiffValues::PerObject(values),
489    });
490}
491
/// The result of comparing a primary binary against one or more reference binaries. Built by
/// `Report::from_config`; its `Display` implementation prints the binaries followed by each
/// recorded difference.
pub struct Report {
    /// The names of each of our binaries. These should be short, not a full path, since we often
    /// prefix lines with these names.
    names: Vec<String>,

    /// The full path of each of our binaries.
    paths: Vec<PathBuf>,

    /// The differences that were detected.
    diffs: Vec<Diff>,

    /// The configuration that was used.
    config: Config,

    /// Section coverage information. This is `Some` only when `config.coverage` was set.
    pub coverage: Option<Coverage>,
}
508
/// Records, per input section, whether that section was diffed. Its `Display` implementation
/// prints one line per section followed by a byte-count summary.
#[derive(Default)]
pub struct Coverage {
    /// Coverage information for each input section, keyed by the section's ID.
    sections: HashMap<InputSectionId, SectionCoverage>,
}
513
/// Coverage information for a single input section.
struct SectionCoverage {
    /// The original input file from which the section came.
    original_file: OwnedFileIdentifier,

    /// The name of the section.
    name: String,

    /// Whether we diffed this section at all.
    diffed: bool,

    /// The size of the section in bytes.
    num_bytes: u64,
}
527
528impl Report {
529    pub fn from_config(mut config: Config) -> Result<Report> {
530        // This changes mutable global state, which isn't an ideal thing to be doing from a library.
531        // It's expedient though, and we don't really expect linker-diff to get used as a library
532        // anywhere except the linker-diff binary and wild's integration tests, so this probably
533        // isn't a big deal.
534        match config.colour {
535            Colour::Auto => colored::control::unset_override(),
536            Colour::Never => colored::control::set_override(false),
537            Colour::Always => colored::control::set_override(true),
538        }
539
540        let display_names = short_file_display_names(&config)?;
541
542        let file_bytes = config
543            .filenames()
544            .map(|filename| -> Result<Vec<u8>> {
545                let bytes = std::fs::read(filename)
546                    .with_context(|| format!("Failed to read `{}`", filename.display()))?;
547                Ok(bytes)
548            })
549            .collect::<Result<Vec<Vec<u8>>>>()?;
550
551        let elf_files = file_bytes
552            .iter()
553            .map(|bytes| -> Result<ElfFile64> { Ok(ElfFile64::parse(bytes.as_slice())?) })
554            .collect::<Result<Vec<_>>>()?;
555
556        let layouts = config
557            .filenames()
558            .map(|p| LayoutAndFiles::from_base_path(p))
559            .collect::<Result<Vec<_>>>()?;
560
561        let objects = elf_files
562            .iter()
563            .zip(display_names)
564            .zip(config.filenames())
565            .zip(&layouts)
566            .map(|(((elf_file, name), path), layout)| -> Result<Binary> {
567                Binary::new(elf_file, name, path.clone(), layout.as_ref())
568            })
569            .collect::<Result<Vec<_>>>()?;
570
571        if objects.len() < 2 {
572            bail!("At least two files must be provided for comparison");
573        }
574
575        let arch = ArchKind::from_objects(&objects)?;
576
577        if config.wild_defaults {
578            config.apply_wild_defaults(arch);
579        }
580
581        let mut report = Report {
582            names: objects.iter().map(|o| o.name.clone()).collect(),
583            paths: objects.iter().map(|o| o.path.clone()).collect(),
584            diffs: Default::default(),
585            coverage: config.coverage.then(Coverage::default),
586            config,
587        };
588
589        report.run_on_objects(&objects, arch);
590
591        Ok(report)
592    }
593
    /// Runs every check against `objects`, recording any differences in this report. Diffs are
    /// appended in the order the checks run, so the order of the calls below determines the
    /// order of the report.
    fn run_on_objects(&mut self, objects: &[Binary], arch: ArchKind) {
        // Per-object validations. Each is reported under the key passed as the third argument.
        validate_objects(
            self,
            objects,
            GNU_HASH_SECTION_NAME_STR,
            gnu_hash::check_object,
        );
        validate_objects(self, objects, "index", asm_diff::validate_indexes);
        validate_objects(
            self,
            objects,
            GOT_PLT_SECTION_NAME_STR,
            asm_diff::validate_got_plt,
        );
        validate_objects(
            self,
            objects,
            SYMTAB_SECTION_NAME_STR,
            symtab::validate_debug,
        );
        validate_objects(
            self,
            objects,
            DYNSYM_SECTION_NAME_STR,
            symtab::validate_dynamic,
        );
        // Architecture-independent comparisons between the objects.
        header_diff::check_dynamic_headers(self, objects);
        header_diff::check_file_headers(self, objects);
        header_diff::report_section_diffs(self, objects);
        eh_frame_diff::report_diffs(self, objects);
        version_diff::report_diffs(self, objects);
        debug_info_diff::check_debug_info(self, objects);
        symbol_diff::report_diffs(self, objects);
        segment::report_diffs(self, objects);

        // Architecture-specific comparisons, dispatched to the matching `Arch` implementation.
        match arch {
            ArchKind::X86_64 => {
                self.report_arch_specific_diffs::<crate::x86_64::X86_64>(objects);
            }
            ArchKind::Aarch64 => {
                self.report_arch_specific_diffs::<crate::aarch64::AArch64>(objects);
            }

            ArchKind::RISCV64 => {
                self.report_arch_specific_diffs::<crate::riscv64::RiscV64>(objects);
            }
        }
    }
642
    /// Runs the checks that are generic over the architecture `A`: first the section diffs from
    /// `asm_diff`, then the `init_order` diffs.
    fn report_arch_specific_diffs<A: Arch>(&mut self, binaries: &[Binary]) {
        asm_diff::report_section_diffs::<A>(self, binaries);
        init_order::report_diffs::<A>(self, binaries);
    }
647
648    fn add_diff(&mut self, diff: Diff) {
649        if self.should_ignore(&diff.key) {
650            return;
651        }
652        self.diffs.push(diff);
653    }
654
655    fn add_diffs(&mut self, new_diffs: Vec<Diff>) {
656        for diff in new_diffs {
657            self.add_diff(diff);
658        }
659    }
660
    /// Returns whether at least one difference or error was recorded in this report.
    #[must_use]
    pub fn has_problems(&self) -> bool {
        !self.diffs.is_empty()
    }
665
666    #[must_use]
667    pub fn should_ignore(&self, key: &str) -> bool {
668        if !self.config.only.is_empty() {
669            return !self.config.only.iter().any(|i| {
670                if let Some(prefix) = i.strip_suffix('*') {
671                    key.starts_with(prefix)
672                } else {
673                    key == *i
674                }
675            });
676        }
677        self.config.ignore.iter().any(|i| {
678            if let Some(prefix) = i.strip_suffix('*') {
679                key.starts_with(prefix)
680            } else {
681                key == *i
682            }
683        })
684    }
685
686    fn add_error(&mut self, error: impl Into<String>) {
687        self.diffs.push(Diff {
688            key: "error".to_owned(),
689            values: DiffValues::PreFormatted(error.into()),
690        });
691    }
692}
693
/// A single reported difference.
struct Diff {
    /// Identifies the kind of difference. This is what the `ignore` and `only` configuration
    /// patterns are matched against.
    key: String,
    /// The details that are printed beneath the key.
    values: DiffValues,
}
698
/// The details of a `Diff`.
enum DiffValues {
    /// One value per binary. When printed, each value is paired with the corresponding entry of
    /// `Report::names`.
    PerObject(Vec<String>),
    /// Free-form text that is printed line by line, with each line indented.
    PreFormatted(String),
}
703
704impl Display for Report {
705    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
706        for (name, path) in self.names.iter().zip(&self.paths) {
707            writeln!(f, "{name}: {}", path.display())?;
708        }
709
710        for diff in &self.diffs {
711            writeln!(f, "{}", diff.key)?;
712
713            match &diff.values {
714                DiffValues::PerObject(values) => {
715                    for (filename, result) in self.names.iter().zip(values) {
716                        writeln!(f, "  {filename} {result}")?;
717                    }
718                }
719                DiffValues::PreFormatted(values) => {
720                    for line in values.lines() {
721                        writeln!(f, "  {line}")?;
722                    }
723                }
724            }
725
726            writeln!(f)?;
727        }
728
729        Ok(())
730    }
731}
732
733impl Display for Binary<'_> {
734    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
735        self.name.fmt(f)
736    }
737}
738
739impl Display for Coverage {
740    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
741        writeln!(f, "Diffed sections:")?;
742
743        let mut total_bytes = 0;
744        let mut total_diffed = 0;
745
746        for sec in self.sections.values() {
747            writeln!(
748                f,
749                "  {} {}: {}",
750                sec.original_file,
751                sec.name,
752                if sec.diffed {
753                    "true".green()
754                } else {
755                    "false".red()
756                }
757            )?;
758
759            if sec.diffed {
760                total_diffed += sec.num_bytes;
761            }
762
763            total_bytes += sec.num_bytes;
764        }
765
766        writeln!(
767            f,
768            "Diffed {total_diffed} of {total_bytes} section bytes ({}%)",
769            total_diffed * 100 / total_bytes
770        )?;
771
772        Ok(())
773    }
774}
775
776fn short_file_display_names(config: &Config) -> Result<Vec<String>> {
777    let paths: Vec<&PathBuf> = config.filenames().collect();
778    if !config.display_names.is_empty() {
779        if config.display_names.len() != paths.len() {
780            bail!(
781                "--display-names has {} names, but {} filenames were provided",
782                config.display_names.len(),
783                paths.len()
784            );
785        }
786        return Ok(config.display_names.clone());
787    }
788    if paths.is_empty() {
789        return Ok(vec![]);
790    }
791    let mut names = paths
792        .iter()
793        .map(|p| p.to_string_lossy().into_owned())
794        .collect_vec();
795    if names.iter().all(|name| {
796        Path::new(name)
797            .extension()
798            .is_some_and(|ext| ext.eq_ignore_ascii_case("so"))
799    }) {
800        names = names
801            .into_iter()
802            .map(|n| n.strip_suffix(".so").unwrap().to_owned())
803            .collect();
804    }
805
806    if names.len() > 1 {
807        // This is not quite right, since we might split in the middle of a multibyte character.
808        // But this is a dev tool, so we'll punt on that for now.
809        let mut iterators = names.iter().map(|n| n.bytes()).collect_vec();
810        let mut n = 0;
811        while first_equals_all(iterators.iter_mut().map(Iterator::next)) {
812            n += 1;
813        }
814        names = names
815            .iter()
816            .map(|name| String::from_utf8_lossy(&name.bytes().skip(n).collect_vec()).into_owned())
817            .collect_vec();
818    }
819    Ok(names)
820}
821
/// Returns whether every remaining input is equal to the first one. An empty input, or one with
/// a single element, yields `true`. Stops consuming `inputs` at the first mismatch.
fn first_equals_all<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        Some(first) => inputs.all(|candidate| candidate == first),
        None => true,
    }
}
833
/// Returns whether the first input is equal to at least one of the inputs that follow it. An
/// empty input yields `true`; a single-element input yields `false` since there is nothing for
/// the first value to match.
fn first_equals_any<T: PartialEq>(mut inputs: impl Iterator<Item = T>) -> bool {
    match inputs.next() {
        Some(first) => inputs.any(|candidate| candidate == first),
        None => true,
    }
}
846
847impl<'data> NameIndex<'data> {
848    fn new(elf_file: &ElfFile64<'data>) -> NameIndex<'data> {
849        let mut globals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
850        let mut locals_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
851        let mut dynamic_by_name: HashMap<&[u8], Vec<object::SymbolIndex>> = HashMap::new();
852
853        for sym in elf_file.symbols() {
854            // We only index symbols that have a section. Note this is different than the object
855            // crate's `is_defined`, which imposes additional requirements that we don't want.
856            if sym.section_index().is_none() {
857                continue;
858            }
859
860            if let Ok(mut name) = sym.name_bytes() {
861                // Wild doesn't emit local symbols that start with ".L". The other linkers mostly do
862                // the same. However, GNU ld and lld, if they encounter a GOT-forming relocation to
863                // such a symbol, even if they then optimise away the GOT-forming relocation, will
864                // emit the symbol. This behaviour seems weird and not worth replicating, so we just
865                // ignore all just symbols.
866                if name.starts_with(b".L") {
867                    continue;
868                }
869
870                // GNU ld sometimes emits symbols that contain the symbol version. This causes
871                // problems when we go to look those symbols up, since they no longer match the name
872                // of the symbol in the original input file. So for now at least, we get rid of the
873                // version.
874                if let Some(at_pos) = name.iter().position(|b| *b == b'@') {
875                    name = &name[..at_pos];
876                }
877
878                if sym.is_global() {
879                    globals_by_name.entry(name).or_default().push(sym.index());
880                } else {
881                    locals_by_name.entry(name).or_default().push(sym.index());
882                }
883            }
884        }
885
886        for sym in elf_file.dynamic_symbols() {
887            if let Ok(name) = sym.name_bytes() {
888                dynamic_by_name.entry(name).or_default().push(sym.index());
889            }
890        }
891
892        NameIndex {
893            globals_by_name,
894            locals_by_name,
895            dynamic_by_name,
896        }
897    }
898}
899
900fn slice_from_all_bytes<T: object::Pod>(data: &[u8]) -> &[T] {
901    object::slice_from_bytes(data, data.len() / size_of::<T>())
902        .unwrap()
903        .0
904}
905
/// Parses a `key=value` argument into its two halves, splitting at the first `=`. Anything after
/// the first `=`, including further `=` characters, belongs to the value.
///
/// # Errors
///
/// Returns an error message if `s` contains no `=`.
fn parse_string_equality(
    s: &str,
) -> Result<(String, String), Box<dyn std::error::Error + Send + Sync + 'static>> {
    match s.split_once('=') {
        Some((key, value)) => Ok((key.to_owned(), value.to_owned())),
        None => Err(format!("invalid key-value pair. No '=' found in `{s}`").into()),
    }
}
914
915fn get_r_type<R: arch::RType>(rel: &object::Relocation) -> R {
916    let object::RelocationFlags::Elf { r_type } = rel.flags() else {
917        panic!("Unsupported object type (relocation flags)");
918    };
919    R::from_raw(r_type)
920}