Skip to main content

padlock_dwarf/
reader.rs

1use std::rc::Rc;
2
3use gimli::read::EndianRcSlice;
4use object::{Object, ObjectSection, ObjectSymbol, RelocationTarget};
5use padlock_core::arch::{AARCH64, AARCH64_APPLE, ArchConfig, RISCV64, WASM32, X86_64_SYSV};
6
7pub type DwarfRc = gimli::Dwarf<EndianRcSlice<gimli::RunTimeEndian>>;
8
9/// Parse DWARF debug info from raw binary data.
10///
11/// For unlinked object files, ELF relocations in DWARF sections are applied
12/// so that string offsets and cross-section references resolve correctly.
13pub fn load(binary_data: &[u8]) -> anyhow::Result<DwarfRc> {
14    let file = object::File::parse(binary_data)?;
15    let endian = if file.is_little_endian() {
16        gimli::RunTimeEndian::Little
17    } else {
18        gimli::RunTimeEndian::Big
19    };
20
21    let load_section =
22        |id: gimli::SectionId| -> anyhow::Result<EndianRcSlice<gimli::RunTimeEndian>> {
23            let data: Vec<u8> = match file.section_by_name(id.name()) {
24                Some(s) => load_section_with_relocations(&file, endian, s)?,
25                None => Vec::new(),
26            };
27            Ok(EndianRcSlice::new(Rc::from(data.as_slice()), endian))
28        };
29
30    gimli::Dwarf::load(load_section)
31}
32
33/// Load a section's bytes and apply any ELF RELA relocations targeting it.
34///
35/// In unlinked `.o` files, DWARF cross-section references (e.g. `DW_FORM_strp`
36/// entries in `.debug_info` pointing into `.debug_str`) are stored as zero with
37/// an associated relocation whose addend is the real offset.  Applying the
38/// relocations here gives gimli the fully-resolved bytes it expects.
39fn load_section_with_relocations(
40    file: &object::File<'_>,
41    endian: gimli::RunTimeEndian,
42    section: object::Section<'_, '_>,
43) -> anyhow::Result<Vec<u8>> {
44    let mut data = section.uncompressed_data()?.into_owned();
45    let is_little = matches!(endian, gimli::RunTimeEndian::Little);
46
47    for (offset, reloc) in section.relocations() {
48        // Compute the target value: section/symbol base address + addend.
49        // In relocatable `.o` files, section addresses are 0, so the addend
50        // alone gives the correct intra-section offset.
51        let target: u64 = match reloc.target() {
52            RelocationTarget::Section(idx) => {
53                let sec = file.section_by_index(idx)?;
54                (sec.address() as i64 + reloc.addend()) as u64
55            }
56            RelocationTarget::Symbol(sym_idx) => {
57                let sym = file.symbol_by_index(sym_idx)?;
58                (sym.address() as i64 + reloc.addend()) as u64
59            }
60            _ => continue,
61        };
62
63        let off = offset as usize;
64        match reloc.size() {
65            32 => {
66                let bytes = if is_little {
67                    (target as u32).to_le_bytes()
68                } else {
69                    (target as u32).to_be_bytes()
70                };
71                if off + 4 <= data.len() {
72                    data[off..off + 4].copy_from_slice(&bytes);
73                }
74            }
75            64 => {
76                let bytes = if is_little {
77                    target.to_le_bytes()
78                } else {
79                    target.to_be_bytes()
80                };
81                if off + 8 <= data.len() {
82                    data[off..off + 8].copy_from_slice(&bytes);
83                }
84            }
85            _ => {}
86        }
87    }
88
89    Ok(data)
90}
91
92/// Detect the target architecture from a binary.
93pub fn detect_arch(binary_data: &[u8]) -> anyhow::Result<&'static ArchConfig> {
94    let file = object::File::parse(binary_data)?;
95    match file.architecture() {
96        object::Architecture::X86_64 => Ok(&X86_64_SYSV),
97        object::Architecture::Aarch64 => {
98            if is_apple_binary(&file) {
99                Ok(&AARCH64_APPLE)
100            } else {
101                Ok(&AARCH64)
102            }
103        }
104        object::Architecture::Wasm32 => Ok(&WASM32),
105        object::Architecture::Riscv64 => Ok(&RISCV64),
106        other => Err(anyhow::anyhow!("unsupported architecture: {:?}", other)),
107    }
108}
109
110fn is_apple_binary(file: &object::File<'_>) -> bool {
111    matches!(file.format(), object::BinaryFormat::MachO)
112}
113
114/// Return the architecture of the machine running padlock.
115/// Used when analysing source files (no binary available to inspect).
116pub fn detect_arch_from_host() -> &'static ArchConfig {
117    #[cfg(target_arch = "x86_64")]
118    {
119        &X86_64_SYSV
120    }
121    #[cfg(target_arch = "aarch64")]
122    {
123        &AARCH64
124    }
125    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
126    {
127        &X86_64_SYSV
128    }
129}
130
// ── tests ─────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    // ── ELF machine codes ────────────────────────────────────────────────────

    const EM_X86_64: u16 = 0x3e;
    const EM_AARCH64: u16 = 0xb7;
    const EM_RISCV: u16 = 0xf3;

    // ── synthetic binary header helpers ──────────────────────────────────────

    /// Produce a bare 64-byte ELF64 little-endian file header for `machine`.
    ///
    /// Only the fields `object::File::parse` needs are filled in (magic,
    /// class, data encoding, version, type, machine, header sizes); the
    /// section count stays zero.
    fn minimal_elf64(machine: u16) -> Vec<u8> {
        let mut header = vec![0u8; 64];
        // e_ident: magic, ELFCLASS64, ELFDATA2LSB, EV_CURRENT, ELFOSABI_NONE
        header[..8].copy_from_slice(&[0x7f, b'E', b'L', b'F', 2, 1, 1, 0]);
        // e_type = ET_REL
        header[16..18].copy_from_slice(&1u16.to_le_bytes());
        // e_machine
        header[18..20].copy_from_slice(&machine.to_le_bytes());
        // e_version = 1
        header[20..24].copy_from_slice(&1u32.to_le_bytes());
        // e_ehsize = 64
        header[52..54].copy_from_slice(&64u16.to_le_bytes());
        // e_shentsize = 64 (expected by the object crate even with 0 sections)
        header[58..60].copy_from_slice(&64u16.to_le_bytes());
        header
    }

    /// Produce a bare 32-byte little-endian `mach_header_64` for AArch64
    /// with no load commands.
    fn minimal_macho_arm64() -> Vec<u8> {
        const MH_MAGIC_64: u32 = 0xFEED_FACF;
        const CPU_TYPE_ARM64: u32 = 0x0100_000C;
        const CPU_SUBTYPE_ARM64_ALL: u32 = 0;
        const MH_OBJECT: u32 = 1;

        // magic, cputype, cpusubtype, filetype, ncmds, sizeofcmds, flags, reserved
        let words: [u32; 8] = [
            MH_MAGIC_64,
            CPU_TYPE_ARM64,
            CPU_SUBTYPE_ARM64_ALL,
            MH_OBJECT,
            0,
            0,
            0,
            0,
        ];
        words.iter().flat_map(|word| word.to_le_bytes()).collect()
    }

    // ── detect_arch tests ─────────────────────────────────────────────────────

    #[test]
    fn detect_arch_x86_64_elf() {
        let arch = detect_arch(&minimal_elf64(EM_X86_64)).unwrap();
        assert_eq!(arch.name, "x86_64");
        assert_eq!(arch.pointer_size, 8);
        assert_eq!(arch.cache_line_size, 64);
    }

    #[test]
    fn detect_arch_aarch64_elf() {
        let arch = detect_arch(&minimal_elf64(EM_AARCH64)).unwrap();
        assert_eq!(arch.name, "aarch64");
        assert_eq!(arch.pointer_size, 8);
        assert_eq!(arch.cache_line_size, 64);
    }

    #[test]
    fn detect_arch_aarch64_macho_returns_apple_variant() {
        let arch = detect_arch(&minimal_macho_arm64()).unwrap();
        assert_eq!(arch.name, "aarch64-apple");
        // Apple Silicon: 128-byte cache lines
        assert_eq!(arch.cache_line_size, 128);
    }

    #[test]
    fn detect_arch_riscv64_elf() {
        let arch = detect_arch(&minimal_elf64(EM_RISCV)).unwrap();
        assert_eq!(arch.name, "riscv64");
    }

    #[test]
    fn detect_arch_rejects_garbage() {
        let zeros = [0u8; 64];
        assert!(detect_arch(&zeros).is_err());
    }

    #[test]
    fn detect_arch_from_host_returns_valid_config() {
        let arch = detect_arch_from_host();
        // Must be one of the known configs
        let known_names = ["x86_64", "aarch64", "aarch64-apple"];
        assert!(known_names.contains(&arch.name));
        assert!(matches!(arch.pointer_size, 4 | 8));
        assert!(arch.cache_line_size > 0);
    }
}