Skip to main content

ud_cli/
lib.rs

1//! Library half of the `ud` CLI.
2//!
3//! The CLI binary is a thin wrapper over the functions exposed here; the
4//! split lets integration tests call the same code path as the binary
5//! without spawning a subprocess.
6
7use std::path::Path;
8
9use ud_core::{assert_bytes_equal, Error, Result};
10use ud_translate::compile::AsmWarning;
11
12/// Run the round-trip pipeline on `input`, write the result to `output`,
13/// and verify byte-equality with the input.
14///
15/// The pipeline routes by detected format:
16///
17/// * **ELF64-LE** is parsed via [`ud_format::elf::Elf64File`] and re-emitted.
18///   This actually exercises the format reader/writer, so any drift in
19///   either path is caught here.
20/// * **Anything else** (32-bit ELF, PE, Mach-O, raw bytes) falls through
21///   to a byte-copy. The round-trip contract still holds — it's just the
22///   trivial identity until we grow support.
23///
24/// The shape of this function will not change as later phases replace
25/// `pipeline_bytes` with real decompile-then-compile logic. The contract
26/// is "input bytes equal output bytes or you get an error", forever.
27pub fn roundtrip(input: &Path, output: &Path) -> Result<()> {
28    let bytes = std::fs::read(input).map_err(|source| Error::Io {
29        path: input.to_path_buf(),
30        source,
31    })?;
32
33    let rebuilt = pipeline_bytes(&bytes);
34
35    std::fs::write(output, &rebuilt).map_err(|source| Error::Io {
36        path: output.to_path_buf(),
37        source,
38    })?;
39
40    let written_back = std::fs::read(output).map_err(|source| Error::Io {
41        path: output.to_path_buf(),
42        source,
43    })?;
44
45    assert_bytes_equal(&bytes, &written_back)
46}
47
48/// Apply the round-trip pipeline to in-memory bytes and return the result.
49///
50/// Split out so it's directly testable without filesystem I/O.
51fn pipeline_bytes(bytes: &[u8]) -> Vec<u8> {
52    if ud_format::elf::is_elf64_le(bytes) {
53        if let Ok(elf) = ud_format::elf::Elf64File::parse(bytes) {
54            return elf.write_to_vec();
55        }
56        // ELF that we still can't parse (e.g. malformed header sizes).
57        // Fall through to byte-copy so the round-trip contract holds.
58    }
59    if ud_format::pe::is_pe(bytes) {
60        if let Ok(pe) = ud_format::pe::PeFile::parse(bytes) {
61            return pe.write_to_vec();
62        }
63        // PE-shaped but invalid; fall through to byte-copy.
64    }
65    if ud_format::macho::is_macho64(bytes) {
66        if let Ok(macho) = ud_format::macho::MachoFile::parse(bytes) {
67            return macho.write_to_vec();
68        }
69        // Mach-O-shaped but rejected by v1 (32-bit, unsupported
70        // cputype, fat wrapper); fall through to byte-copy.
71    }
72    bytes.to_vec()
73}
74
75/// Result of [`roundtrip_through_source`].
76#[derive(Debug, Clone)]
77pub struct SourceRoundTripReport {
78    /// Whether the rebuilt bytes equal the input bytes.
79    pub byte_identical: bool,
80    /// Length of the input in bytes.
81    pub input_len: usize,
82    /// Length of the rebuilt bytes.
83    pub output_len: usize,
84    /// Offset of the first byte that differs, when the round-trip
85    /// failed; `None` when the result is byte-identical.
86    pub first_diff_offset: Option<usize>,
87    /// 16-byte excerpts of input and output bytes around the first
88    /// divergence. Populated only on mismatch.
89    pub diff_context: Option<DiffContext>,
90    /// `verify_asm` findings produced during the round-trip. Empty
91    /// for a clean decompile output; populated when the `.ud`
92    /// in-flight had `@asm` lines whose text disagreed with their
93    /// pinned bytes.
94    pub warnings: Vec<AsmWarning>,
95}
96
/// Excerpts of the input and rebuilt bytes around the first divergence.
///
/// Both windows start at the same absolute offset, [`window_start`], so
/// the byte at index `i` of either window sits at offset
/// `window_start + i` of its buffer. A window covers at most 16 bytes:
/// up to 8 before the divergence and up to 8 from the divergence onward.
/// It is shorter when the divergence is near the start of the data or
/// when its buffer ends early, so the two windows may differ in length.
///
/// [`window_start`]: DiffContext::window_start
#[derive(Debug, Clone)]
pub struct DiffContext {
    /// Absolute offset, in both buffers, of the first byte of each window.
    pub window_start: usize,
    /// Bytes taken from the original input, beginning at `window_start`.
    pub input_window: Vec<u8>,
    /// Bytes taken from the rebuilt output, beginning at `window_start`.
    pub output_window: Vec<u8>,
}
105
106#[derive(Debug, thiserror::Error)]
107pub enum SourceRoundTripError {
108    #[error("input is not a recognised binary format")]
109    UnknownFormat,
110    #[error(transparent)]
111    Io(std::io::Error),
112    #[error(transparent)]
113    Decompile(#[from] ud_translate::decompile::Error),
114    #[error(transparent)]
115    Decompile6502(#[from] ud_translate::decompile::raw6502::Error),
116    #[error(transparent)]
117    ElfFormat(#[from] ud_format::elf::Error),
118    #[error(transparent)]
119    PeFormat(#[from] ud_format::pe::Error),
120    #[error(transparent)]
121    MachoFormat(#[from] ud_format::macho::Error),
122    #[error("parse of decompile output failed: {0}")]
123    Parse(String),
124    #[error(transparent)]
125    ElfLower(#[from] ud_translate::compile::ElfLowerError),
126    #[error(transparent)]
127    PeLower(#[from] ud_translate::compile::PeLowerError),
128    #[error(transparent)]
129    MachoLower(#[from] ud_translate::compile::MachoLowerError),
130    #[error(transparent)]
131    RawLower(#[from] ud_translate::compile::RawLowerError),
132}
133
134/// Run `input` through the full source pipeline:
135/// decompile → text → parse → verify_asm → lower_to_elf → write.
136///
137/// Always emits the rebuilt binary. Verification warnings are collected
138/// in the report and don't fail the call. Byte differences between input
139/// and rebuilt also don't fail the call — they appear in the report so
140/// the caller can decide what to do (warn, abort, persist anyway).
141pub fn roundtrip_through_source(
142    input: &Path,
143    output: &Path,
144) -> std::result::Result<SourceRoundTripReport, SourceRoundTripError> {
145    let input_bytes = std::fs::read(input).map_err(SourceRoundTripError::Io)?;
146
147    let (text, warnings, rebuilt) = if ud_format::elf::is_elf64_le(&input_bytes) {
148        let elf = ud_format::elf::Elf64File::parse(&input_bytes)?;
149        let ast = ud_translate::decompile::decompile(&elf)?;
150        let text = ud_ast::emit(&ast);
151        let parsed = ud_translate::compile::parse(&text)
152            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
153        let warnings = ud_translate::compile::verify_asm(&parsed);
154        let rebuilt = ud_translate::compile::lower_to_elf(&parsed)?;
155        (text, warnings, rebuilt)
156    } else if ud_format::pe::is_pe(&input_bytes) {
157        let pe = ud_format::pe::PeFile::parse(&input_bytes)?;
158        let ast = ud_translate::decompile::decompile_pe(&pe);
159        let text = ud_ast::emit(&ast);
160        let parsed = ud_translate::compile::parse(&text)
161            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
162        let warnings = ud_translate::compile::verify_asm(&parsed);
163        let rebuilt = ud_translate::compile::lower_to_pe(&parsed)?;
164        (text, warnings, rebuilt)
165    } else if ud_format::macho::is_macho64(&input_bytes) {
166        let macho = ud_format::macho::MachoFile::parse(&input_bytes)?;
167        let ast = ud_translate::decompile::decompile_macho(&macho);
168        let text = ud_ast::emit(&ast);
169        let parsed = ud_translate::compile::parse(&text)
170            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
171        let warnings = ud_translate::compile::verify_asm(&parsed);
172        let rebuilt = ud_translate::compile::lower_to_macho(&parsed)?;
173        (text, warnings, rebuilt)
174    } else if let Some(load_addr) = raw_6502_load_addr(&input_bytes) {
175        let image = ud_format::raw::RawImage::new(input_bytes.clone(), load_addr);
176        let ast = ud_translate::decompile::decompile_raw_6502(&image)?;
177        let text = ud_ast::emit(&ast);
178        let parsed = ud_translate::compile::parse(&text)
179            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
180        let warnings = ud_translate::compile::verify_asm(&parsed);
181        let rebuilt = ud_translate::compile::lower_to_raw(&parsed)?;
182        (text, warnings, rebuilt)
183    } else {
184        return Err(SourceRoundTripError::UnknownFormat);
185    };
186    let _ = text; // kept for future debug surfacing
187
188    std::fs::write(output, &rebuilt).map_err(SourceRoundTripError::Io)?;
189
190    let first_diff_offset = first_byte_diff(&input_bytes, &rebuilt);
191    let diff_context = first_diff_offset.map(|off| make_diff_context(off, &input_bytes, &rebuilt));
192    Ok(SourceRoundTripReport {
193        byte_identical: first_diff_offset.is_none() && input_bytes.len() == rebuilt.len(),
194        input_len: input_bytes.len(),
195        output_len: rebuilt.len(),
196        first_diff_offset,
197        diff_context,
198        warnings,
199    })
200}
201
202fn make_diff_context(off: usize, input: &[u8], output: &[u8]) -> DiffContext {
203    let window_start = off.saturating_sub(8);
204    let window_end_in = (off + 8).min(input.len());
205    let window_end_out = (off + 8).min(output.len());
206    DiffContext {
207        window_start,
208        input_window: input[window_start..window_end_in].to_vec(),
209        output_window: output[window_start..window_end_out].to_vec(),
210    }
211}
212
/// Detect "this is a 6502 raw ROM image" inputs and return the load
/// address such an image would have.
///
/// The convention is that 6502 binaries place vectors (NMI/RESET/IRQ) at
/// the top of the 16-bit address space, $FFFA-$FFFF. For an image of
/// length L, the natural load address is `0x10000 - L` so the image
/// extends exactly to $FFFF.
///
/// v0 heuristic: accept files in `[6, 65536]` bytes whose reset vector at
/// $FFFC under that load address points back into the image. WozMon
/// (256 bytes, load $FF00, reset $FF00) matches.
///
/// Returns `Some(load_addr)` on a match and `None` otherwise. Note that a
/// full 65536-byte image has load address 0, so every possible reset
/// vector points into it and such an input always matches.
#[must_use]
pub fn raw_6502_load_addr(bytes: &[u8]) -> Option<u64> {
    // One past the top of the 16-bit address space.
    const ADDRESS_SPACE_END: u64 = 0x1_0000;

    let len = bytes.len();
    // At least the six vector bytes, at most the whole address space.
    if !(6..=0x10000).contains(&len) {
        return None;
    }
    let load_addr = ADDRESS_SPACE_END - u64::try_from(len).ok()?;

    // $FFFC lies four bytes below the end of the address space, so the
    // little-endian reset vector is always at `len - 4` / `len - 3`.
    // `len >= 6` keeps both indices in bounds.
    let reset = u64::from(u16::from_le_bytes([bytes[len - 4], bytes[len - 3]]));

    // A 16-bit vector can never reach $10000, so only the lower bound of
    // the image needs checking.
    (reset >= load_addr).then_some(load_addr)
}
245
/// Offset of the first position at which `a` and `b` disagree.
///
/// Returns the index of the first differing byte within the common
/// prefix. If the common prefix is identical but the lengths differ, the
/// length of the shorter slice is returned. `None` means the slices are
/// equal.
fn first_byte_diff(a: &[u8], b: &[u8]) -> Option<usize> {
    let shared = a.len().min(b.len());
    match (0..shared).find(|&i| a[i] != b[i]) {
        Some(mismatch) => Some(mismatch),
        None if a.len() == b.len() => None,
        // One slice is a strict prefix of the other.
        None => Some(shared),
    }
}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that no detector is expected to accept must come back
    /// unchanged.
    #[test]
    fn pipeline_passes_through_non_elf_bytes() {
        let bytes = b"\x00\x01\x02\x03not an elf";
        assert_eq!(pipeline_bytes(bytes), bytes);
    }

    #[test]
    fn pipeline_passes_through_elf32() {
        // Magic + ELFCLASS32 + ELFDATA2LSB → not ELF64, must byte-copy.
        let mut bytes = vec![0u8; 64];
        bytes[..4].copy_from_slice(b"\x7fELF");
        bytes[4] = 1; // ELFCLASS32
        bytes[5] = 1; // ELFDATA2LSB
        let out = pipeline_bytes(&bytes);
        assert_eq!(out, bytes);
    }

    #[test]
    fn roundtrip_on_a_temp_file_succeeds() {
        // Per-process file names so concurrent test runs sharing the
        // system temp dir don't trample each other's files.
        let dir = std::env::temp_dir();
        let pid = std::process::id();
        let input = dir.join(format!("ud-cli-rt-in-{pid}"));
        let output = dir.join(format!("ud-cli-rt-out-{pid}"));

        // Only the 12-byte start of an ELF64-LE header; whether or not it
        // parses, the round-trip contract requires an identical copy.
        std::fs::write(&input, b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00").unwrap();

        // Clean up before asserting so a failure doesn't leak the files.
        let outcome = roundtrip(&input, &output);
        let _ = std::fs::remove_file(&input);
        let _ = std::fs::remove_file(&output);

        outcome.expect("identity round-trip should succeed");
    }

    #[test]
    fn first_byte_diff_reports_mismatch_or_length_gap() {
        assert_eq!(first_byte_diff(b"abc", b"abc"), None);
        assert_eq!(first_byte_diff(b"abc", b"abd"), Some(2));
        assert_eq!(first_byte_diff(b"abc", b"ab"), Some(2));
        assert_eq!(first_byte_diff(b"", b"x"), Some(0));
    }

    #[test]
    fn raw_6502_detection_follows_reset_vector() {
        // WozMon-shaped: 256 bytes, reset vector $FF00 at $FFFC.
        let mut rom = vec![0u8; 256];
        rom[252] = 0x00;
        rom[253] = 0xFF;
        assert_eq!(raw_6502_load_addr(&rom), Some(0xFF00));

        // Reset vector $1200 points below the image.
        rom[253] = 0x12;
        assert_eq!(raw_6502_load_addr(&rom), None);

        // Too short to hold the vector table.
        assert_eq!(raw_6502_load_addr(&[0u8; 5]), None);
    }
}
284}