1use std::path::Path;
8
9use ud_core::{assert_bytes_equal, Error, Result};
10use ud_translate::compile::AsmWarning;
11
12pub fn roundtrip(input: &Path, output: &Path) -> Result<()> {
28 let bytes = std::fs::read(input).map_err(|source| Error::Io {
29 path: input.to_path_buf(),
30 source,
31 })?;
32
33 let rebuilt = pipeline_bytes(&bytes);
34
35 std::fs::write(output, &rebuilt).map_err(|source| Error::Io {
36 path: output.to_path_buf(),
37 source,
38 })?;
39
40 let written_back = std::fs::read(output).map_err(|source| Error::Io {
41 path: output.to_path_buf(),
42 source,
43 })?;
44
45 assert_bytes_equal(&bytes, &written_back)
46}
47
48fn pipeline_bytes(bytes: &[u8]) -> Vec<u8> {
52 if ud_format::elf::is_elf64_le(bytes) {
53 if let Ok(elf) = ud_format::elf::Elf64File::parse(bytes) {
54 return elf.write_to_vec();
55 }
56 }
59 if ud_format::pe::is_pe(bytes) {
60 if let Ok(pe) = ud_format::pe::PeFile::parse(bytes) {
61 return pe.write_to_vec();
62 }
63 }
65 if ud_format::macho::is_macho64(bytes) {
66 if let Ok(macho) = ud_format::macho::MachoFile::parse(bytes) {
67 return macho.write_to_vec();
68 }
69 }
72 bytes.to_vec()
73}
74
/// Outcome of a successful [`roundtrip_through_source`] run.
///
/// "Successful" only means the pipeline ran to completion; whether the rebuilt
/// file matches the input is reported in `byte_identical`.
#[derive(Debug, Clone)]
pub struct SourceRoundTripReport {
    /// `true` when the rebuilt bytes have the same length as the input and no
    /// differing byte was found.
    pub byte_identical: bool,
    /// Length in bytes of the input file as read from disk.
    pub input_len: usize,
    /// Length in bytes of the rebuilt output.
    pub output_len: usize,
    /// Offset of the first differing byte, or the length of the shorter buffer
    /// when one is a strict prefix of the other; `None` when they are equal.
    pub first_diff_offset: Option<usize>,
    /// Bytes surrounding `first_diff_offset` in both buffers; present exactly
    /// when `first_diff_offset` is `Some`.
    pub diff_context: Option<DiffContext>,
    /// Warnings returned by `ud_translate::compile::verify_asm` for the
    /// re-parsed source.
    pub warnings: Vec<AsmWarning>,
}
96
/// A small window of bytes around the first difference between the input and
/// the rebuilt output.
///
/// Both windows start at the same offset, so bytes at equal indices in
/// `input_window` and `output_window` correspond to the same file offset.
#[derive(Debug, Clone)]
pub struct DiffContext {
    /// File offset of the first byte in each window (up to 8 bytes before the
    /// first difference, clamped at 0).
    pub window_start: usize,
    /// Input bytes from `window_start` up to 8 bytes past the difference,
    /// truncated at the end of the input.
    pub input_window: Vec<u8>,
    /// Output bytes over the same range, truncated at the end of the output.
    pub output_window: Vec<u8>,
}
105
/// Failure modes of [`roundtrip_through_source`].
///
/// Variants marked `#[from]` are produced implicitly by `?`; `Io` and `Parse`
/// are constructed explicitly at their call sites.
#[derive(Debug, thiserror::Error)]
pub enum SourceRoundTripError {
    /// None of the format detectors (ELF64-LE, PE, Mach-O 64, raw 6502)
    /// accepted the input.
    #[error("input is not a recognised binary format")]
    UnknownFormat,
    /// Reading the input or writing the output failed. No `#[from]`: callers
    /// wrap the `std::io::Error` explicitly with `map_err`.
    #[error(transparent)]
    Io(std::io::Error),
    /// Error from the `ud_translate::decompile` module (presumably the ELF
    /// decompile step, which is propagated with `?`).
    #[error(transparent)]
    Decompile(#[from] ud_translate::decompile::Error),
    /// Error from the `raw6502` decompiler (presumably the raw 6502 decompile
    /// step, which is propagated with `?`).
    #[error(transparent)]
    Decompile6502(#[from] ud_translate::decompile::raw6502::Error),
    /// The input was detected as ELF but could not be parsed.
    #[error(transparent)]
    ElfFormat(#[from] ud_format::elf::Error),
    /// The input was detected as PE but could not be parsed.
    #[error(transparent)]
    PeFormat(#[from] ud_format::pe::Error),
    /// The input was detected as Mach-O but could not be parsed.
    #[error(transparent)]
    MachoFormat(#[from] ud_format::macho::Error),
    /// The emitted source text could not be parsed back; holds the parser
    /// error rendered with `to_string()`.
    #[error("parse of decompile output failed: {0}")]
    Parse(String),
    /// Lowering the re-parsed source to ELF failed.
    #[error(transparent)]
    ElfLower(#[from] ud_translate::compile::ElfLowerError),
    /// Lowering the re-parsed source to PE failed.
    #[error(transparent)]
    PeLower(#[from] ud_translate::compile::PeLowerError),
    /// Lowering the re-parsed source to Mach-O failed.
    #[error(transparent)]
    MachoLower(#[from] ud_translate::compile::MachoLowerError),
    /// Lowering the re-parsed source to a raw image failed.
    #[error(transparent)]
    RawLower(#[from] ud_translate::compile::RawLowerError),
}
133
134pub fn roundtrip_through_source(
142 input: &Path,
143 output: &Path,
144) -> std::result::Result<SourceRoundTripReport, SourceRoundTripError> {
145 let input_bytes = std::fs::read(input).map_err(SourceRoundTripError::Io)?;
146
147 let (text, warnings, rebuilt) = if ud_format::elf::is_elf64_le(&input_bytes) {
148 let elf = ud_format::elf::Elf64File::parse(&input_bytes)?;
149 let ast = ud_translate::decompile::decompile(&elf)?;
150 let text = ud_ast::emit(&ast);
151 let parsed = ud_translate::compile::parse(&text)
152 .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
153 let warnings = ud_translate::compile::verify_asm(&parsed);
154 let rebuilt = ud_translate::compile::lower_to_elf(&parsed)?;
155 (text, warnings, rebuilt)
156 } else if ud_format::pe::is_pe(&input_bytes) {
157 let pe = ud_format::pe::PeFile::parse(&input_bytes)?;
158 let ast = ud_translate::decompile::decompile_pe(&pe);
159 let text = ud_ast::emit(&ast);
160 let parsed = ud_translate::compile::parse(&text)
161 .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
162 let warnings = ud_translate::compile::verify_asm(&parsed);
163 let rebuilt = ud_translate::compile::lower_to_pe(&parsed)?;
164 (text, warnings, rebuilt)
165 } else if ud_format::macho::is_macho64(&input_bytes) {
166 let macho = ud_format::macho::MachoFile::parse(&input_bytes)?;
167 let ast = ud_translate::decompile::decompile_macho(&macho);
168 let text = ud_ast::emit(&ast);
169 let parsed = ud_translate::compile::parse(&text)
170 .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
171 let warnings = ud_translate::compile::verify_asm(&parsed);
172 let rebuilt = ud_translate::compile::lower_to_macho(&parsed)?;
173 (text, warnings, rebuilt)
174 } else if let Some(load_addr) = raw_6502_load_addr(&input_bytes) {
175 let image = ud_format::raw::RawImage::new(input_bytes.clone(), load_addr);
176 let ast = ud_translate::decompile::decompile_raw_6502(&image)?;
177 let text = ud_ast::emit(&ast);
178 let parsed = ud_translate::compile::parse(&text)
179 .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
180 let warnings = ud_translate::compile::verify_asm(&parsed);
181 let rebuilt = ud_translate::compile::lower_to_raw(&parsed)?;
182 (text, warnings, rebuilt)
183 } else {
184 return Err(SourceRoundTripError::UnknownFormat);
185 };
186 let _ = text; std::fs::write(output, &rebuilt).map_err(SourceRoundTripError::Io)?;
189
190 let first_diff_offset = first_byte_diff(&input_bytes, &rebuilt);
191 let diff_context = first_diff_offset.map(|off| make_diff_context(off, &input_bytes, &rebuilt));
192 Ok(SourceRoundTripReport {
193 byte_identical: first_diff_offset.is_none() && input_bytes.len() == rebuilt.len(),
194 input_len: input_bytes.len(),
195 output_len: rebuilt.len(),
196 first_diff_offset,
197 diff_context,
198 warnings,
199 })
200}
201
202fn make_diff_context(off: usize, input: &[u8], output: &[u8]) -> DiffContext {
203 let window_start = off.saturating_sub(8);
204 let window_end_in = (off + 8).min(input.len());
205 let window_end_out = (off + 8).min(output.len());
206 DiffContext {
207 window_start,
208 input_window: input[window_start..window_end_in].to_vec(),
209 output_window: output[window_start..window_end_out].to_vec(),
210 }
211}
212
/// Heuristically decides whether `bytes` is a raw 6502 image and, if so,
/// returns the address it would be loaded at.
///
/// The image is assumed to end at the top of the 64 KiB address space, so the
/// load address is `0x10000 - bytes.len()`. The little-endian 16-bit reset
/// vector stored at `0xFFFC`/`0xFFFD` is then read from the image, and the
/// image is accepted only when that vector points inside the image itself.
///
/// Returns `None` for images shorter than 6 bytes or longer than 64 KiB, and
/// for images whose reset vector points below the load address.
#[must_use]
pub fn raw_6502_load_addr(bytes: &[u8]) -> Option<u64> {
    const ADDRESS_SPACE_END: u64 = 0x10000;
    const RESET_VECTOR: u64 = 0xFFFC;

    let size = bytes.len();
    if !(6..=0x10000).contains(&size) {
        return None;
    }

    // `size` is at most 0x10000 here, so both the conversion and the
    // subtraction are exact.
    let base = ADDRESS_SPACE_END - u64::try_from(size).ok()?;
    let lo_index = usize::try_from(RESET_VECTOR - base).ok()?;
    let hi_index = lo_index + 1;

    let (Some(&lo), Some(&hi)) = (bytes.get(lo_index), bytes.get(hi_index)) else {
        return None;
    };
    let reset_target = u64::from(u16::from_le_bytes([lo, hi]));

    (base..ADDRESS_SPACE_END)
        .contains(&reset_target)
        .then_some(base)
}
245
/// Returns the offset of the first byte at which `a` and `b` differ.
///
/// When one slice is a strict prefix of the other, the length of the shorter
/// slice is returned (the first offset present in only one of them). Returns
/// `None` only when the slices are equal in both length and content.
fn first_byte_diff(a: &[u8], b: &[u8]) -> Option<usize> {
    let common = a.len().min(b.len());
    match (0..common).find(|&i| a[i] != b[i]) {
        Some(i) => Some(i),
        None if a.len() == b.len() => None,
        None => Some(common),
    }
}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes that match none of the format detectors come back unchanged.
    #[test]
    fn pipeline_passes_through_non_elf_bytes() {
        let bytes = b"\x00\x01\x02\x03not an elf";
        assert_eq!(pipeline_bytes(bytes), bytes);
    }

    /// A 32-bit ELF is outside the ELF64-LE path and must be passed through.
    #[test]
    fn pipeline_passes_through_elf32() {
        let mut bytes = vec![0u8; 64];
        bytes[..4].copy_from_slice(b"\x7fELF");
        bytes[4] = 1; // EI_CLASS = ELFCLASS32
        bytes[5] = 1; // EI_DATA = ELFDATA2LSB (little-endian)
        let out = pipeline_bytes(&bytes);
        assert_eq!(out, bytes);
    }

    /// `roundtrip` succeeds end-to-end on a real file.
    #[test]
    fn roundtrip_on_a_temp_file_succeeds() {
        // Include the process id so concurrent test processes sharing the
        // system temp directory do not overwrite each other's files.
        let dir = std::env::temp_dir();
        let pid = std::process::id();
        let input = dir.join(format!("ud-cli-rt-in-{pid}"));
        let output = dir.join(format!("ud-cli-rt-out-{pid}"));

        // A 12-byte stub carrying an ELF64-LE ident; presumably too short for
        // the full parser, so the pipeline falls back to pass-through.
        std::fs::write(&input, b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00").unwrap();

        // Clean up before asserting so a failure does not leave files behind.
        let outcome = roundtrip(&input, &output);
        let _ = std::fs::remove_file(&input);
        let _ = std::fs::remove_file(&output);

        outcome.expect("identity round-trip should succeed");
    }
}