use std::path::Path;
use ud_core::{assert_bytes_equal, Error, Result};
use ud_translate::compile::AsmWarning;
pub fn roundtrip(input: &Path, output: &Path) -> Result<()> {
let bytes = std::fs::read(input).map_err(|source| Error::Io {
path: input.to_path_buf(),
source,
})?;
let rebuilt = pipeline_bytes(&bytes);
std::fs::write(output, &rebuilt).map_err(|source| Error::Io {
path: output.to_path_buf(),
source,
})?;
let written_back = std::fs::read(output).map_err(|source| Error::Io {
path: output.to_path_buf(),
source,
})?;
assert_bytes_equal(&bytes, &written_back)
}
/// Re-serialises `bytes` through the first container parser that accepts them.
///
/// The ELF, PE and Mach-O detectors are consulted in that order. A format
/// whose detector matches but whose parser fails is skipped, and input that no
/// parser accepts is returned as an unchanged copy.
fn pipeline_bytes(bytes: &[u8]) -> Vec<u8> {
    // Each probe is a closure so later formats are only examined when the
    // earlier ones did not produce a result.
    let via_elf = || {
        ud_format::elf::is_elf64_le(bytes)
            .then(|| ud_format::elf::Elf64File::parse(bytes))
            .and_then(Result::ok)
            .map(|file| file.write_to_vec())
    };
    let via_pe = || {
        ud_format::pe::is_pe(bytes)
            .then(|| ud_format::pe::PeFile::parse(bytes))
            .and_then(Result::ok)
            .map(|file| file.write_to_vec())
    };
    let via_macho = || {
        ud_format::macho::is_macho64(bytes)
            .then(|| ud_format::macho::MachoFile::parse(bytes))
            .and_then(Result::ok)
            .map(|file| file.write_to_vec())
    };

    via_elf()
        .or_else(via_pe)
        .or_else(via_macho)
        .unwrap_or_else(|| bytes.to_vec())
}
/// Outcome of [`roundtrip_through_source`]: how the rebuilt bytes compare with
/// the input bytes, plus any assembler warnings collected along the way.
#[derive(Debug, Clone)]
pub struct SourceRoundTripReport {
/// `true` when no differing offset was found and both buffers have the same length.
pub byte_identical: bool,
/// Length in bytes of the input file.
pub input_len: usize,
/// Length in bytes of the rebuilt output.
pub output_len: usize,
/// Offset of the first differing byte, or the shorter length when one buffer
/// is a strict prefix of the other; `None` when the buffers are equal.
pub first_diff_offset: Option<usize>,
/// Bytes surrounding `first_diff_offset` from both buffers; present exactly
/// when `first_diff_offset` is.
pub diff_context: Option<DiffContext>,
/// Warnings returned by `ud_translate::compile::verify_asm` for the re-parsed source.
pub warnings: Vec<AsmWarning>,
}
/// A small window of bytes around a differing offset, taken from both the
/// input and the rebuilt output.
#[derive(Debug, Clone)]
pub struct DiffContext {
/// Offset at which both windows begin: the differing offset minus 8, saturating at 0.
pub window_start: usize,
/// Input bytes from `window_start` up to 8 bytes past the differing offset,
/// clamped to the input length.
pub input_window: Vec<u8>,
/// Output bytes over the same range, clamped to the output length.
pub output_window: Vec<u8>,
}
/// Everything that can go wrong in [`roundtrip_through_source`].
#[derive(Debug, thiserror::Error)]
pub enum SourceRoundTripError {
/// None of the format detectors accepted the input bytes.
#[error("input is not a recognised binary format")]
UnknownFormat,
/// Reading the input file or writing the output file failed.
/// There is no `#[from]` here, so this variant is built explicitly via `map_err`.
#[error(transparent)]
Io(std::io::Error),
/// Decompilation failed with `ud_translate::decompile::Error`.
#[error(transparent)]
Decompile(#[from] ud_translate::decompile::Error),
/// Decompilation failed with `ud_translate::decompile::raw6502::Error`.
#[error(transparent)]
Decompile6502(#[from] ud_translate::decompile::raw6502::Error),
/// An ELF format error from `ud_format::elf`.
#[error(transparent)]
ElfFormat(#[from] ud_format::elf::Error),
/// A PE format error from `ud_format::pe`.
#[error(transparent)]
PeFormat(#[from] ud_format::pe::Error),
/// A Mach-O format error from `ud_format::macho`.
#[error(transparent)]
MachoFormat(#[from] ud_format::macho::Error),
/// The emitted source text could not be parsed back; holds the parser's
/// error rendered as a string.
#[error("parse of decompile output failed: {0}")]
Parse(String),
/// Lowering the parsed source to ELF failed.
#[error(transparent)]
ElfLower(#[from] ud_translate::compile::ElfLowerError),
/// Lowering the parsed source to PE failed.
#[error(transparent)]
PeLower(#[from] ud_translate::compile::PeLowerError),
/// Lowering the parsed source to Mach-O failed.
#[error(transparent)]
MachoLower(#[from] ud_translate::compile::MachoLowerError),
/// Lowering the parsed source to a raw image failed.
#[error(transparent)]
RawLower(#[from] ud_translate::compile::RawLowerError),
}
/// Decompiles `input` to source text, compiles that text back to a binary,
/// writes the rebuilt bytes to `output`, and reports how they compare with the
/// original.
///
/// The format is chosen by trying, in order, `ud_format::elf::is_elf64_le`,
/// `ud_format::pe::is_pe`, `ud_format::macho::is_macho64` and finally
/// [`raw_6502_load_addr`]. Every path runs the same stages: decompile, emit
/// text, re-parse the text, collect `verify_asm` warnings, lower to bytes.
///
/// A byte mismatch is *not* an error: it is described by the returned
/// [`SourceRoundTripReport`]. The output file is written before the
/// comparison, so it exists even when the bytes differ.
///
/// # Errors
///
/// * [`SourceRoundTripError::UnknownFormat`] when no detector matches.
/// * [`SourceRoundTripError::Io`] when reading `input` or writing `output` fails.
/// * [`SourceRoundTripError::Parse`] when the emitted text does not re-parse.
/// * The remaining variants when the format parser, the decompiler or the
///   lowering step for the detected format fails.
pub fn roundtrip_through_source(
    input: &Path,
    output: &Path,
) -> std::result::Result<SourceRoundTripReport, SourceRoundTripError> {
    let input_bytes = std::fs::read(input).map_err(SourceRoundTripError::Io)?;

    // The emitted text is only an intermediate artefact, so each branch keeps
    // it local and hands back just the warnings and the rebuilt bytes.
    let (warnings, rebuilt) = if ud_format::elf::is_elf64_le(&input_bytes) {
        let elf = ud_format::elf::Elf64File::parse(&input_bytes)?;
        let ast = ud_translate::decompile::decompile(&elf)?;
        let text = ud_ast::emit(&ast);
        let parsed = ud_translate::compile::parse(&text)
            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
        let warnings = ud_translate::compile::verify_asm(&parsed);
        let rebuilt = ud_translate::compile::lower_to_elf(&parsed)?;
        (warnings, rebuilt)
    } else if ud_format::pe::is_pe(&input_bytes) {
        let pe = ud_format::pe::PeFile::parse(&input_bytes)?;
        let ast = ud_translate::decompile::decompile_pe(&pe);
        let text = ud_ast::emit(&ast);
        let parsed = ud_translate::compile::parse(&text)
            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
        let warnings = ud_translate::compile::verify_asm(&parsed);
        let rebuilt = ud_translate::compile::lower_to_pe(&parsed)?;
        (warnings, rebuilt)
    } else if ud_format::macho::is_macho64(&input_bytes) {
        let macho = ud_format::macho::MachoFile::parse(&input_bytes)?;
        let ast = ud_translate::decompile::decompile_macho(&macho);
        let text = ud_ast::emit(&ast);
        let parsed = ud_translate::compile::parse(&text)
            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
        let warnings = ud_translate::compile::verify_asm(&parsed);
        let rebuilt = ud_translate::compile::lower_to_macho(&parsed)?;
        (warnings, rebuilt)
    } else if let Some(load_addr) = raw_6502_load_addr(&input_bytes) {
        // The image takes ownership of its bytes; the original buffer is still
        // needed below for the comparison, hence the clone.
        let image = ud_format::raw::RawImage::new(input_bytes.clone(), load_addr);
        let ast = ud_translate::decompile::decompile_raw_6502(&image)?;
        let text = ud_ast::emit(&ast);
        let parsed = ud_translate::compile::parse(&text)
            .map_err(|e| SourceRoundTripError::Parse(e.to_string()))?;
        let warnings = ud_translate::compile::verify_asm(&parsed);
        let rebuilt = ud_translate::compile::lower_to_raw(&parsed)?;
        (warnings, rebuilt)
    } else {
        return Err(SourceRoundTripError::UnknownFormat);
    };

    std::fs::write(output, &rebuilt).map_err(SourceRoundTripError::Io)?;

    let first_diff_offset = first_byte_diff(&input_bytes, &rebuilt);
    let diff_context =
        first_diff_offset.map(|off| make_diff_context(off, &input_bytes, &rebuilt));

    Ok(SourceRoundTripReport {
        // The explicit length comparison is kept as a belt-and-braces check on
        // top of the diff offset.
        byte_identical: first_diff_offset.is_none() && input_bytes.len() == rebuilt.len(),
        input_len: input_bytes.len(),
        output_len: rebuilt.len(),
        first_diff_offset,
        diff_context,
        warnings,
    })
}
/// Captures the bytes surrounding offset `off` from both buffers.
///
/// Both windows start at `off - 8` (saturating at 0) and run to `off + 8`,
/// clamped to the respective buffer's length. If `off` lies so far past the
/// end of a buffer that the window would be empty or inverted, that buffer's
/// window is empty instead of the function panicking.
fn make_diff_context(off: usize, input: &[u8], output: &[u8]) -> DiffContext {
    /// Number of bytes shown on each side of the differing offset.
    const RADIUS: usize = 8;

    let window_start = off.saturating_sub(RADIUS);
    // Saturating so an offset near `usize::MAX` cannot overflow.
    let window_end = off.saturating_add(RADIUS);

    // `get` returns `None` for an out-of-range or inverted range, which maps
    // to an empty window here.
    let window = |buf: &[u8]| -> Vec<u8> {
        buf.get(window_start..window_end.min(buf.len()))
            .map(|bytes| bytes.to_vec())
            .unwrap_or_default()
    };

    DiffContext {
        window_start,
        input_window: window(input),
        output_window: window(output),
    }
}
/// Guesses the load address of a raw 6502 image, or returns `None` when the
/// bytes do not look like one.
///
/// The image is assumed to end at the top of the 64 KiB address space, so its
/// load address is `0x10000 - bytes.len()`. It is accepted only when it is
/// between 6 and `0x10000` bytes long and the little-endian 16-bit word stored
/// at address `0xFFFC` (the reset vector) points back inside the image.
#[must_use]
pub fn raw_6502_load_addr(bytes: &[u8]) -> Option<u64> {
    const TOP_OF_MEMORY: u64 = 0x10000;
    const RESET_VECTOR_ADDR: u64 = 0xFFFC;

    let base = match bytes.len() {
        size @ 6..=0x10000 => TOP_OF_MEMORY - size as u64,
        _ => return None,
    };

    // Translate the vector's address into an index within the image.
    let lo_index = usize::try_from(RESET_VECTOR_ADDR - base).ok()?;
    let hi = *bytes.get(lo_index + 1)?;
    let lo = *bytes.get(lo_index)?;
    let target = u64::from(u16::from_le_bytes([lo, hi]));

    (base..TOP_OF_MEMORY).contains(&target).then_some(base)
}
/// Returns the offset at which `a` and `b` first differ, or `None` when they
/// are equal.
///
/// When one slice is a strict prefix of the other, the offset is the length of
/// the shorter slice.
fn first_byte_diff(a: &[u8], b: &[u8]) -> Option<usize> {
    let shared = a.len().min(b.len());

    // A mismatch inside the common prefix takes precedence.
    if let Some(idx) = (0..shared).find(|&i| a[i] != b[i]) {
        return Some(idx);
    }

    if a.len() == b.len() {
        None
    } else {
        Some(shared)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes that match no known container are returned untouched.
    #[test]
    fn pipeline_passes_through_non_elf_bytes() {
        let bytes = b"\x00\x01\x02\x03not an elf";
        assert_eq!(pipeline_bytes(bytes), bytes);
    }

    /// A 32-bit ELF header is not handled by the ELF64 path and must pass
    /// through unchanged.
    #[test]
    fn pipeline_passes_through_elf32() {
        let mut bytes = vec![0u8; 64];
        bytes[..4].copy_from_slice(b"\x7fELF");
        bytes[4] = 1; // EI_CLASS = ELFCLASS32
        bytes[5] = 1; // EI_DATA = little-endian
        let out = pipeline_bytes(&bytes);
        assert_eq!(out, bytes);
    }

    /// `roundtrip` succeeds on a file the pipeline leaves untouched (here a
    /// truncated ELF64 header).
    #[test]
    fn roundtrip_on_a_temp_file_succeeds() {
        // Include the process id so concurrent test runs sharing the temp
        // directory do not overwrite each other's files.
        let dir = std::env::temp_dir();
        let pid = std::process::id();
        let input = dir.join(format!("ud-cli-rt-in-{pid}"));
        let output = dir.join(format!("ud-cli-rt-out-{pid}"));
        std::fs::write(&input, b"\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00").unwrap();

        let result = roundtrip(&input, &output);

        // Clean up before asserting so a failure does not leave files behind.
        let _ = std::fs::remove_file(&input);
        let _ = std::fs::remove_file(&output);

        result.expect("identity round-trip should succeed");
    }
}