use fs2::FileExt;
use std::fs::{self, OpenOptions};
use std::io::{self, Write};
use std::path::Path;
/// Witness record format version written by `append_one`; `parse_header` rejects any other value.
pub const VERSION: u32 = 1;
/// Size in bytes of the fixed record header: four little-endian `u32` fields.
pub const HEADER_LEN: usize = 16;
/// Fixed-size header that starts every witness record.
///
/// `parse_header` reads each field as a little-endian `u32` from the first
/// `HEADER_LEN` bytes of a record.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WitnessHeader {
/// Format version (header bytes 0..4); parsing fails unless it equals `VERSION`.
pub version: u32,
/// Hash of the operation id (header bytes 4..8), as produced by `op_id_hash`.
pub op_id_hash: u32,
/// Caller-supplied timestamp (header bytes 8..12); its unit is not defined in this file.
pub timestamp: u32,
/// Length in bytes of the input payload that follows the header (header bytes 12..16).
pub input_len: u32,
}
/// One fully decoded witness record: its header plus owned copies of both payloads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CorpusWitness {
/// Parsed record header.
pub header: WitnessHeader,
/// Input payload; `header.input_len` bytes copied from the record.
pub input: Vec<u8>,
/// Expected-output payload; its length is stored as a `u32` right after the input bytes.
pub expected_output: Vec<u8>,
}
/// Errors reported while reading, appending to, or validating a witness corpus.
#[derive(Debug, thiserror::Error)]
pub enum WitnessError {
/// A filesystem operation failed (`read_all`, `append_one`).
#[error("Fix: witness file I/O failed for {path}: {source}")]
Io {
/// Display form of the path the failing operation targeted.
path: String,
/// Underlying I/O error.
#[source]
source: io::Error,
},
/// The slice handed to `parse_header` was shorter than `HEADER_LEN`.
#[error("Fix: witness header is {actual} bytes; expected 16 bytes")]
ShortHeader {
/// Number of bytes that were actually supplied.
actual: usize,
},
/// The header's version field did not equal `VERSION`.
#[error("Fix: unsupported witness version {version}; expected version 1")]
Version {
/// Version value found in the header.
version: u32,
},
/// The header's op-id hash did not match the hash of the requested op id (`verify_header`).
#[error("Fix: witness op hash {actual:#010x} does not match {expected:#010x}")]
OpHash {
/// Hash computed from the op id passed by the caller.
expected: u32,
/// Hash stored in the header.
actual: u32,
},
/// Fewer bytes remained than a record segment required (`ensure_available`).
///
/// `append_one` also returns this variant, with `offset: 0`, when a payload
/// length does not fit in a `u32`.
#[error("Fix: truncated witness at offset {offset}; needed {needed} bytes, had {remaining}")]
Truncated {
/// Byte offset at which the segment was expected to start.
offset: usize,
/// Number of bytes the segment required.
needed: usize,
/// Number of bytes available from `offset` onwards.
remaining: usize,
},
/// The new corpus bytes do not begin with the previous corpus bytes (`verify_append_only`).
#[error("Fix: witness corpus changed before append boundary at byte {offset}")]
AppendOnly {
/// First differing byte, or the new length when the new data is shorter.
offset: usize,
},
/// The corpus file exceeds the size cap enforced by `read_all`.
#[error(
"Fix: witness file too large for {path} ({size} bytes); max allowed is 1073741824 bytes"
)]
FileTooLarge {
/// Display form of the rejected file's path.
path: String,
/// Size of the file in bytes.
size: u64,
},
}
/// Hashes an operation id into the 32-bit value stored in witness headers.
///
/// This is the 32-bit FNV-1a scheme: starting from the offset basis, each byte
/// of `op_id` is XORed into the state, which is then multiplied by the FNV
/// prime with wrapping arithmetic. An empty id hashes to the offset basis.
#[inline]
pub fn op_id_hash(op_id: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811C_9DC5;
    const PRIME: u32 = 0x0100_0193;
    op_id.bytes().fold(OFFSET_BASIS, |state, byte| {
        (state ^ u32::from(byte)).wrapping_mul(PRIME)
    })
}
/// Parses a witness header and checks that it belongs to `op_id`.
///
/// # Errors
///
/// Returns whatever `parse_header` reports for a short header or an
/// unsupported version, and `WitnessError::OpHash` when the stored hash
/// differs from `op_id_hash(op_id)`.
#[inline]
pub fn verify_header(header: &[u8], op_id: &str) -> Result<WitnessHeader, WitnessError> {
    let parsed = parse_header(header)?;
    let expected = op_id_hash(op_id);
    if parsed.op_id_hash == expected {
        Ok(parsed)
    } else {
        Err(WitnessError::OpHash {
            expected,
            actual: parsed.op_id_hash,
        })
    }
}
#[inline]
pub fn read_all(path: &Path) -> Result<Vec<CorpusWitness>, WitnessError> {
const MAX_SIZE: u64 = 1024 * 1024 * 1024;
let metadata = fs::metadata(path).map_err(|source| WitnessError::Io {
path: path.display().to_string(),
source,
})?;
if metadata.len() > MAX_SIZE {
return Err(WitnessError::FileTooLarge {
path: path.display().to_string(),
size: metadata.len(),
});
}
let bytes = fs::read(path).map_err(|source| WitnessError::Io {
path: path.display().to_string(),
source,
})?;
decode_all(&bytes)
}
#[inline]
pub fn append_one(
path: &Path,
op_id: &str,
input: &[u8],
expected_output: &[u8],
timestamp: u32,
) -> Result<(), WitnessError> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).map_err(|source| WitnessError::Io {
path: parent.display().to_string(),
source,
})?;
}
let mut file = OpenOptions::new()
.create(true)
.append(true)
.open(path)
.map_err(|source| WitnessError::Io {
path: path.display().to_string(),
source,
})?;
file.lock_exclusive().map_err(|source| WitnessError::Io {
path: path.display().to_string(),
source,
})?;
let input_len = u32::try_from(input.len()).map_err(|_| WitnessError::Truncated {
offset: 0,
needed: input.len(),
remaining: u32::MAX as usize,
})?;
let output_len = u32::try_from(expected_output.len()).map_err(|_| WitnessError::Truncated {
offset: 0,
needed: expected_output.len(),
remaining: u32::MAX as usize,
})?;
file.write_all(&VERSION.to_le_bytes())
.and_then(|()| file.write_all(&op_id_hash(op_id).to_le_bytes()))
.and_then(|()| file.write_all(×tamp.to_le_bytes()))
.and_then(|()| file.write_all(&input_len.to_le_bytes()))
.and_then(|()| file.write_all(input))
.and_then(|()| file.write_all(&output_len.to_le_bytes()))
.and_then(|()| file.write_all(expected_output))
.and_then(|()| file.flush())
.and_then(|()| file.sync_data())
.map_err(|source| WitnessError::Io {
path: path.display().to_string(),
source,
})
}
/// Checks that `new_bytes` is `previous_bytes` with zero or more bytes appended,
/// and that the resulting corpus still decodes.
///
/// # Errors
///
/// * `WitnessError::AppendOnly` with `offset` set to `new_bytes.len()` when the
///   new data is shorter than the previous data, or to the index of the first
///   differing byte when the shared prefix was modified.
/// * Any decoding error reported by `decode_all` for `new_bytes`.
#[inline]
pub fn verify_append_only(previous_bytes: &[u8], new_bytes: &[u8]) -> Result<(), WitnessError> {
    match new_bytes.get(..previous_bytes.len()) {
        // `get` yields `None` exactly when the new data is shorter than the old data.
        None => Err(WitnessError::AppendOnly {
            offset: new_bytes.len(),
        }),
        Some(prefix) if prefix != previous_bytes => {
            let offset = previous_bytes
                .iter()
                .zip(prefix)
                .position(|(old, new)| old != new)
                .unwrap_or(previous_bytes.len());
            Err(WitnessError::AppendOnly { offset })
        }
        Some(_) => {
            decode_all(new_bytes)?;
            Ok(())
        }
    }
}
/// Decodes a byte buffer holding zero or more back-to-back witness records.
///
/// Each record is a `HEADER_LEN`-byte header, `input_len` input bytes, a
/// little-endian `u32` output length, and that many output bytes. Every
/// segment is bounds-checked with `ensure_available` before it is sliced, so
/// a short buffer yields `WitnessError::Truncated` instead of a panic. Header
/// problems are reported by `parse_header`.
fn decode_all(bytes: &[u8]) -> Result<Vec<CorpusWitness>, WitnessError> {
    let mut witnesses = Vec::new();
    let mut record_start = 0;
    while record_start < bytes.len() {
        // Header.
        ensure_available(bytes, record_start, HEADER_LEN)?;
        let input_start = record_start + HEADER_LEN;
        let header = parse_header(&bytes[record_start..input_start])?;

        // Input payload.
        let input_len = header.input_len as usize;
        ensure_available(bytes, input_start, input_len)?;
        let input_end = input_start + input_len;
        let input = bytes[input_start..input_end].to_vec();

        // Output length prefix.
        ensure_available(bytes, input_end, 4)?;
        let output_len = read_u32(bytes, input_end) as usize;
        let output_start = input_end + 4;

        // Output payload.
        ensure_available(bytes, output_start, output_len)?;
        let output_end = output_start + output_len;
        let expected_output = bytes[output_start..output_end].to_vec();

        witnesses.push(CorpusWitness {
            header,
            input,
            expected_output,
        });
        record_start = output_end;
    }
    Ok(witnesses)
}
/// Parses the leading `HEADER_LEN` bytes of `header` into a `WitnessHeader`.
///
/// The four fields are consecutive little-endian `u32` values. Bytes beyond
/// `HEADER_LEN` are ignored.
///
/// # Errors
///
/// * `WitnessError::ShortHeader` if fewer than `HEADER_LEN` bytes are supplied.
/// * `WitnessError::Version` if the version field is not `VERSION`.
fn parse_header(header: &[u8]) -> Result<WitnessHeader, WitnessError> {
    let actual = header.len();
    if actual < HEADER_LEN {
        return Err(WitnessError::ShortHeader { actual });
    }

    // The n-th header field occupies bytes 4n..4n+4.
    let field = |index: usize| read_u32(header, index * 4);

    let version = field(0);
    if version != VERSION {
        return Err(WitnessError::Version { version });
    }

    Ok(WitnessHeader {
        version,
        op_id_hash: field(1),
        timestamp: field(2),
        input_len: field(3),
    })
}
/// Reads a little-endian `u32` from the four bytes of `bytes` starting at `offset`.
///
/// # Panics
///
/// Panics if `bytes` does not contain four bytes starting at `offset`; callers
/// in this file check the available length first.
fn read_u32(bytes: &[u8], offset: usize) -> u32 {
    let mut word = [0_u8; 4];
    word.copy_from_slice(&bytes[offset..offset + 4]);
    u32::from_le_bytes(word)
}
/// Confirms that at least `needed` bytes of `bytes` remain from `offset` onwards.
///
/// An `offset` past the end of `bytes` counts as zero bytes remaining.
///
/// # Errors
///
/// Returns `WitnessError::Truncated` carrying `offset`, `needed`, and the
/// number of bytes actually remaining when the buffer is too short.
fn ensure_available(bytes: &[u8], offset: usize, needed: usize) -> Result<(), WitnessError> {
    match bytes.len().saturating_sub(offset) {
        remaining if remaining < needed => Err(WitnessError::Truncated {
            offset,
            needed,
            remaining,
        }),
        _ => Ok(()),
    }
}