use std::fs::File;
use std::io::Read;
use std::path::Path;
use zip_core::ZipArchive;
use crate::meta::parse_logical_files;
use crate::{Aff4Error, ReadSeekSend, StoredHash};
/// Metadata for one file stored in an AFF4-Logical container.
///
/// Entries are built by [`LogicalContainer::open_reader`] from the records
/// returned by `parse_logical_files`, each paired with the ZIP segment that
/// was resolved for it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LogicalEntry {
/// Name of the ZIP segment resolved for this file; this is the key that
/// `LogicalContainer::read_file` looks up in the archive.
pub(crate) segment: String,
/// Original file name, copied from the parsed container metadata.
pub original_file_name: String,
/// Size copied from the parsed container metadata
/// (presumably in bytes — confirm against the metadata parser).
pub size: u64,
/// Hashes copied from the parsed container metadata.
pub hashes: Vec<StoredHash>,
/// Last-written value copied from the parsed metadata, when present.
/// The string format is whatever the parser produced (not visible here).
pub last_written: Option<String>,
}
/// An opened AFF4-Logical container: a ZIP archive together with the file
/// entries described by its `information.turtle` metadata.
pub struct LogicalContainer {
/// ZIP reader over the boxed backing stream; segment data is read from it.
archive: ZipArchive<Box<dyn ReadSeekSend>>,
/// Entries resolved when the container was opened; exposed via `files()`.
files: Vec<LogicalEntry>,
}
/// Debug output reports only how many entries the container holds; the
/// underlying archive reader is left out of the rendering.
impl std::fmt::Debug for LogicalContainer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let entry_count = self.files.len();
        let mut rendering = f.debug_struct("LogicalContainer");
        rendering.field("files", &entry_count);
        rendering.finish()
    }
}
impl LogicalContainer {
    /// Opens the AFF4-Logical container stored in the file at `path`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, or for any reason
    /// [`LogicalContainer::open_reader`] fails.
    pub fn open(path: &Path) -> Result<Self, Aff4Error> {
        let file = File::open(path)?;
        Self::open_reader(Box::new(file))
    }

    /// Opens an encrypted container at `path` using `password`.
    ///
    /// The stream produced by `crate::decrypt_encrypted_stream` is wrapped in
    /// an in-memory cursor and then opened like any other backing reader.
    ///
    /// # Errors
    ///
    /// Returns an error if decryption fails, or for any reason
    /// [`LogicalContainer::open_reader`] fails.
    pub fn open_encrypted(path: &Path, password: &str) -> Result<Self, Aff4Error> {
        let decrypted = crate::decrypt_encrypted_stream(path, password)?;
        let cursor = std::io::Cursor::new(decrypted);
        Self::open_reader(Box::new(cursor))
    }

    /// Opens a container from an arbitrary seekable reader.
    ///
    /// Reads `information.turtle` from the ZIP archive, parses the file
    /// records out of it, and resolves each record's ARN to a ZIP segment.
    ///
    /// # Errors
    ///
    /// Returns an error if the archive or its `information.turtle` member
    /// cannot be read, if the metadata fails to parse, and
    /// `Aff4Error::BadFormat` if no file records are found or if a record has
    /// no matching ZIP segment.
    pub fn open_reader(backing: Box<dyn ReadSeekSend>) -> Result<Self, Aff4Error> {
        let mut archive = ZipArchive::new(backing)?;

        // The temporary ZIP entry is dropped at the end of the statement,
        // releasing its borrow of `archive` before it is used again.
        let mut turtle = String::new();
        archive
            .by_name("information.turtle")?
            .read_to_string(&mut turtle)?;

        let parsed = parse_logical_files(&turtle)?;
        if parsed.is_empty() {
            return Err(Aff4Error::BadFormat(
                "no aff4:FileImage entries found — not an AFF4-Logical container".into(),
            ));
        }

        // Snapshot the segment names once so every record is resolved against
        // the same owned list.
        let names: Vec<String> = archive.file_names().map(String::from).collect();

        // Stops at the first record that has no matching segment.
        let files = parsed
            .into_iter()
            .map(|image| -> Result<LogicalEntry, Aff4Error> {
                let segment = resolve_segment(&names, &image.arn).ok_or_else(|| {
                    Aff4Error::BadFormat(format!(
                        "FileImage {} has no matching ZIP segment",
                        image.arn
                    ))
                })?;
                Ok(LogicalEntry {
                    segment,
                    original_file_name: image.original_file_name,
                    size: image.size,
                    hashes: image.hashes,
                    last_written: image.last_written,
                })
            })
            .collect::<Result<Vec<_>, Aff4Error>>()?;

        Ok(Self { archive, files })
    }

    /// Returns the entries found when the container was opened.
    pub fn files(&self) -> &[LogicalEntry] {
        self.files.as_slice()
    }

    /// Reads the full contents of the ZIP segment backing `entry`.
    ///
    /// # Errors
    ///
    /// Returns an error if the segment cannot be found in the archive or if
    /// reading it fails.
    pub fn read_file(&mut self, entry: &LogicalEntry) -> Result<Vec<u8>, Aff4Error> {
        let mut bytes = Vec::new();
        self.archive
            .by_name(&entry.segment)?
            .read_to_end(&mut bytes)?;
        Ok(bytes)
    }
}
/// Maps a file ARN to the name of the ZIP segment that stores its data.
///
/// The path portion of the ARN is everything from the first `/` after the
/// `aff4://` scheme (or the whole remainder when there is no `/`). Lookup
/// proceeds in this order, returning the first hit:
///
/// 1. a segment named exactly like the path, leading `/` included;
/// 2. a segment named exactly like the path without its leading slashes;
/// 3. a segment named like the ARN with its scheme percent-encoded
///    (see `urlencode_arn`);
/// 4. the longest non-directory segment whose name ends with the path
///    (leading slashes removed).
///
/// Returns `None` when nothing matches. An ARN whose path is empty never
/// takes the suffix fallback, because an empty suffix would match every
/// segment in the archive.
fn resolve_segment(names: &[String], arn: &str) -> Option<String> {
    let after_scheme = arn.strip_prefix("aff4://").unwrap_or(arn);
    let tail = match after_scheme.find('/') {
        Some(i) => &after_scheme[i..],
        None => after_scheme,
    };
    let relative = tail.trim_start_matches('/');
    let encoded = urlencode_arn(arn);

    // Exact matches take priority, tried in a fixed order.
    for candidate in [tail, relative, encoded.as_str()] {
        if names.iter().any(|n| n == candidate) {
            return Some(candidate.to_owned());
        }
    }

    // `ends_with("")` is true for every string, so without this guard an ARN
    // with no path (e.g. `aff4://vol/`) would resolve to an arbitrary segment.
    if relative.is_empty() {
        return None;
    }

    // NOTE(review): the suffix test is purely textual, so `dream.txt` also
    // matches `nightdream.txt`; kept as-is so existing containers resolve
    // exactly as before.
    names
        .iter()
        .filter(|n| !n.ends_with('/') && n.ends_with(relative))
        .max_by_key(|n| n.len())
        .cloned()
}
/// Builds the segment-name form of an ARN in which the `aff4://` scheme is
/// percent-encoded as `aff4%3A%2F%2F`.
///
/// Only the scheme is rewritten; the remainder of the ARN is copied through
/// verbatim. An input without the `aff4://` prefix still gets the encoded
/// scheme prepended.
fn urlencode_arn(arn: &str) -> String {
    const ENCODED_SCHEME: &str = "aff4%3A%2F%2F";

    let remainder = arn.strip_prefix("aff4://").unwrap_or(arn);
    let mut encoded = String::with_capacity(ENCODED_SCHEME.len() + remainder.len());
    encoded.push_str(ENCODED_SCHEME);
    encoded.push_str(remainder);
    encoded
}
// Unit tests for opening logical containers and for ARN-to-segment resolution.
#[cfg(test)]
// Tests are allowed to unwrap/expect: a panic is the intended failure signal.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use md5::Digest as _;
use std::io::Write as _;
use zip::write::{SimpleFileOptions, ZipWriter};
use zip::CompressionMethod;
/// Writes `data` to a fresh named temporary file and returns its handle;
/// the caller keeps the handle alive while using the file's path.
fn write_tmp(data: &[u8]) -> tempfile::NamedTempFile {
let mut f = tempfile::NamedTempFile::new().unwrap();
f.write_all(data).unwrap();
f
}
/// Builds an in-memory ZIP whose only member is `information.turtle`
/// containing `turtle`, written with the `Stored` compression method.
fn zip_with_turtle(turtle: &str) -> Vec<u8> {
let cursor = std::io::Cursor::new(Vec::<u8>::new());
let mut zw = ZipWriter::new(cursor);
let opts = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
zw.start_file("information.turtle", opts).unwrap();
zw.write_all(turtle.as_bytes()).unwrap();
zw.finish().unwrap().into_inner()
}
// The image built by `test_aff4` must be rejected with a `BadFormat` error
// that mentions `FileImage`.
#[test]
fn open_disk_image_as_logical_is_err() {
let img = crate::testutil::test_aff4(&[0u8; 512]);
let f = write_tmp(&img);
let err = LogicalContainer::open(f.path()).unwrap_err();
assert!(matches!(err, Aff4Error::BadFormat(m) if m.contains("FileImage")));
}
// The metadata declares a FileImage, but the ZIP holds no segment for it.
#[test]
fn file_image_without_segment_is_err() {
let turtle = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n\
@prefix aff4: <http://aff4.org/Schema#> .\n\
<aff4://vol/missing.txt> rdf:type aff4:FileImage ; aff4:size 3 .\n";
let f = write_tmp(&zip_with_turtle(turtle));
let err = LogicalContainer::open(f.path()).unwrap_err();
assert!(matches!(err, Aff4Error::BadFormat(m) if m.contains("no matching ZIP segment")));
}
// The hand-written `Debug` impl must at least include the type name.
#[test]
fn debug_impl_renders() {
let md5 = format!("{:x}", md5::Md5::digest(b"hi"));
let img = crate::testutil::test_aff4_logical("f.txt", b"hi", &md5);
let f = write_tmp(&img);
let container = LogicalContainer::open(f.path()).unwrap();
assert!(format!("{container:?}").contains("LogicalContainer"));
}
// Exercises each lookup path of `resolve_segment`.
#[test]
fn resolve_segment_branches() {
let names = vec![
"dir/dream.txt".to_string(),
"aff4%3A%2F%2Fu/enc.txt".to_string(),
];
// Exact match on the path with its leading slash removed.
assert_eq!(
resolve_segment(&names, "aff4://vol/dir/dream.txt").as_deref(),
Some("dir/dream.txt")
);
// Exact match on the ARN with its scheme percent-encoded.
assert_eq!(
resolve_segment(&names, "aff4://u/enc.txt").as_deref(),
Some("aff4%3A%2F%2Fu/enc.txt")
);
// An ARN with no path separator matches none of the names above.
assert!(resolve_segment(&names, "aff4://noslash").is_none());
// Suffix fallback: the segment name merely ends with the ARN's path.
let pref = vec!["a/b/c/dream.txt".to_string()];
assert_eq!(
resolve_segment(&pref, "aff4://vol/dream.txt").as_deref(),
Some("a/b/c/dream.txt")
);
}
}