use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use crate::CryptoError;
use super::format::empty_archive_error;
use super::limits::{ArchiveLimits, enforce_entry_count_cap, enforce_total_plaintext_bytes_cap};
use super::model::{ArchiveEntry, ArchiveEntryKind};
use super::path::ascii_case_collision_key;
/// Returns the portion of `path` that precedes its last `/`.
///
/// Yields `None` when `path` contains no `/` at all, i.e. when it is a single
/// component with no parent. The returned slice borrows from `path`.
pub(super) fn parent_path_utf8(path: &str) -> Option<&str> {
    let (parent, _leaf) = path.rsplit_once('/')?;
    Some(parent)
}
/// Returns the text of `path` up to (not including) its first `/`.
///
/// A path without any `/` is returned unchanged.
fn first_component(path: &str) -> &str {
    path.split_once('/').map_or(path, |(head, _rest)| head)
}
/// Builds the `InvalidInput` error reported when the parent directory of
/// `entry` is absent from the manifest. The message embeds the entry's path.
fn parent_missing(entry: &ArchiveEntry) -> CryptoError {
    let message = format!(
        "Archive entry parent directory is missing: {}",
        entry.path_utf8,
    );
    CryptoError::InvalidInput(message)
}
/// Validates that `entries` describe exactly one well-formed tree.
///
/// Checks are applied in this order:
///
/// 1. The manifest is non-empty.
/// 2. The entry count and `total_file_bytes` pass `enforce_entry_count_cap`
///    and `enforce_total_plaintext_bytes_cap` for `limits`.
/// 3. Every entry shares the first path component of `entries[0]` (the root).
/// 4. No path occurs twice, either byte-for-byte or under the key produced by
///    `ascii_case_collision_key`.
/// 5. An entry for the root path itself exists. A file root must be the only
///    entry; under a directory root, every other entry's parent path must be
///    present in the manifest as a directory.
///
/// Entries may be listed in any order: parent checks run only after every
/// entry has been indexed.
///
/// # Returns
///
/// `(root, root_is_file, root_mode)`, where `root` is the shared first path
/// component, `root_is_file` tells whether the root entry is a file, and
/// `root_mode` is the `mode` field of the root entry.
///
/// # Errors
///
/// * The error from `empty_archive_error` when `entries` is empty.
/// * Whatever the two cap-enforcement helpers return when a cap is exceeded.
/// * `CryptoError::InvalidInput` for multiple top-level roots, duplicate
///   paths (exact or ASCII case-insensitive), a file root with additional
///   entries, a missing root entry, a missing parent directory, or an entry
///   nested under a file.
/// * `CryptoError::InternalInvariant` if the root mode was not recorded even
///   though a root entry was found.
pub(super) fn validate_manifest_tree(
    entries: &[ArchiveEntry],
    total_file_bytes: u64,
    limits: ArchiveLimits,
) -> Result<(OsString, bool, u32), CryptoError> {
    let first = match entries.first() {
        Some(entry) => entry,
        None => return Err(empty_archive_error()),
    };

    // Saturate instead of truncating: a length that does not fit in `u32` is
    // presented to the cap check as `u32::MAX`.
    let entry_count = u32::try_from(entries.len()).unwrap_or(u32::MAX);
    enforce_entry_count_cap(entry_count, &limits)?;
    enforce_total_plaintext_bytes_cap(total_file_bytes, &limits)?;

    let root = first_component(&first.path_utf8);

    // `kinds` doubles as the exact-duplicate detector: `HashMap::insert`
    // returns the previous value when the key was already present, so a
    // separate set of seen paths is unnecessary.
    let mut kinds: HashMap<&str, ArchiveEntryKind> = HashMap::with_capacity(entries.len());
    let mut ascii_ci: HashSet<Vec<u8>> = HashSet::with_capacity(entries.len());
    let mut root_mode: Option<u32> = None;

    for entry in entries {
        if first_component(&entry.path_utf8) != root {
            return Err(CryptoError::InvalidInput(
                "Archive has multiple top-level roots".to_string(),
            ));
        }
        // The exact check must precede the case-insensitive one so that a
        // byte-identical repeat is reported with the plain duplicate message.
        if kinds.insert(&entry.path_utf8, entry.kind).is_some() {
            return Err(CryptoError::InvalidInput(format!(
                "Duplicate archive entry: {}",
                entry.path_utf8,
            )));
        }
        if !ascii_ci.insert(ascii_case_collision_key(&entry.path_utf8)) {
            return Err(CryptoError::InvalidInput(format!(
                "Duplicate archive entry under ASCII case-insensitive comparison: {}",
                entry.path_utf8,
            )));
        }
        if entry.path_utf8 == root {
            root_mode = Some(entry.mode);
        }
    }

    let root_is_file = match kinds.get(root).copied() {
        Some(ArchiveEntryKind::File) => {
            // Every entry shares the root component, so any extra entry would
            // have to live underneath the root file.
            if entries.len() != 1 {
                return Err(CryptoError::InvalidInput(
                    "Archive root file has child entries".to_string(),
                ));
            }
            true
        }
        Some(ArchiveEntryKind::Directory) => {
            for entry in entries {
                if entry.path_utf8 == root {
                    continue;
                }
                let parent =
                    parent_path_utf8(&entry.path_utf8).ok_or_else(|| parent_missing(entry))?;
                match kinds.get(parent) {
                    Some(ArchiveEntryKind::Directory) => {}
                    Some(ArchiveEntryKind::File) => {
                        return Err(CryptoError::InvalidInput(format!(
                            "Archive entry has child under file path: {}",
                            entry.path_utf8,
                        )));
                    }
                    None => return Err(parent_missing(entry)),
                }
            }
            false
        }
        None => {
            return Err(CryptoError::InvalidInput(
                "Archive directory root entry is missing".to_string(),
            ));
        }
    };

    // A root entry was found above, so its mode was recorded in the first
    // loop; the error arm only guards against that invariant being broken.
    let root_mode = root_mode.ok_or(CryptoError::InternalInvariant(
        "Root entry mode missing from validated manifest",
    ))?;

    Ok((OsString::from(root), root_is_file, root_mode))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::archive::model::make_entry;

    /// Default limits, used by every test that is not exercising a cap.
    fn limits() -> ArchiveLimits {
        ArchiveLimits::default()
    }

    /// Builds a manifest entry via `make_entry`, always passing `0o644` as
    /// its final argument.
    fn entry(path: &str, kind: ArchiveEntryKind, size: u64) -> ArchiveEntry {
        make_entry(path, kind, size, 0o644)
    }

    /// Shorthand for a zero-sized directory entry.
    fn dir(path: &str) -> ArchiveEntry {
        entry(path, ArchiveEntryKind::Directory, 0)
    }

    /// Shorthand for a file entry of `size` bytes.
    fn file(path: &str, size: u64) -> ArchiveEntry {
        entry(path, ArchiveEntryKind::File, size)
    }

    /// Asserts that validation succeeds under default limits and reports the
    /// expected root name and root kind.
    #[track_caller]
    fn assert_accepted(manifest: &[ArchiveEntry], total: u64, want_root: &str, want_file: bool) {
        let (root, is_file, _root_mode) =
            validate_manifest_tree(manifest, total, limits()).unwrap();
        assert_eq!(root, OsString::from(want_root));
        assert_eq!(is_file, want_file);
    }

    /// Asserts that validation fails and that the rendered error contains
    /// `needle`.
    #[track_caller]
    fn assert_rejected(manifest: &[ArchiveEntry], total: u64, caps: ArchiveLimits, needle: &str) {
        let err = validate_manifest_tree(manifest, total, caps).unwrap_err();
        assert!(err.to_string().contains(needle));
    }

    #[test]
    fn parent_path_utf8_basic() {
        let cases = [("a", None), ("a/b", Some("a")), ("a/b/c", Some("a/b"))];
        for (input, expected) in cases {
            assert_eq!(parent_path_utf8(input), expected);
        }
    }

    #[test]
    fn accepts_single_file_root() {
        assert_accepted(&[file("hello.txt", 100)], 100, "hello.txt", true);
    }

    #[test]
    fn accepts_root_directory_only() {
        assert_accepted(&[dir("emptydir")], 0, "emptydir", false);
    }

    #[test]
    fn accepts_directory_with_files() {
        let manifest = [
            dir("photos"),
            file("photos/index.txt", 50),
            file("photos/cover.jpg", 1024),
        ];
        assert_accepted(&manifest, 1074, "photos", false);
    }

    #[test]
    fn accepts_nested_tree() {
        let manifest = [
            dir("root"),
            dir("root/a"),
            dir("root/a/b"),
            file("root/a/b/leaf.txt", 42),
        ];
        assert_accepted(&manifest, 42, "root", false);
    }

    #[test]
    fn accepts_non_canonical_order() {
        // Children are listed before their parents on purpose.
        let manifest = [
            file("root/a/b/leaf.txt", 42),
            dir("root/a/b"),
            dir("root/a"),
            dir("root"),
        ];
        assert_accepted(&manifest, 42, "root", false);
    }

    #[test]
    fn rejects_empty_entries() {
        assert_rejected(&[], 0, limits(), "Empty archive");
    }

    #[test]
    fn rejects_multiple_top_level_roots() {
        let manifest = [file("a.txt", 1), file("b.txt", 1)];
        assert_rejected(&manifest, 2, limits(), "multiple top-level roots");
    }

    #[test]
    fn rejects_root_file_with_children() {
        let manifest = [file("root", 10), file("root/child.txt", 5)];
        assert_rejected(&manifest, 15, limits(), "root file has child entries");
    }

    #[test]
    fn rejects_missing_directory_root_entry() {
        let manifest = [file("root/child.txt", 10)];
        assert_rejected(&manifest, 10, limits(), "directory root entry is missing");
    }

    #[test]
    fn rejects_missing_intermediate_parent() {
        let manifest = [dir("root"), file("root/a/b.txt", 10)];
        assert_rejected(&manifest, 10, limits(), "parent directory is missing");
    }

    #[test]
    fn rejects_child_under_file() {
        let manifest = [
            dir("root"),
            file("root/leaf", 5),
            file("root/leaf/illegal", 1),
        ];
        assert_rejected(&manifest, 6, limits(), "child under file path");
    }

    #[test]
    fn rejects_exact_duplicate() {
        let manifest = [
            dir("root"),
            file("root/file.txt", 10),
            file("root/file.txt", 10),
        ];
        assert_rejected(&manifest, 20, limits(), "Duplicate archive entry");
    }

    #[test]
    fn rejects_ascii_ci_duplicate() {
        let manifest = [
            dir("root"),
            file("root/Foo.txt", 10),
            file("root/FOO.TXT", 10),
        ];
        assert_rejected(&manifest, 20, limits(), "ASCII case-insensitive");
    }

    #[test]
    fn rejects_ascii_ci_duplicate_directories() {
        let manifest = [dir("root"), dir("root/Sub"), dir("root/SUB")];
        assert_rejected(&manifest, 0, limits(), "ASCII case-insensitive");
    }

    #[test]
    fn rejects_ascii_ci_file_vs_directory_collision() {
        let manifest = [dir("root"), file("root/Foo", 5), dir("root/foo")];
        assert_rejected(&manifest, 5, limits(), "ASCII case-insensitive");
    }

    #[test]
    fn rejects_file_directory_collision() {
        let manifest = [dir("root"), file("root/x", 10), dir("root/x")];
        assert_rejected(&manifest, 10, limits(), "Duplicate");
    }

    #[test]
    fn rejects_total_bytes_above_cap() {
        let caps = ArchiveLimits::default().with_max_total_plaintext_bytes(100);
        let manifest = [file("file.txt", 50)];
        assert_rejected(&manifest, 200, caps, "total-bytes cap exceeded");
    }

    #[test]
    fn rejects_entry_count_above_cap() {
        let caps = ArchiveLimits::default().with_max_entry_count(1);
        let manifest = [dir("root"), file("root/a.txt", 10)];
        assert_rejected(&manifest, 10, caps, "entry-count cap exceeded");
    }
}