use std::collections::BTreeMap;
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Serializable record of a set of files together with per-directory and
/// whole-tree hashes derived from the files' content hashes.
#[derive(Debug, Serialize, Deserialize)]
pub struct Manifest {
/// Format version; `Manifest::new` stamps it with `MANIFEST_VERSION`.
pub version: u32,
/// Model repository identifier; compared by `Manifest::is_compatible`.
pub model_repo: String,
/// Hex BLAKE3 digest over all file content hashes. Starts empty in
/// `Manifest::new` and is set by `Manifest::recompute_hashes`.
pub root_hash: String,
/// Directory path -> aggregate entry; rebuilt from scratch by
/// `Manifest::recompute_hashes`.
pub directories: BTreeMap<String, DirEntry>,
/// File path (as passed to `Manifest::add_file`) -> file metadata.
pub files: BTreeMap<String, FileEntry>,
}
/// Aggregate information about one directory, as produced by
/// `Manifest::recompute_hashes`.
#[derive(Debug, Serialize, Deserialize)]
pub struct DirEntry {
/// Hex BLAKE3 digest of the sorted, concatenated content hashes of every
/// file located anywhere beneath this directory.
pub hash: String,
/// `Manifest::recompute_hashes` always writes 0 here.
pub mtime_secs: u64,
}
/// Metadata recorded for a single file; every field is stored exactly as
/// passed to `Manifest::add_file`.
#[derive(Debug, Serialize, Deserialize)]
pub struct FileEntry {
/// Presumably the file's modification time in seconds — the epoch is not
/// established in this file; confirm against the caller.
pub mtime_secs: u64,
/// Presumably the file size in bytes — confirm against the caller.
pub size: u64,
/// Caller-supplied hash string identifying the file content; it is the
/// only per-file input to the directory and root digests.
pub content_hash: String,
/// Caller-supplied chunk count; NOTE(review): what constitutes a chunk is
/// defined outside this file.
pub chunk_count: usize,
}
/// Manifest format version written by `Manifest::new` and required by
/// `Manifest::is_compatible`.
pub const MANIFEST_VERSION: u32 = 3;
impl Manifest {
    /// Creates an empty manifest for `model_repo`, stamped with
    /// [`MANIFEST_VERSION`].
    ///
    /// `root_hash` starts as an empty string and both maps start empty; call
    /// [`Manifest::recompute_hashes`] after adding files to populate the
    /// derived hashes.
    #[must_use]
    pub fn new(model_repo: &str) -> Self {
        Self {
            version: MANIFEST_VERSION,
            model_repo: model_repo.to_string(),
            root_hash: String::new(),
            directories: BTreeMap::new(),
            files: BTreeMap::new(),
        }
    }

    /// Inserts the entry for `relative_path`, replacing any existing entry
    /// stored under the same path.
    ///
    /// The directory and root hashes are *not* updated here; call
    /// [`Manifest::recompute_hashes`] once all changes have been applied.
    pub fn add_file(
        &mut self,
        relative_path: &str,
        mtime_secs: u64,
        size: u64,
        content_hash: &str,
        chunk_count: usize,
    ) {
        self.files.insert(
            relative_path.to_string(),
            FileEntry {
                mtime_secs,
                size,
                content_hash: content_hash.to_string(),
                chunk_count,
            },
        );
    }

    /// Removes the entry for `relative_path`; a missing path is a no-op.
    ///
    /// The directory and root hashes are *not* updated here; call
    /// [`Manifest::recompute_hashes`] afterwards.
    pub fn remove_file(&mut self, relative_path: &str) {
        self.files.remove(relative_path);
    }

    /// Rebuilds `directories` and `root_hash` from the current `files` map.
    ///
    /// Every non-empty ancestor directory of every file path receives a
    /// [`DirEntry`] whose hash is the BLAKE3 digest of the sorted,
    /// concatenated content hashes of all files beneath it (at any depth).
    /// `root_hash` is the same digest taken over every file in the manifest.
    /// Directory `mtime_secs` is always written as 0.
    ///
    /// NOTE(review): only content hashes feed the digests — file paths are
    /// not included and hashes are concatenated without a separator — so
    /// renaming a file within its directory leaves every digest unchanged.
    /// Changing that would alter persisted hashes, so it is left as is.
    pub fn recompute_hashes(&mut self) {
        // Borrow the content hashes instead of cloning one `String` per
        // (file, ancestor) pair; only the directory keys need to be owned.
        let mut dir_children: BTreeMap<String, Vec<&str>> = BTreeMap::new();
        for (file_path, entry) in &self.files {
            // `ancestors()` yields the path itself first, then each parent.
            for dir in Path::new(file_path).ancestors().skip(1) {
                let dir_str = dir.to_string_lossy();
                // The empty path stands for the manifest root, which is
                // covered by `root_hash` rather than a directory entry.
                if dir_str.is_empty() {
                    break;
                }
                dir_children
                    .entry(dir_str.into_owned())
                    .or_default()
                    .push(entry.content_hash.as_str());
            }
        }

        self.directories = dir_children
            .into_iter()
            .map(|(dir_path, child_hashes)| {
                let entry = DirEntry {
                    hash: Self::combined_digest(child_hashes),
                    mtime_secs: 0,
                };
                (dir_path, entry)
            })
            .collect();

        let all_hashes: Vec<&str> = self
            .files
            .values()
            .map(|e| e.content_hash.as_str())
            .collect();
        self.root_hash = Self::combined_digest(all_hashes);
    }

    /// Sorts `hashes`, concatenates them with no separator, and returns the
    /// hex BLAKE3 digest of the result.
    ///
    /// Sorting makes the digest independent of insertion order.
    fn combined_digest(mut hashes: Vec<&str>) -> String {
        hashes.sort_unstable();
        blake3::hash(hashes.concat().as_bytes()).to_hex().to_string()
    }

    /// Serializes the manifest to pretty-printed JSON.
    ///
    /// # Errors
    ///
    /// Returns `crate::Error::Other` if `serde_json` fails to serialize.
    pub fn to_json(&self) -> crate::Result<String> {
        serde_json::to_string_pretty(self)
            .map_err(|e| crate::Error::Other(anyhow::anyhow!("manifest serialization: {e}")))
    }

    /// Parses a manifest from a JSON string.
    ///
    /// No version or repository check is performed; use
    /// [`Manifest::is_compatible`] on the result.
    ///
    /// # Errors
    ///
    /// Returns `crate::Error::Other` if `json` does not deserialize into a
    /// `Manifest`.
    pub fn from_json(json: &str) -> crate::Result<Self> {
        serde_json::from_str(json)
            .map_err(|e| crate::Error::Other(anyhow::anyhow!("manifest deserialization: {e}")))
    }

    /// Writes the manifest as JSON to `path`, creating missing parent
    /// directories first.
    ///
    /// NOTE(review): the file is overwritten in place via `std::fs::write`,
    /// so an interrupted write can leave a truncated manifest behind.
    ///
    /// # Errors
    ///
    /// Returns `crate::Error::Other` if serialization fails, or
    /// `crate::Error::Io` (carrying the offending path) if creating the
    /// parent directory or writing the file fails.
    pub fn save(&self, path: &Path) -> crate::Result<()> {
        let json = self.to_json()?;
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).map_err(|e| crate::Error::Io {
                path: parent.display().to_string(),
                source: e,
            })?;
        }
        std::fs::write(path, json).map_err(|e| crate::Error::Io {
            path: path.display().to_string(),
            source: e,
        })
    }

    /// Reads and parses the manifest stored at `path`.
    ///
    /// # Errors
    ///
    /// Returns `crate::Error::Io` if the file cannot be read as UTF-8 text,
    /// or `crate::Error::Other` if its contents are not a valid manifest.
    pub fn load(path: &Path) -> crate::Result<Self> {
        let json = std::fs::read_to_string(path).map_err(|e| crate::Error::Io {
            path: path.display().to_string(),
            source: e,
        })?;
        Self::from_json(&json)
    }

    /// Returns `true` only when the manifest's version equals
    /// [`MANIFEST_VERSION`] and its `model_repo` equals the given one.
    #[must_use]
    pub fn is_compatible(&self, model_repo: &str) -> bool {
        self.version == MANIFEST_VERSION && self.model_repo == model_repo
    }

    /// Returns the content hash of every file entry, ordered by file path.
    ///
    /// Duplicates are preserved: two files with identical content hashes
    /// contribute two elements.
    #[must_use]
    pub fn referenced_hashes(&self) -> Vec<String> {
        self.files
            .values()
            .map(|e| e.content_hash.clone())
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One `add_file` call's worth of arguments:
    /// (path, mtime_secs, size, content_hash, chunk_count).
    type FileSpec<'a> = (&'a str, u64, u64, &'a str, usize);

    /// Builds a manifest for `repo`, adds `specs` in the given order and
    /// recomputes the derived hashes.
    fn hashed_manifest(repo: &str, specs: &[FileSpec<'_>]) -> Manifest {
        let mut manifest = Manifest::new(repo);
        for &(path, mtime, size, content_hash, chunks) in specs {
            manifest.add_file(path, mtime, size, content_hash, chunks);
        }
        manifest.recompute_hashes();
        manifest
    }

    /// Serializing to JSON and parsing back keeps files, repo and root hash.
    #[test]
    fn round_trip_json() {
        let original = hashed_manifest(
            "BAAI/bge-small-en-v1.5",
            &[
                ("src/main.rs", 1000, 4523, "abc123", 8),
                ("src/lib.rs", 1001, 2000, "def456", 5),
            ],
        );
        let encoded = original.to_json().unwrap();
        let decoded = Manifest::from_json(&encoded).unwrap();
        assert_eq!(decoded.files.len(), 2);
        assert_eq!(decoded.model_repo, "BAAI/bge-small-en-v1.5");
        assert!(!decoded.root_hash.is_empty());
    }

    /// A different content hash for the same path yields a different root.
    #[test]
    fn root_hash_changes_on_file_change() {
        let root_for = |mtime: u64, content_hash: &str| {
            hashed_manifest("model", &[("a.rs", mtime, 100, content_hash, 5)]).root_hash
        };
        let before = root_for(1000, "hash1");
        let after = root_for(1001, "hash2");
        assert_ne!(before, after);
    }

    /// Insertion order of files must not influence the root hash.
    #[test]
    fn root_hash_stable_for_same_content() {
        let file_a: FileSpec<'_> = ("a.rs", 1000, 100, "hash1", 5);
        let file_b: FileSpec<'_> = ("b.rs", 1001, 200, "hash2", 3);
        let forward = hashed_manifest("model", &[file_a, file_b]);
        let reversed = hashed_manifest("model", &[file_b, file_a]);
        assert_eq!(forward.root_hash, reversed.root_hash);
    }

    /// Each parent directory of a file gets its own directory entry.
    #[test]
    fn directory_hashes_computed() {
        let manifest = hashed_manifest(
            "model",
            &[
                ("src/main.rs", 1000, 100, "hash1", 5),
                ("src/lib.rs", 1001, 200, "hash2", 3),
                ("tests/test.rs", 1002, 300, "hash3", 2),
            ],
        );
        for dir in ["src", "tests"] {
            assert!(manifest.directories.contains_key(dir));
        }
    }

    /// A manifest written to disk loads back with the same contents.
    #[test]
    fn save_and_load() {
        let scratch = tempfile::TempDir::new().unwrap();
        let manifest_path = scratch.path().join("manifest.json");
        let written = hashed_manifest("test-model", &[("foo.rs", 100, 50, "aaa", 1)]);
        written.save(&manifest_path).unwrap();
        let read_back = Manifest::load(&manifest_path).unwrap();
        assert_eq!(read_back.files.len(), 1);
        assert_eq!(read_back.root_hash, written.root_hash);
    }

    /// Compatibility requires the same model repository.
    #[test]
    fn is_compatible() {
        let manifest = Manifest::new("BAAI/bge-small-en-v1.5");
        assert!(manifest.is_compatible("BAAI/bge-small-en-v1.5"));
        assert!(!manifest.is_compatible("nomic-ai/CodeRankEmbed"));
    }

    /// Every file's content hash is reported, without recomputing hashes.
    #[test]
    fn referenced_hashes() {
        let mut manifest = Manifest::new("model");
        manifest.add_file("a.rs", 1, 1, "hash_a", 1);
        manifest.add_file("b.rs", 2, 2, "hash_b", 2);
        let reported = manifest.referenced_hashes();
        assert_eq!(reported.len(), 2);
        for expected in ["hash_a", "hash_b"] {
            assert!(reported.iter().any(|h| h == expected));
        }
    }
}