use super::metadata::{Confidence, FileMetadata, MetadataCache};
use crate::core::NormalizedPath;
use serde::{Deserialize, Serialize};
use std::io::Write;
use std::path::Path;
use std::time::{Instant, SystemTime};
/// Version tag stored in the `version` field of every snapshot written by
/// `MetadataCache::save_to_disk`. `MetadataCache::load_from_disk` rejects a
/// file whose stored version differs from this value.
pub const FORMAT_VERSION: u32 = 1;
/// Serialized form of a single cache entry.
///
/// Holds the subset of `FileMetadata` that `save_to_disk` copies out; the
/// `confidence` and `last_verified` fields are not stored and are filled in
/// again by `load_from_disk`.
// NOTE(review): bincode presumably encodes these fields positionally, so
// reordering or retyping them would change the on-disk layout — confirm and
// bump `FORMAT_VERSION` if this struct ever changes.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PersistedEntry {
/// Modification time copied from the cached `FileMetadata`.
mtime: SystemTime,
/// File size copied from the cached `FileMetadata`.
size: u64,
/// 32-byte content hash; only entries that have one are persisted.
content_hash: [u8; 32],
}
/// Top-level snapshot that is serialized to, and deserialized from, the
/// metadata cache file.
#[derive(Debug, Serialize, Deserialize)]
struct PersistedMetadata {
/// Format version; written as `FORMAT_VERSION` and compared against it on load.
version: u32,
/// One `(path, entry)` pair per persisted cache entry.
entries: Vec<(NormalizedPath, PersistedEntry)>,
}
impl MetadataCache {
    /// Writes every cached entry that carries a content hash to `path`.
    ///
    /// Entries whose `content_hash` is `None` are left out. When nothing is
    /// left to persist, the function logs at debug level and returns `Ok(())`
    /// without touching the filesystem. Otherwise the snapshot is encoded
    /// with bincode, the parent directory of `path` is created if needed, and
    /// the bytes are handed to `write_atomic_durable` through a hidden sibling
    /// temp file named `.<file name>.tmp-<pid>`.
    ///
    /// # Errors
    ///
    /// Returns an error if bincode encoding fails (wrapped with
    /// `std::io::Error::other`), if the parent directory cannot be created,
    /// or if `write_atomic_durable` fails. In the last case removal of the
    /// temp file is attempted before the error is returned.
    pub fn save_to_disk(&self, path: &Path) -> std::io::Result<()> {
        // Keep only the entries that have a hash worth persisting.
        let mut entries: Vec<(NormalizedPath, PersistedEntry)> = Vec::new();
        for (key, value) in self.iter_for_persist() {
            let Some(content_hash) = value.content_hash else {
                continue;
            };
            let persisted = PersistedEntry {
                mtime: value.mtime,
                size: value.size,
                content_hash,
            };
            entries.push((key, persisted));
        }

        if entries.is_empty() {
            tracing::debug!(
                path = %path.display(),
                "metadata cache flush: 0 persistable entries, skipping write"
            );
            return Ok(());
        }

        let entry_count = entries.len();
        let snapshot = PersistedMetadata {
            version: FORMAT_VERSION,
            entries,
        };
        let encoded = match bincode::serialize(&snapshot) {
            Ok(encoded) => encoded,
            Err(e) => {
                return Err(std::io::Error::other(format!("bincode serialize: {e}")));
            }
        };

        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent)?;
        }

        // The temp file lives next to the target so the final rename stays
        // inside one directory.
        let name = match path.file_name() {
            Some(n) => n.to_string_lossy().into_owned(),
            None => String::from("metadata.bin"),
        };
        let tmp = path.with_file_name(format!(".{name}.tmp-{}", std::process::id()));

        match write_atomic_durable(&tmp, path, &encoded) {
            Ok(()) => {
                tracing::info!(
                    path = %path.display(),
                    entries = entry_count,
                    bytes = encoded.len(),
                    "metadata cache flushed to disk"
                );
                Ok(())
            }
            Err(err) => {
                // Best-effort cleanup; the write error is what the caller sees.
                let _ = std::fs::remove_file(&tmp);
                Err(err)
            }
        }
    }

    /// Builds a cache from the snapshot stored at `path`.
    ///
    /// A missing file is not an error: it is logged at info level and an
    /// empty cache is returned. Every loaded entry is inserted with
    /// `Confidence::Medium`, a `last_verified` of the instant of loading, and
    /// the persisted content hash.
    ///
    /// # Errors
    ///
    /// Returns an error if the file exists but cannot be read, if bincode
    /// cannot decode it, or if the stored version differs from
    /// `FORMAT_VERSION`.
    pub fn load_from_disk(path: &Path) -> std::io::Result<Self> {
        let raw = match std::fs::read(path) {
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                tracing::info!(
                    path = %path.display(),
                    "metadata cache file not found, starting empty"
                );
                return Ok(Self::new());
            }
            other => other?,
        };

        let decoded: PersistedMetadata = match bincode::deserialize(&raw) {
            Ok(decoded) => decoded,
            Err(e) => {
                return Err(std::io::Error::other(format!("bincode deserialize: {e}")));
            }
        };
        let PersistedMetadata { version, entries } = decoded;
        if version != FORMAT_VERSION {
            return Err(std::io::Error::other(format!(
                "metadata snapshot version mismatch: file={} expected={}",
                version, FORMAT_VERSION
            )));
        }

        let cache = Self::new();
        // One timestamp shared by every entry loaded from this snapshot.
        let now = Instant::now();
        let entry_count = entries.len();
        entries.into_iter().for_each(|(key, persisted)| {
            let PersistedEntry {
                mtime,
                size,
                content_hash,
            } = persisted;
            cache.insert(
                key,
                FileMetadata {
                    mtime,
                    size,
                    confidence: Confidence::Medium,
                    last_verified: now,
                    content_hash: Some(content_hash),
                },
            );
        });

        tracing::info!(
            path = %path.display(),
            loaded = entry_count,
            "metadata cache loaded from disk"
        );
        Ok(cache)
    }
}
/// Writes `bytes` to `tmp`, syncs the file, then renames `tmp` onto `target`.
///
/// After the rename the directory containing `target` is opened and synced
/// on a best-effort basis: a failure to open or sync that directory is
/// ignored and does not fail the call.
///
/// # Errors
///
/// Returns any error from creating, writing, or syncing `tmp`, or from the
/// rename. `tmp` is not removed on failure; that is left to the caller.
fn write_atomic_durable(tmp: &Path, target: &Path, bytes: &[u8]) -> std::io::Result<()> {
    {
        // Scoped so the handle is closed before the rename.
        let mut file = std::fs::File::create(tmp)?;
        file.write_all(bytes)?;
        file.sync_all()?;
    }
    std::fs::rename(tmp, target)?;

    if let Some(parent) = target.parent() {
        // `Path::parent` returns an empty path for a bare relative file name
        // such as "metadata.bin". Opening "" always fails, which used to skip
        // the directory sync silently; the current directory is what is meant.
        let dir_path = if parent.as_os_str().is_empty() {
            Path::new(".")
        } else {
            parent
        };
        // Best effort: some platforms cannot open a directory as a file.
        if let Ok(dir) = std::fs::File::open(dir_path) {
            let _ = dir.sync_all();
        }
    }
    Ok(())
}
/// Module-private trait that gives `MetadataCache` a way to produce an owned
/// list of its entries for `save_to_disk` to iterate over.
trait PersistIter {
/// Returns the cache contents as owned `(path, metadata)` pairs.
fn iter_for_persist(&self) -> Vec<(NormalizedPath, FileMetadata)>;
}
impl PersistIter for MetadataCache {
    /// Collects `(path, metadata)` pairs by listing the cache's paths and
    /// looking each one up again.
    ///
    /// A path for which `get` returns `None` is left out of the result.
    fn iter_for_persist(&self) -> Vec<(NormalizedPath, FileMetadata)> {
        let mut pairs = Vec::new();
        for path in self.paths() {
            if let Some(meta) = self.get(&path) {
                pairs.push((path, meta));
            }
        }
        pairs
    }
}
#[cfg(test)]
mod tests {
    use super::super::metadata::Confidence;
    use super::*;
    use std::fs;
    use std::time::Duration;
    use tempfile::TempDir;

    /// Cache key used for the `i`-th fixture entry.
    fn fixture_key(i: u8) -> NormalizedPath {
        NormalizedPath::from(format!("/tmp/persist{i}.c"))
    }

    /// Modification time assigned to the `i`-th fixture entry.
    fn fixture_mtime(i: u8) -> SystemTime {
        SystemTime::UNIX_EPOCH + Duration::from_secs(1_000 + u64::from(i))
    }

    /// Builds a cache with five hashed, high-confidence entries.
    fn populated_cache() -> MetadataCache {
        let cache = MetadataCache::new();
        for i in 0u8..5 {
            cache.insert(
                fixture_key(i),
                FileMetadata {
                    mtime: fixture_mtime(i),
                    size: 100 + u64::from(i),
                    confidence: Confidence::High,
                    last_verified: Instant::now(),
                    content_hash: Some([i; 32]),
                },
            );
        }
        cache
    }

    /// Every persisted field must survive a save/load cycle, and loaded
    /// entries come back at `Confidence::Medium`.
    #[test]
    fn save_then_load_roundtrip_preserves_entries() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        let cache = populated_cache();
        cache.save_to_disk(&path).unwrap();
        assert!(path.exists());

        let loaded = MetadataCache::load_from_disk(&path).unwrap();
        assert_eq!(loaded.len(), 5);
        for i in 0u8..5 {
            let entry = loaded.get(&fixture_key(i)).unwrap();
            // mtime is a persisted field too; check it alongside size and hash.
            assert_eq!(entry.mtime, fixture_mtime(i));
            assert_eq!(entry.size, 100 + u64::from(i));
            assert_eq!(entry.content_hash, Some([i; 32]));
            assert_eq!(entry.confidence, Confidence::Medium);
        }
    }

    /// A missing snapshot file yields an empty cache, not an error.
    #[test]
    fn load_missing_file_returns_empty_cache() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("does-not-exist.bin");
        let cache = MetadataCache::load_from_disk(&path).unwrap();
        assert!(cache.is_empty());
    }

    /// Saving a cache with nothing to persist must not create the file.
    #[test]
    fn save_empty_cache_does_not_create_file() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        let cache = MetadataCache::new();
        cache.save_to_disk(&path).unwrap();
        assert!(
            !path.exists(),
            "empty save must skip the write to avoid littering"
        );
    }

    /// Only entries that carry a content hash are written to disk.
    #[test]
    fn entries_without_content_hash_are_skipped() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        let cache = MetadataCache::new();
        cache.insert(
            NormalizedPath::from("/tmp/hashed.c"),
            FileMetadata {
                mtime: SystemTime::UNIX_EPOCH,
                size: 1,
                confidence: Confidence::High,
                last_verified: Instant::now(),
                content_hash: Some([7u8; 32]),
            },
        );
        cache.insert(
            NormalizedPath::from("/tmp/nohash.c"),
            FileMetadata {
                mtime: SystemTime::UNIX_EPOCH,
                size: 2,
                confidence: Confidence::High,
                last_verified: Instant::now(),
                content_hash: None,
            },
        );
        cache.save_to_disk(&path).unwrap();

        let loaded = MetadataCache::load_from_disk(&path).unwrap();
        assert_eq!(loaded.len(), 1);
        assert!(loaded.get(&NormalizedPath::from("/tmp/hashed.c")).is_some());
        assert!(loaded.get(&NormalizedPath::from("/tmp/nohash.c")).is_none());
    }

    /// Bytes that are not a valid snapshot must surface as an error.
    #[test]
    fn load_corrupt_file_returns_err() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        fs::write(&path, b"this is not bincode").unwrap();
        let result = MetadataCache::load_from_disk(&path);
        assert!(result.is_err());
    }

    /// A well-formed snapshot with a different version is rejected.
    #[test]
    fn load_wrong_version_returns_err() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        let bad = PersistedMetadata {
            version: FORMAT_VERSION.wrapping_add(999),
            entries: vec![],
        };
        let bytes = bincode::serialize(&bad).unwrap();
        fs::write(&path, &bytes).unwrap();
        let result = MetadataCache::load_from_disk(&path);
        assert!(result.is_err(), "wrong version must surface as an error");
    }

    /// After a successful save only the target file remains in the directory.
    #[test]
    fn atomic_save_does_not_leave_tmp_file_behind() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("metadata.bin");
        populated_cache().save_to_disk(&path).unwrap();

        let entries: Vec<_> = fs::read_dir(dir.path())
            .unwrap()
            .filter_map(|e| e.ok())
            .map(|e| e.file_name().to_string_lossy().into_owned())
            .collect();
        assert!(entries.iter().any(|n| n == "metadata.bin"));
        assert!(
            !entries.iter().any(|n| n.contains(".tmp-")),
            "leftover tmp file: {entries:?}"
        );
    }
}