use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
/// Metadata stored for a root that has been marked dirty in a [`HashCache`].
///
/// The value is serialized as part of the cache file, so the field names and
/// order are part of the on-disk format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirtyInfo {
/// Unix time, in whole seconds, captured when this record was created.
pub timestamp: u64,
/// Expected file count supplied through `with_expected_files`; `None` when
/// the record was created without one.
pub expected_files: Option<usize>,
}
impl DirtyInfo {
    /// Current system time as whole seconds since the Unix epoch.
    ///
    /// Yields `0` when the system clock reports a time before the epoch.
    fn unix_now_secs() -> u64 {
        match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs(),
            Err(_) => 0,
        }
    }

    /// Creates a record stamped with the current time and no expected file count.
    pub fn new() -> Self {
        Self {
            timestamp: Self::unix_now_secs(),
            expected_files: None,
        }
    }

    /// Creates a record stamped with the current time that also carries
    /// `expected_files`.
    pub fn with_expected_files(expected_files: usize) -> Self {
        Self {
            expected_files: Some(expected_files),
            ..Self::new()
        }
    }

    /// Returns `true` when the record is at least `max_age_secs` seconds old.
    ///
    /// The comparison is inclusive, so `max_age_secs == 0` always reports stale.
    pub fn is_stale(&self, max_age_secs: u64) -> bool {
        self.age_secs() >= max_age_secs
    }

    /// Returns how many seconds have passed since the record was created.
    ///
    /// The subtraction saturates, so a timestamp lying in the future gives `0`.
    pub fn age_secs(&self) -> u64 {
        Self::unix_now_secs().saturating_sub(self.timestamp)
    }
}
impl Default for DirtyInfo {
fn default() -> Self {
Self::new()
}
}
/// Cache of per-root hash tables together with the roots currently flagged
/// dirty. It is read from and written to a JSON file by `load` and `save`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct HashCache {
/// Maps a root (the tests use directory paths such as `/test/path`) to that
/// root's table of string keys and string values (the tests store
/// file name -> hash).
pub roots: HashMap<String, HashMap<String, String>>,
/// Roots currently marked dirty, each with its [`DirtyInfo`]. Treated as
/// empty when the field is missing from the serialized data.
#[serde(default)]
pub dirty_roots: HashMap<String, DirtyInfo>,
}
/// Older serialized layout in which `dirty_roots` was a plain set of root
/// names without per-root metadata. Only deserialized, by `HashCache::load`,
/// so that files in this layout can be migrated to the current one.
#[derive(Debug, Deserialize)]
struct LegacyHashCache {
/// Same shape as `HashCache::roots`.
roots: HashMap<String, HashMap<String, String>>,
/// Names of the dirty roots; treated as empty when the field is missing.
#[serde(default)]
dirty_roots: std::collections::HashSet<String>,
}
impl HashCache {
pub fn load(cache_path: &Path) -> Result<Self> {
if !cache_path.exists() {
tracing::debug!("Cache file not found, starting with empty cache");
return Ok(Self::default());
}
let content = fs::read_to_string(cache_path).context("Failed to read cache file")?;
if let Ok(cache) = serde_json::from_str::<HashCache>(&content) {
tracing::info!("Loaded cache with {} indexed roots", cache.roots.len());
return Ok(cache);
}
if let Ok(legacy) = serde_json::from_str::<LegacyHashCache>(&content) {
tracing::info!(
"Migrating cache from legacy format ({} roots, {} dirty roots)",
legacy.roots.len(),
legacy.dirty_roots.len()
);
let dirty_roots: HashMap<String, DirtyInfo> = legacy
.dirty_roots
.into_iter()
.map(|root| (root, DirtyInfo::new()))
.collect();
let cache = HashCache {
roots: legacy.roots,
dirty_roots,
};
if let Err(e) = cache.save(cache_path) {
tracing::warn!("Failed to save migrated cache: {}", e);
} else {
tracing::info!("Successfully migrated cache to new format");
}
return Ok(cache);
}
anyhow::bail!("Failed to parse cache file as either new or legacy format")
}
pub fn save(&self, cache_path: &Path) -> Result<()> {
if let Some(parent) = cache_path.parent() {
fs::create_dir_all(parent).context("Failed to create cache directory")?;
}
let content = serde_json::to_string_pretty(self).context("Failed to serialize cache")?;
fs::write(cache_path, content).context("Failed to write cache file")?;
tracing::debug!("Saved cache to {:?}", cache_path);
Ok(())
}
pub fn get_root(&self, root: &str) -> Option<&HashMap<String, String>> {
self.roots.get(root)
}
pub fn update_root(&mut self, root: String, hashes: HashMap<String, String>) {
self.roots.insert(root, hashes);
}
pub fn remove_root(&mut self, root: &str) {
self.roots.remove(root);
self.dirty_roots.remove(root);
}
pub fn mark_dirty(&mut self, root: &str) {
self.dirty_roots.insert(root.to_string(), DirtyInfo::new());
}
pub fn mark_dirty_with_info(&mut self, root: &str, expected_files: usize) {
self.dirty_roots.insert(
root.to_string(),
DirtyInfo::with_expected_files(expected_files),
);
}
pub fn clear_dirty(&mut self, root: &str) {
self.dirty_roots.remove(root);
}
pub fn is_dirty(&self, root: &str) -> bool {
self.dirty_roots.contains_key(root)
}
pub fn get_dirty_info(&self, root: &str) -> Option<&DirtyInfo> {
self.dirty_roots.get(root)
}
pub fn get_dirty_roots(&self) -> &HashMap<String, DirtyInfo> {
&self.dirty_roots
}
pub fn has_dirty_roots(&self) -> bool {
!self.dirty_roots.is_empty()
}
pub fn is_dirty_stale(&self, root: &str, max_age_secs: u64) -> bool {
self.dirty_roots
.get(root)
.is_some_and(|info| info.is_stale(max_age_secs))
}
pub fn dirty_age_secs(&self, root: &str) -> Option<u64> {
self.dirty_roots.get(root).map(|info| info.age_secs())
}
pub fn clear_stale_dirty_flags(&mut self, max_age_secs: u64) -> usize {
let stale_roots: Vec<String> = self
.dirty_roots
.iter()
.filter(|(_, info)| info.is_stale(max_age_secs))
.map(|(root, _)| root.clone())
.collect();
let count = stale_roots.len();
for root in stale_roots {
tracing::info!(
"Clearing stale dirty flag for '{}' (age: {} seconds)",
root,
self.dirty_roots
.get(&root)
.map(|i| i.age_secs())
.unwrap_or(0)
);
self.dirty_roots.remove(&root);
}
count
}
pub fn default_path() -> PathBuf {
brainwires_storage::paths::PlatformPaths::default_hash_cache_path()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// Root used by most tests.
    const ROOT: &str = "/test/path";

    /// Builds a per-root hash table from `(file, hash)` pairs.
    fn hashes_of(entries: &[(&str, &str)]) -> HashMap<String, String> {
        entries
            .iter()
            .map(|&(file, hash)| (file.to_string(), hash.to_string()))
            .collect()
    }

    /// A cache round-trips through JSON without losing its roots.
    #[test]
    fn test_cache_serialization() {
        let mut original = HashCache::default();
        original.update_root(
            ROOT.to_string(),
            hashes_of(&[("file1.rs", "hash1"), ("file2.rs", "hash2")]),
        );

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: HashCache = serde_json::from_str(&encoded).unwrap();

        assert_eq!(original.roots.len(), decoded.roots.len());
        assert_eq!(original.roots.get(ROOT), decoded.roots.get(ROOT));
    }

    /// Saving and then loading yields the same roots.
    #[test]
    fn test_cache_save_load() {
        let tmp = NamedTempFile::new().unwrap();
        let mut original = HashCache::default();
        original.update_root(ROOT.to_string(), hashes_of(&[("file1.rs", "hash1")]));

        original.save(tmp.path()).unwrap();
        let restored = HashCache::load(tmp.path()).unwrap();

        assert_eq!(original.roots.len(), restored.roots.len());
        assert_eq!(original.roots.get(ROOT), restored.roots.get(ROOT));
    }

    /// Insert, look up and remove a root.
    #[test]
    fn test_cache_operations() {
        let mut cache = HashCache::default();
        cache.update_root(ROOT.to_string(), hashes_of(&[("file1.rs", "hash1")]));

        assert!(cache.get_root(ROOT).is_some());
        assert!(cache.get_root("/nonexistent").is_none());

        cache.remove_root(ROOT);
        assert!(cache.get_root(ROOT).is_none());
    }

    /// A missing cache file loads as an empty cache.
    #[test]
    fn test_load_nonexistent_cache() {
        let outcome = HashCache::load(Path::new("/nonexistent/path/cache.json"));
        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().roots.len(), 0);
    }

    /// Content that is not valid JSON is rejected.
    #[test]
    fn test_load_corrupted_cache() {
        let tmp = NamedTempFile::new().unwrap();
        fs::write(tmp.path(), "{ invalid json }").unwrap();

        assert!(HashCache::load(tmp.path()).is_err());
    }

    /// `save` creates a missing parent directory.
    #[test]
    fn test_save_creates_parent_directory() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("subdir").join("cache.json");

        HashCache::default().save(&target).unwrap();

        assert!(target.exists());
    }

    /// The default path mentions the project directory and the cache file name.
    #[test]
    fn test_default_path() {
        let path = HashCache::default_path();
        let rendered = path.to_string_lossy();
        assert!(rendered.contains("brainwires-rag"));
        assert!(rendered.contains("hash_cache.json"));
    }

    /// A second `update_root` for the same root replaces the first table.
    #[test]
    fn test_update_root_replaces_existing() {
        let mut cache = HashCache::default();
        cache.update_root(ROOT.to_string(), hashes_of(&[("file1.rs", "hash1")]));
        cache.update_root(ROOT.to_string(), hashes_of(&[("file2.rs", "hash2")]));

        let table = cache.get_root(ROOT).unwrap();
        assert_eq!(table.len(), 1);
        assert!(table.contains_key("file2.rs"));
        assert!(!table.contains_key("file1.rs"));
    }

    /// Distinct roots are stored side by side.
    #[test]
    fn test_multiple_roots() {
        let mut cache = HashCache::default();
        cache.update_root("/path1".to_string(), hashes_of(&[("file1.rs", "hash1")]));
        cache.update_root("/path2".to_string(), hashes_of(&[("file2.rs", "hash2")]));

        assert_eq!(cache.roots.len(), 2);
        assert!(cache.get_root("/path1").is_some());
        assert!(cache.get_root("/path2").is_some());
    }

    /// Lookups on an empty cache find nothing.
    #[test]
    fn test_empty_cache_operations() {
        let cache = HashCache::default();
        assert!(cache.get_root("/any/path").is_none());
        assert_eq!(cache.roots.len(), 0);
    }

    /// Removing an unknown root is a no-op.
    #[test]
    fn test_remove_root_nonexistent() {
        let mut cache = HashCache::default();
        cache.remove_root("/nonexistent");
        assert_eq!(cache.roots.len(), 0);
    }

    /// Mark, inspect and clear a single dirty flag.
    #[test]
    fn test_dirty_flag_operations() {
        let mut cache = HashCache::default();
        assert!(!cache.is_dirty(ROOT));
        assert!(!cache.has_dirty_roots());
        assert!(cache.get_dirty_roots().is_empty());

        cache.mark_dirty(ROOT);
        assert!(cache.is_dirty(ROOT));
        assert!(cache.has_dirty_roots());
        assert!(cache.get_dirty_roots().contains_key(ROOT));

        let record = cache.get_dirty_info(ROOT).unwrap();
        assert!(record.timestamp > 0);
        assert!(record.expected_files.is_none());

        cache.clear_dirty(ROOT);
        assert!(!cache.is_dirty(ROOT));
        assert!(!cache.has_dirty_roots());
    }

    /// `mark_dirty_with_info` records the expected file count.
    #[test]
    fn test_dirty_flag_with_expected_files() {
        let mut cache = HashCache::default();
        cache.mark_dirty_with_info(ROOT, 100);

        assert!(cache.is_dirty(ROOT));
        let record = cache.get_dirty_info(ROOT).unwrap();
        assert_eq!(record.expected_files, Some(100));
    }

    /// A fresh flag is young, and a zero max age makes any flag stale.
    #[test]
    fn test_dirty_flag_staleness() {
        let mut cache = HashCache::default();
        cache.mark_dirty(ROOT);

        assert!(!cache.is_dirty_stale(ROOT, 3600));
        let age = cache.dirty_age_secs(ROOT).unwrap();
        assert!(age < 5);
        assert!(cache.is_dirty_stale(ROOT, 0));
    }

    /// With a zero max age every dirty flag is cleared and counted.
    #[test]
    fn test_clear_stale_dirty_flags() {
        let mut cache = HashCache::default();
        for root in ["/path1", "/path2"] {
            cache.mark_dirty(root);
        }

        let cleared = cache.clear_stale_dirty_flags(0);

        assert_eq!(cleared, 2);
        assert!(!cache.has_dirty_roots());
    }

    /// Dirty flags survive a save/load cycle.
    #[test]
    fn test_dirty_flag_persistence() {
        let tmp = NamedTempFile::new().unwrap();
        let mut cache = HashCache::default();
        cache.mark_dirty(ROOT);
        cache.save(tmp.path()).unwrap();

        let restored = HashCache::load(tmp.path()).unwrap();

        assert!(restored.is_dirty(ROOT));
        assert!(restored.has_dirty_roots());
    }

    /// Removing a root also drops its dirty flag.
    #[test]
    fn test_remove_root_clears_dirty() {
        let mut cache = HashCache::default();
        cache.update_root(ROOT.to_string(), hashes_of(&[("file1.rs", "hash1")]));
        cache.mark_dirty(ROOT);
        assert!(cache.is_dirty(ROOT));
        assert!(cache.get_root(ROOT).is_some());

        cache.remove_root(ROOT);

        assert!(!cache.is_dirty(ROOT));
        assert!(cache.get_root(ROOT).is_none());
    }

    /// Clearing one dirty root leaves the others untouched.
    #[test]
    fn test_multiple_dirty_roots() {
        let mut cache = HashCache::default();
        for root in ["/path1", "/path2", "/path3"] {
            cache.mark_dirty(root);
        }
        assert!(cache.is_dirty("/path1"));
        assert!(cache.is_dirty("/path2"));
        assert!(cache.is_dirty("/path3"));
        assert_eq!(cache.get_dirty_roots().len(), 3);

        cache.clear_dirty("/path2");

        assert!(cache.is_dirty("/path1"));
        assert!(!cache.is_dirty("/path2"));
        assert!(cache.is_dirty("/path3"));
        assert_eq!(cache.get_dirty_roots().len(), 2);
    }

    /// Marking or clearing the same root repeatedly has no extra effect.
    #[test]
    fn test_dirty_flag_idempotent() {
        let mut cache = HashCache::default();
        for _ in 0..3 {
            cache.mark_dirty(ROOT);
        }
        assert_eq!(cache.get_dirty_roots().len(), 1);

        for _ in 0..2 {
            cache.clear_dirty(ROOT);
        }
        assert!(!cache.is_dirty(ROOT));
    }

    /// A file without a `dirty_roots` field loads with no dirty roots.
    #[test]
    fn test_dirty_flag_with_old_cache_format() {
        let tmp = NamedTempFile::new().unwrap();
        let payload = r#"{"roots":{"/test/path":{"file1.rs":"hash1"}}}"#;
        fs::write(tmp.path(), payload).unwrap();

        let restored = HashCache::load(tmp.path()).unwrap();

        assert!(restored.get_root(ROOT).is_some());
        assert!(!restored.has_dirty_roots());
        assert!(!restored.is_dirty(ROOT));
    }

    /// A legacy file with `dirty_roots` as a list is migrated and stays
    /// loadable afterwards.
    #[test]
    fn test_dirty_flag_migration_from_hashset() {
        let tmp = NamedTempFile::new().unwrap();
        let payload =
            r#"{"roots":{"/test/path":{"file1.rs":"hash1"}},"dirty_roots":["/test/path"]}"#;
        fs::write(tmp.path(), payload).unwrap();

        let migrated = HashCache::load(tmp.path()).unwrap();
        assert!(migrated.get_root(ROOT).is_some());
        assert!(migrated.is_dirty(ROOT));
        assert!(migrated.has_dirty_roots());
        let record = migrated.get_dirty_info(ROOT).unwrap();
        assert!(record.timestamp > 0);

        let reloaded = HashCache::load(tmp.path()).unwrap();
        assert!(reloaded.is_dirty(ROOT));
    }

    /// `DirtyInfo::default` is stamped and carries no expected file count.
    #[test]
    fn test_dirty_info_default() {
        let record = DirtyInfo::default();
        assert!(record.timestamp > 0);
        assert!(record.expected_files.is_none());
    }

    /// `DirtyInfo::with_expected_files` is stamped and keeps the count.
    #[test]
    fn test_dirty_info_with_expected_files() {
        let record = DirtyInfo::with_expected_files(50);
        assert!(record.timestamp > 0);
        assert_eq!(record.expected_files, Some(50));
    }
}