use anyhow::Result;
use std::collections::HashMap;
use std::path::Path;
/// File name of the JSON sources manifest that `load_sources_manifest` and
/// `save_sources_manifest` read and write inside an index directory.
pub const SOURCES_MANIFEST_FILENAME: &str = "documents.leann.sources.json";
/// Serialized shape of the manifest file: a single top-level `sources` object
/// mapping string keys to floating-point values.
#[derive(serde::Serialize, serde::Deserialize)]
struct Manifest {
// NOTE(review): presumably source path -> modification time in seconds since
// the Unix epoch, as built by `collect_sources` — confirm against callers.
sources: HashMap<String, f64>,
}
/// Loads the sources map from the manifest file inside `index_dir`.
///
/// The manifest is looked up at `index_dir` joined with
/// [`SOURCES_MANIFEST_FILENAME`]. A missing manifest, or one whose contents
/// do not parse as a manifest, yields an empty map rather than an error.
///
/// # Errors
///
/// Returns an error when the manifest cannot be read for any reason other
/// than the file not existing (for example a permission failure).
pub fn load_sources_manifest(index_dir: &Path) -> Result<HashMap<String, f64>> {
    let path = index_dir.join(SOURCES_MANIFEST_FILENAME);

    // Read in a single step instead of probing with `exists()` first: the
    // probe can race with a concurrent delete, and it reports `false` for
    // stat failures that are not "file is absent".
    let data = match std::fs::read_to_string(&path) {
        Ok(data) => data,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(HashMap::new()),
        Err(err) => return Err(err.into()),
    };

    // Deliberately best-effort: an unparseable manifest is treated the same
    // as an absent one.
    Ok(serde_json::from_str::<Manifest>(&data)
        .map(|manifest| manifest.sources)
        .unwrap_or_default())
}
/// Writes `sources` to the manifest file inside `index_dir`.
///
/// The file is pretty-printed JSON of the form `{"sources": {key: number}}`,
/// which is the shape `Manifest` deserializes. Keys are written in sorted
/// order so identical input always produces identical file contents.
///
/// # Errors
///
/// Returns an error if serialization fails or the file cannot be written.
pub fn save_sources_manifest(index_dir: &Path, sources: &HashMap<String, f64>) -> Result<()> {
    use std::collections::BTreeMap;

    let path = index_dir.join(SOURCES_MANIFEST_FILENAME);

    // Borrow the keys instead of cloning the whole map; the BTreeMap gives a
    // stable key order, which HashMap iteration does not.
    let ordered: BTreeMap<&str, f64> = sources
        .iter()
        .map(|(key, value)| (key.as_str(), *value))
        .collect();

    // The "sources" key must stay in sync with the `sources` field of
    // `Manifest`, which is what `load_sources_manifest` parses.
    let manifest = BTreeMap::from([("sources", &ordered)]);

    let json = serde_json::to_string_pretty(&manifest)?;
    std::fs::write(&path, json)?;
    Ok(())
}
/// Builds a map from each document's path to its modification time.
///
/// Only the first element of every pair in `documents` is used; the second
/// is ignored. For each path:
///
/// * the value is the file's modification time in seconds since the Unix
///   epoch, or `0.0` when the metadata or timestamp cannot be read (a
///   timestamp before the epoch also collapses to `0.0`);
/// * the key is the canonicalized path rendered lossily as a string, or the
///   path exactly as given when canonicalization fails.
///
/// When several entries resolve to the same key, the last one wins.
pub fn collect_sources(documents: &[(String, String)]) -> HashMap<String, f64> {
    documents
        .iter()
        .map(|(path, _)| {
            let modified_secs = match std::fs::metadata(path).and_then(|meta| meta.modified()) {
                Ok(stamp) => stamp
                    .duration_since(std::time::UNIX_EPOCH)
                    .unwrap_or_default()
                    .as_secs_f64(),
                Err(_) => 0.0,
            };

            let key = match std::fs::canonicalize(path) {
                Ok(resolved) => resolved.to_string_lossy().to_string(),
                Err(_) => path.clone(),
            };

            (key, modified_secs)
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Canonical, lossily-stringified form of `path`, mirroring how
    /// `collect_sources` derives keys for files that exist.
    fn canonical_string(path: &std::path::Path) -> String {
        std::fs::canonicalize(path)
            .unwrap()
            .to_string_lossy()
            .to_string()
    }

    /// A saved manifest loads back with the same entries.
    #[test]
    fn test_save_and_load_roundtrip() {
        let index_dir = tempfile::tempdir().unwrap();
        let sources = HashMap::from([
            ("/tmp/a.txt".to_string(), 1700000000.123),
            ("/tmp/b.md".to_string(), 1700000001.456),
        ]);

        save_sources_manifest(index_dir.path(), &sources).unwrap();
        let loaded = load_sources_manifest(index_dir.path()).unwrap();

        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded["/tmp/a.txt"], 1700000000.123);
        assert_eq!(loaded["/tmp/b.md"], 1700000001.456);
    }

    /// A directory without a manifest loads as an empty map.
    #[test]
    fn test_load_missing_returns_empty() {
        let index_dir = tempfile::tempdir().unwrap();

        let loaded = load_sources_manifest(index_dir.path()).unwrap();

        assert!(loaded.is_empty());
    }

    /// A manifest that is not valid JSON loads as an empty map, not an error.
    #[test]
    fn test_load_invalid_json_returns_empty() {
        let index_dir = tempfile::tempdir().unwrap();
        let manifest_path = index_dir.path().join(SOURCES_MANIFEST_FILENAME);
        std::fs::write(&manifest_path, "not valid json {{{").unwrap();

        let loaded = load_sources_manifest(index_dir.path()).unwrap();

        assert!(loaded.is_empty());
    }

    /// Existing files are keyed by canonical path and carry a recent mtime.
    #[test]
    fn test_collect_sources_from_real_files() {
        let dir = tempfile::tempdir().unwrap();
        let file_a = dir.path().join("a.txt");
        let file_b = dir.path().join("b.txt");

        let mut documents = Vec::new();
        for (file, content) in [(&file_a, "content a"), (&file_b, "content b")] {
            std::fs::write(file, content).unwrap();
            documents.push((file.to_string_lossy().to_string(), content.to_string()));
        }

        let sources = collect_sources(&documents);
        assert_eq!(sources.len(), 2);

        let canonical_a = canonical_string(&file_a);
        let canonical_b = canonical_string(&file_b);
        assert!(sources.contains_key(&canonical_a), "Should contain a.txt");
        assert!(sources.contains_key(&canonical_b), "Should contain b.txt");

        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs_f64();
        for &mtime in sources.values() {
            assert!(
                mtime > now - 60.0 && mtime <= now + 1.0,
                "mtime {mtime} should be recent (now={now})"
            );
        }
    }

    /// A path that does not exist keeps its raw spelling and gets mtime 0.0.
    #[test]
    fn test_collect_sources_nonexistent_file() {
        let missing = "/nonexistent/path/file.txt";
        let documents = vec![(missing.to_string(), "content".to_string())];

        let sources = collect_sources(&documents);

        assert_eq!(sources.len(), 1);
        assert!(sources.contains_key(missing));
        assert_eq!(sources[missing], 0.0);
    }
}