use std::fs;
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use super::diff::ContentSnapshot;
/// Lightweight index entry for one snapshot file on disk.
///
/// Built by scanning a URL's snapshot directory: `timestamp` is parsed from
/// the file stem (`<timestamp>.json`) and `content_hash` is read from the
/// deserialized snapshot body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SnapshotMeta {
/// Timestamp parsed from the snapshot's file stem.
pub timestamp: u64,
/// Copied from the stored `ContentSnapshot::content_hash`.
pub content_hash: u64,
/// Path of the snapshot JSON file this metadata was read from.
pub path: PathBuf,
}
/// On-disk store of per-URL content snapshots.
///
/// Layout: `root/<url-hash>/<timestamp>.json`. After each save, at most
/// `max_per_url` snapshots are retained per URL (oldest pruned first).
#[derive(Debug, Clone)]
pub struct SnapshotStore {
// Base directory; one subdirectory per hashed URL.
root: PathBuf,
// Retention cap enforced by `prune_old` after every save.
max_per_url: usize,
}
impl SnapshotStore {
    /// Default cap on the number of snapshots retained per URL.
    pub const MAX_SNAPSHOTS_DEFAULT: usize = 20;

    /// Creates a store rooted at the default location (see `default_root`).
    pub fn new() -> Self {
        Self::with_root(default_root())
    }

    /// Creates a store rooted at `root`, with the default per-URL cap.
    pub fn with_root(root: impl Into<PathBuf>) -> Self {
        Self {
            root: root.into(),
            max_per_url: Self::MAX_SNAPSHOTS_DEFAULT,
        }
    }

    /// Sets the maximum number of snapshots retained per URL.
    #[must_use]
    pub fn with_max_per_url(mut self, n: usize) -> Self {
        self.max_per_url = n;
        self
    }

    /// Persists `snapshot` under `url`'s directory as `<timestamp>.json`,
    /// then prunes the oldest snapshots beyond `max_per_url` (best effort).
    ///
    /// # Errors
    /// Returns an error if the directory cannot be created, the snapshot
    /// cannot be serialized, or the file cannot be written.
    pub fn save_snapshot(&self, url: &str, snapshot: &ContentSnapshot) -> Result<()> {
        let dir = self.url_dir(url);
        fs::create_dir_all(&dir)
            .with_context(|| format!("create snapshot dir {}", dir.display()))?;
        let path = dir.join(format!("{}.json", snapshot.timestamp));
        atomic_write(&path, &serde_json::to_vec_pretty(snapshot)?)
            .with_context(|| format!("write snapshot {}", path.display()))?;
        self.prune_old(&dir);
        Ok(())
    }

    /// Loads the most recently saved snapshot for `url`, if any.
    pub fn load_latest_snapshot(&self, url: &str) -> Option<ContentSnapshot> {
        // max_by_key is O(n); no need to sort the whole list to find the newest.
        self.list_snapshots(url)
            .into_iter()
            .max_by_key(|m| m.timestamp)
            .and_then(|m| Self::load_from_path(&m.path))
    }

    /// Loads the snapshot whose timestamp is *closest* to `timestamp`
    /// (not necessarily an exact match), if any exist for `url`.
    pub fn load_snapshot_at(&self, url: &str, timestamp: u64) -> Option<ContentSnapshot> {
        self.list_snapshots(url)
            .into_iter()
            .min_by_key(|m| m.timestamp.abs_diff(timestamp))
            .and_then(|m| Self::load_from_path(&m.path))
    }

    /// Lists metadata for every snapshot stored for `url`, in no
    /// particular order. Empty if the URL was never saved.
    pub fn list_snapshots(&self, url: &str) -> Vec<SnapshotMeta> {
        read_metas(&self.url_dir(url))
    }

    /// Directory holding all snapshots for `url`: the root joined with a
    /// hash of the URL, so arbitrary URLs map to safe path components.
    fn url_dir(&self, url: &str) -> PathBuf {
        self.root.join(url_hash(url))
    }

    /// Reads and deserializes one snapshot file. Any I/O or parse failure
    /// yields `None` — missing/corrupt files are treated as absent.
    fn load_from_path(path: &Path) -> Option<ContentSnapshot> {
        let bytes = fs::read(path).ok()?;
        serde_json::from_slice(&bytes).ok()
    }

    /// Best-effort removal of the oldest snapshots in `dir` so that at
    /// most `max_per_url` remain. Deletion errors are deliberately ignored.
    fn prune_old(&self, dir: &Path) {
        let mut metas = read_metas(dir);
        metas.sort_unstable_by_key(|m| m.timestamp);
        let excess = metas.len().saturating_sub(self.max_per_url);
        for meta in metas.iter().take(excess) {
            let _ = fs::remove_file(&meta.path);
        }
    }
}
impl Default for SnapshotStore {
fn default() -> Self {
Self::new()
}
}
/// Default snapshot root: `~/.nab/snapshots`, falling back to the current
/// directory when no home directory can be determined.
fn default_root() -> PathBuf {
    let base = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
    base.join(".nab").join("snapshots")
}
/// Maps an arbitrary URL to a fixed-width (16 hex digits), filesystem-safe
/// directory name via the standard library's default hasher.
fn url_hash(url: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut hasher = DefaultHasher::new();
    url.hash(&mut hasher);
    let digest = hasher.finish();
    format!("{digest:016x}")
}
/// Scans `dir` and returns metadata for every readable `.json` snapshot.
/// An unreadable/missing directory yields an empty list, never an error;
/// unparsable entries are skipped.
fn read_metas(dir: &Path) -> Vec<SnapshotMeta> {
    let mut metas = Vec::new();
    if let Ok(entries) = fs::read_dir(dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            // Only ".json" files count; leftover ".tmp" files are ignored.
            if path.extension().is_some_and(|ext| ext == "json") {
                if let Some(meta) = meta_from_entry(&path) {
                    metas.push(meta);
                }
            }
        }
    }
    metas
}
/// Builds a `SnapshotMeta` from one snapshot file. Returns `None` if the
/// file stem is not a numeric timestamp, the file is unreadable, or the
/// body fails to deserialize.
fn meta_from_entry(path: &Path) -> Option<SnapshotMeta> {
    // The stem must be the timestamp, e.g. "1700000000.json" -> 1700000000.
    let timestamp = path.file_stem()?.to_str()?.parse::<u64>().ok()?;
    let bytes = fs::read(path).ok()?;
    let snapshot: ContentSnapshot = serde_json::from_slice(&bytes).ok()?;
    Some(SnapshotMeta {
        timestamp,
        content_hash: snapshot.content_hash,
        path: path.to_owned(),
    })
}
/// Writes `data` to `path` via a sibling `.tmp` file plus rename, so readers
/// never observe a partially-written snapshot.
///
/// # Errors
/// Returns an error if the temp-file write or the rename fails. On a failed
/// rename the temp file is removed (best effort) so it does not leak on disk.
fn atomic_write(path: &Path, data: &[u8]) -> Result<()> {
    // `with_extension` swaps ".json" for ".tmp"; any leftover temp file is
    // invisible to `read_metas`, which only accepts ".json" entries.
    let tmp = path.with_extension("tmp");
    fs::write(&tmp, data)?;
    if let Err(err) = fs::rename(&tmp, path) {
        // Previously the temp file was orphaned forever on rename failure.
        let _ = fs::remove_file(&tmp);
        return Err(err.into());
    }
    Ok(())
}
#[cfg(test)]
// Filesystem-backed tests: each test gets its own temp directory, so they
// can run in parallel without interfering with one another.
mod tests {
use super::*;
use std::time::SystemTime;
use tempfile::TempDir;
// Creates a store rooted in a fresh temp dir. The TempDir guard is returned
// so the directory is not deleted until the test function ends.
fn tmp_store() -> (TempDir, SnapshotStore) {
let dir = tempfile::tempdir().expect("tmp dir");
let store = SnapshotStore::with_root(dir.path());
(dir, store)
}
// Builds a snapshot whose timestamp is `ts_secs` seconds past the Unix
// epoch, keeping test timestamps small and readable.
fn make_snap(url: &str, text: &str, ts_secs: u64) -> ContentSnapshot {
let ts = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(ts_secs);
ContentSnapshot::new(url, text, ts)
}
#[test]
// Saving then loading the latest snapshot preserves text and URL.
fn save_and_load_latest_roundtrips_content() {
let (_dir, store) = tmp_store();
let snap = make_snap("https://example.com", "Hello world.", 1_000);
store.save_snapshot("https://example.com", &snap).unwrap();
let loaded = store.load_latest_snapshot("https://example.com").unwrap();
assert_eq!(loaded.text, snap.text);
assert_eq!(loaded.url, snap.url);
}
#[test]
// With several snapshots stored, the one with the highest timestamp wins.
fn load_latest_returns_most_recent_when_multiple() {
let (_dir, store) = tmp_store();
for ts in [100u64, 200, 300] {
let s = make_snap("https://example.com", &format!("Text at {ts}"), ts);
store.save_snapshot("https://example.com", &s).unwrap();
}
let loaded = store.load_latest_snapshot("https://example.com").unwrap();
assert!(loaded.text.contains("300"), "got: {}", loaded.text);
}
#[test]
// A URL that was never saved yields None rather than an error.
fn load_latest_returns_none_for_unknown_url() {
let (_dir, store) = tmp_store();
let result = store.load_latest_snapshot("https://never-saved.com");
assert!(result.is_none());
}
#[test]
// load_snapshot_at picks the nearest timestamp: 300 is closer to 250
// than 100 is, so the ts=300 snapshot is returned.
fn load_snapshot_at_returns_closest_match() {
let (_dir, store) = tmp_store();
for ts in [100u64, 300] {
let s = make_snap("https://example.com", &format!("ts={ts}"), ts);
store.save_snapshot("https://example.com", &s).unwrap();
}
let loaded = store.load_snapshot_at("https://example.com", 250).unwrap();
assert!(loaded.text.contains("300"), "got: {}", loaded.text);
}
#[test]
// An exact timestamp hit (distance zero) returns that exact snapshot.
fn load_snapshot_at_exact_timestamp_returns_exact() {
let (_dir, store) = tmp_store();
let s = make_snap("https://example.com", "exact ts", 500);
store.save_snapshot("https://example.com", &s).unwrap();
let loaded = store.load_snapshot_at("https://example.com", 500).unwrap();
assert_eq!(loaded.text, "exact ts");
}
#[test]
// Every saved snapshot shows up in the listing (count stays under the
// default retention cap, so nothing is pruned here).
fn list_snapshots_returns_all_saved() {
let (_dir, store) = tmp_store();
for ts in [10u64, 20, 30] {
let s = make_snap("https://x.com", "text", ts);
store.save_snapshot("https://x.com", &s).unwrap();
}
let metas = store.list_snapshots("https://x.com");
assert_eq!(metas.len(), 3);
}
#[test]
// Listing an unknown URL is empty, not an error.
fn list_snapshots_returns_empty_for_unknown_url() {
let (_dir, store) = tmp_store();
let metas = store.list_snapshots("https://nope.com");
assert!(metas.is_empty());
}
#[test]
// Saving 5 snapshots with a cap of 3 leaves at most 3 on disk.
fn prune_keeps_at_most_max_per_url() {
let (_dir, store) = tmp_store();
let store = store.with_max_per_url(3);
for ts in 1u64..=5 {
let s = make_snap("https://prune.com", &format!("text {ts}"), ts);
store.save_snapshot("https://prune.com", &s).unwrap();
}
let metas = store.list_snapshots("https://prune.com");
assert!(metas.len() <= 3, "expected <=3, got {}", metas.len());
}
#[test]
// Pruning removes the oldest snapshots first, so the newest (t4) survives.
fn prune_retains_newest_snapshots() {
let (_dir, store) = tmp_store();
let store = store.with_max_per_url(2);
for ts in 1u64..=4 {
let s = make_snap("https://prune.com", &format!("t{ts}"), ts);
store.save_snapshot("https://prune.com", &s).unwrap();
}
let latest = store.load_latest_snapshot("https://prune.com").unwrap();
assert_eq!(latest.text, "t4");
}
#[test]
// Snapshots for different URLs land in separate hash directories and
// never shadow each other.
fn different_urls_stored_separately() {
let (_dir, store) = tmp_store();
store
.save_snapshot("https://a.com", &make_snap("https://a.com", "A content", 1))
.unwrap();
store
.save_snapshot("https://b.com", &make_snap("https://b.com", "B content", 1))
.unwrap();
let a = store.load_latest_snapshot("https://a.com").unwrap();
let b = store.load_latest_snapshot("https://b.com").unwrap();
assert_eq!(a.text, "A content");
assert_eq!(b.text, "B content");
}
}