Skip to main content

adler_server/
persist.rs

1//! On-disk persistence for finished scans.
2//!
3//! Each scan is serialised as a single JSON file under [`default_dir`]
4//! (`$XDG_CACHE_HOME/adler/scans/`, falling back to
5//! `$HOME/.cache/adler/scans/`). The on-disk format is the full
6//! [`PersistedScan`] — enough for the history listing AND for replaying
7//! the scan into the UI without a fresh probe.
8//!
9//! Writes are atomic: serialise to `<id>.json.tmp`, then rename onto
10//! the final path. A crashed process leaves at most one orphan `.tmp`
11//! file behind, never a half-written `<id>.json`.
12
13use std::path::{Path, PathBuf};
14
15use adler_core::CheckOutcome;
16use serde::{Deserialize, Serialize};
17use tokio::fs;
18
19use crate::error::{Error, Result};
20use crate::scan::{FinishedScan, ScanId, Summary};
21
/// Hard cap on how many scans we keep on disk. Beyond this, oldest
/// (by `created_at_ms`) are meant to be removed via [`prune`]. Picked to be
/// large enough for any plausible human-driven OSINT session.
///
/// The cap is not applied automatically by the code in this file:
/// [`save`] does not call [`prune`], so the caller is expected to pass
/// this value as `keep_newest` after a successful save.
pub(crate) const MAX_PERSISTED_SCANS: usize = 200;
26
/// Self-contained snapshot of a completed scan, stored as one JSON file
/// per scan.
///
/// Intended to round-trip through JSON; the `save_then_load_roundtrips`
/// test spot-checks the id, username, outcomes and summary after a
/// save/load cycle.
///
/// Field order is left untouched on purpose: it is the order in which the
/// derived serialiser writes the fields to disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedScan {
    /// Stable identifier — same value as in-memory [`ScanId`]. Also used
    /// to build the on-disk file name (`<scan_id>.json`).
    pub scan_id: ScanId,
    /// Username that was scanned.
    pub username: String,
    /// Total number of sites probed in this scan.
    pub site_count: usize,
    /// Unix epoch milliseconds when the scan was started. This is the
    /// sort key for the newest-first history listing and for pruning.
    pub created_at_ms: u64,
    /// Per-verdict tally over [`Self::outcomes`].
    pub summary: Summary,
    /// All outcomes, in completion order.
    pub outcomes: Vec<CheckOutcome>,
    /// Wall-clock duration, milliseconds.
    pub elapsed_ms: u64,
}
46
47impl PersistedScan {
48    /// Build a snapshot from a freshly-completed in-memory scan.
49    #[must_use]
50    pub fn from_finished(
51        scan_id: ScanId,
52        username: String,
53        site_count: usize,
54        created_at_ms: u64,
55        finished: FinishedScan,
56    ) -> Self {
57        Self {
58            scan_id,
59            username,
60            site_count,
61            created_at_ms,
62            summary: finished.summary,
63            outcomes: finished.outcomes,
64            elapsed_ms: finished.elapsed_ms,
65        }
66    }
67}
68
/// Default directory for persisted scans.
///
/// Discovery order:
///
/// 1. `$XDG_CACHE_HOME/adler/scans/`
/// 2. `$HOME/.cache/adler/scans/`
/// 3. the relative path `adler-scans`
///
/// A variable that is set but **empty** is treated as unset, as the XDG
/// Base Directory specification requires. Without that, `XDG_CACHE_HOME=`
/// would silently yield the relative path `adler/scans`, resolved against
/// whatever the current working directory happens to be.
///
/// Meant to mirror [`adler_core::Cache::default_path`]'s discovery rules.
/// NOTE(review): confirm that function also ignores empty values so both
/// caches end up under the same root.
///
/// This function only computes the path; the directory is created lazily
/// by [`save`].
#[must_use]
pub fn default_dir() -> PathBuf {
    // Read an environment variable, mapping "set but empty" to `None`.
    let non_empty = |name: &str| std::env::var_os(name).filter(|value| !value.is_empty());

    if let Some(xdg) = non_empty("XDG_CACHE_HOME") {
        return PathBuf::from(xdg).join("adler").join("scans");
    }
    if let Some(home) = non_empty("HOME") {
        return PathBuf::from(home)
            .join(".cache")
            .join("adler")
            .join("scans");
    }
    // Last resort when neither variable is usable: a path relative to
    // the current working directory.
    PathBuf::from("adler-scans")
}
87
88/// Save `scan` to `<dir>/<id>.json` atomically. Creates `dir` if missing.
89pub(crate) async fn save(dir: &Path, scan: &PersistedScan) -> Result<()> {
90    fs::create_dir_all(dir).await.map_err(Error::Persist)?;
91    let path = dir.join(format!("{}.json", scan.scan_id));
92    let tmp = dir.join(format!("{}.json.tmp", scan.scan_id));
93    let body = serde_json::to_vec_pretty(scan).map_err(Error::PersistEncode)?;
94    fs::write(&tmp, &body).await.map_err(Error::Persist)?;
95    fs::rename(&tmp, &path).await.map_err(Error::Persist)?;
96    Ok(())
97}
98
99/// Read one scan from disk by id. Returns `None` on any I/O or parse
100/// error — callers should treat a missing scan as not-found rather
101/// than propagate the underlying cause.
102pub(crate) async fn load(dir: &Path, scan_id: &ScanId) -> Option<PersistedScan> {
103    let path = dir.join(format!("{scan_id}.json"));
104    let bytes = fs::read(&path).await.ok()?;
105    serde_json::from_slice(&bytes).ok()
106}
107
108/// Enumerate every persisted scan, newest first. Files that fail to
109/// parse are silently skipped — a corrupted file shouldn't break the
110/// whole listing.
111pub(crate) async fn load_all(dir: &Path) -> Vec<PersistedScan> {
112    let Ok(mut entries) = fs::read_dir(dir).await else {
113        return Vec::new();
114    };
115    let mut out = Vec::new();
116    while let Ok(Some(entry)) = entries.next_entry().await {
117        let path = entry.path();
118        if path.extension().and_then(|s| s.to_str()) != Some("json") {
119            continue;
120        }
121        let Ok(bytes) = fs::read(&path).await else {
122            continue;
123        };
124        let Ok(scan) = serde_json::from_slice::<PersistedScan>(&bytes) else {
125            continue;
126        };
127        out.push(scan);
128    }
129    out.sort_by_key(|s| std::cmp::Reverse(s.created_at_ms));
130    out
131}
132
133/// Delete scans beyond `keep_newest`. Newest-by-`created_at_ms` wins.
134/// Returns the number of files actually removed.
135pub(crate) async fn prune(dir: &Path, keep_newest: usize) -> usize {
136    let scans = load_all(dir).await;
137    if scans.len() <= keep_newest {
138        return 0;
139    }
140    let mut removed = 0;
141    for s in &scans[keep_newest..] {
142        let path = dir.join(format!("{}.json", s.scan_id));
143        if fs::remove_file(&path).await.is_ok() {
144            removed += 1;
145        }
146    }
147    removed
148}
149
// Integration-style tests: every test works against its own temporary
// directory, so they touch the real filesystem but never a shared location.
#[cfg(test)]
mod tests {
    use super::*;
    use adler_core::MatchKind;
    use std::collections::BTreeMap;
    use tempfile::TempDir;

    /// Build a two-outcome fixture (one found, one not found) with the
    /// given id and start timestamp. Everything else is fixed.
    fn sample(scan_id: &str, ts: u64) -> PersistedScan {
        PersistedScan {
            scan_id: ScanId::from(scan_id.to_owned()),
            username: "alice".into(),
            site_count: 2,
            created_at_ms: ts,
            summary: Summary {
                found: 1,
                not_found: 1,
                uncertain: 0,
            },
            outcomes: vec![
                CheckOutcome {
                    site: "GitHub".into(),
                    url: "https://github.com/alice".into(),
                    kind: MatchKind::Found,
                    reason: None,
                    elapsed_ms: 120,
                    enrichment: BTreeMap::new(),
                    evidence: vec!["HTTP 200 (status_found)".into()],
                },
                CheckOutcome {
                    site: "GitLab".into(),
                    url: "https://gitlab.com/alice".into(),
                    kind: MatchKind::NotFound,
                    reason: None,
                    elapsed_ms: 90,
                    enrichment: BTreeMap::new(),
                    evidence: vec!["HTTP 404 (status_not_found)".into()],
                },
            ],
            elapsed_ms: 210,
        }
    }

    /// A saved scan can be read back by id; spot-checks a handful of
    /// fields rather than comparing the whole struct.
    #[tokio::test]
    async fn save_then_load_roundtrips() {
        let tmp = TempDir::new().unwrap();
        let s = sample("abc123", 1_700_000_000_000);
        save(tmp.path(), &s).await.unwrap();

        let loaded = load(tmp.path(), &s.scan_id).await.expect("loaded");
        assert_eq!(loaded.scan_id, s.scan_id);
        assert_eq!(loaded.username, "alice");
        assert_eq!(loaded.outcomes.len(), 2);
        assert_eq!(loaded.outcomes[0].site, "GitHub");
        assert_eq!(loaded.summary.found, 1);
    }

    /// The listing is ordered by `created_at_ms`, descending.
    #[tokio::test]
    async fn load_all_returns_newest_first() {
        let tmp = TempDir::new().unwrap();
        save(tmp.path(), &sample("old", 1_000)).await.unwrap();
        save(tmp.path(), &sample("mid", 2_000)).await.unwrap();
        save(tmp.path(), &sample("new", 3_000)).await.unwrap();
        let all = load_all(tmp.path()).await;
        assert_eq!(all.len(), 3);
        assert_eq!(all[0].scan_id.as_str(), "new");
        assert_eq!(all[1].scan_id.as_str(), "mid");
        assert_eq!(all[2].scan_id.as_str(), "old");
    }

    /// Looking up an id with no file on disk yields `None`, not an error.
    #[tokio::test]
    async fn load_returns_none_for_missing() {
        let tmp = TempDir::new().unwrap();
        let missing = load(tmp.path(), &ScanId::from("nope".to_owned())).await;
        assert!(missing.is_none());
    }

    /// Files without a `.json` extension and `.json` files that do not
    /// parse are both left out of the listing.
    #[tokio::test]
    async fn load_all_skips_unrelated_files() {
        let tmp = TempDir::new().unwrap();
        // Drop a non-JSON file and a malformed JSON file alongside.
        fs::write(tmp.path().join("README"), b"not json")
            .await
            .unwrap();
        fs::write(tmp.path().join("broken.json"), b"{ invalid")
            .await
            .unwrap();
        save(tmp.path(), &sample("good", 9_999)).await.unwrap();
        let all = load_all(tmp.path()).await;
        assert_eq!(all.len(), 1);
        assert_eq!(all[0].scan_id.as_str(), "good");
    }

    /// With five scans and `keep_newest = 2`, the three oldest are
    /// removed and the two newest survive, still in newest-first order.
    #[tokio::test]
    async fn prune_keeps_only_newest_n() {
        let tmp = TempDir::new().unwrap();
        // Timestamps 0, 1000, ..., 4000 — `s4` is the newest.
        for i in 0u64..5 {
            save(tmp.path(), &sample(&format!("s{i}"), i * 1_000))
                .await
                .unwrap();
        }
        let removed = prune(tmp.path(), 2).await;
        assert_eq!(removed, 3);
        let remaining = load_all(tmp.path()).await;
        assert_eq!(remaining.len(), 2);
        assert_eq!(remaining[0].scan_id.as_str(), "s4");
        assert_eq!(remaining[1].scan_id.as_str(), "s3");
    }
}
257}