Skip to main content

weave_content/
cache.rs

1use std::collections::HashMap;
2use std::path::Path;
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use serde::{Deserialize, Serialize};
6
7use crate::verifier::CheckStatus;
8
/// Time-to-live, in seconds, for cached non-error (`ok` / `warn`) results: 7 days.
const OK_TTL_SECS: u64 = 60 * 60 * 24 * 7;

/// Upper bound on the number of cached entries, so the cache cannot grow without limit.
const MAX_CACHE_ENTRIES: usize = 10_000;
14
15/// A single cached URL check result.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct CacheEntry {
18    pub status: String,
19    pub checked_at: u64,
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub http_status: Option<u16>,
22    #[serde(skip_serializing_if = "Option::is_none")]
23    pub detail: Option<String>,
24}
25
26/// Verify cache backed by a JSON file.
27#[derive(Debug)]
28pub struct VerifyCache {
29    path: String,
30    entries: HashMap<String, CacheEntry>,
31}
32
33impl VerifyCache {
34    /// Create an empty in-memory cache (no file backing).
35    pub fn empty() -> Self {
36        Self {
37            path: String::new(),
38            entries: HashMap::new(),
39        }
40    }
41
42    /// Load cache from file, or create empty if file doesn't exist or is invalid.
43    ///
44    /// # Errors
45    ///
46    /// Returns an error if the file exists but cannot be read (permissions, etc.).
47    /// Invalid JSON is treated as empty cache (not an error).
48    pub fn load(path: &str) -> Result<Self, String> {
49        let entries = if Path::new(path).exists() {
50            let content = std::fs::read_to_string(path)
51                .map_err(|e| format!("failed to read cache file {path}: {e}"))?;
52            serde_json::from_str(&content).unwrap_or_default()
53        } else {
54            HashMap::new()
55        };
56
57        Ok(Self {
58            path: path.to_string(),
59            entries,
60        })
61    }
62
63    /// Check if a URL has a valid (non-expired) cache entry.
64    /// Returns `None` if the URL should be re-checked.
65    pub fn get(&self, url: &str) -> Option<&CacheEntry> {
66        let entry = self.entries.get(url)?;
67        let now = now_secs();
68
69        // Always re-check error results
70        if entry.status == "error" {
71            return None;
72        }
73
74        // Check TTL for ok/warn results
75        if now.saturating_sub(entry.checked_at) > OK_TTL_SECS {
76            return None;
77        }
78
79        Some(entry)
80    }
81
82    /// Record a check result in the cache.
83    pub fn put(&mut self, url: &str, status: CheckStatus, detail: Option<&str>) {
84        // Enforce boundary
85        if self.entries.len() >= MAX_CACHE_ENTRIES && !self.entries.contains_key(url) {
86            return;
87        }
88
89        self.entries.insert(
90            url.to_string(),
91            CacheEntry {
92                status: status.to_string(),
93                checked_at: now_secs(),
94                http_status: extract_http_status(detail),
95                detail: detail.map(String::from),
96            },
97        );
98    }
99
100    /// Save cache to file.
101    ///
102    /// # Errors
103    ///
104    /// Returns an error if the file cannot be written.
105    pub fn save(&self) -> Result<(), String> {
106        // Prune expired entries before saving
107        let now = now_secs();
108        let pruned: HashMap<&String, &CacheEntry> = self
109            .entries
110            .iter()
111            .filter(|(_, e)| {
112                if e.status == "error" {
113                    return false;
114                }
115                now.saturating_sub(e.checked_at) <= OK_TTL_SECS
116            })
117            .collect();
118
119        let json = serde_json::to_string_pretty(&pruned)
120            .map_err(|e| format!("failed to serialize cache: {e}"))?;
121        std::fs::write(&self.path, json)
122            .map_err(|e| format!("failed to write cache file {}: {e}", self.path))
123    }
124}
125
/// Current wall-clock time as whole seconds since the Unix epoch.
///
/// Falls back to `0` if the system clock reports a time before the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
131
/// Pull the numeric status code out of a detail string such as
/// `"HTTP 404 Not Found"`.
///
/// Returns `None` when there is no detail, when it does not start with the
/// exact prefix `"HTTP "`, or when the digits that follow are missing or do
/// not fit in a `u16`.
fn extract_http_status(detail: Option<&str>) -> Option<u16> {
    let after_prefix = detail?.strip_prefix("HTTP ")?;
    // The code is the leading run of ASCII digits; anything after it is ignored.
    let digits_end = after_prefix
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(after_prefix.len());
    after_prefix[..digits_end].parse().ok()
}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a per-process, per-test file path inside the system temp directory.
    fn temp_cache_path(name: &str) -> String {
        std::env::temp_dir()
            .join(format!("verify_cache_{}_{name}.json", std::process::id()))
            .to_string_lossy()
            .into_owned()
    }

    #[test]
    fn cache_put_and_get_ok() {
        let mut cache = VerifyCache::empty();

        cache.put("https://example.com", CheckStatus::Ok, None);
        assert!(cache.get("https://example.com").is_some());
    }

    #[test]
    fn cache_error_always_rechecked() {
        let mut cache = VerifyCache::empty();

        cache.put("https://example.com", CheckStatus::Error, Some("HTTP 404"));
        assert!(cache.get("https://example.com").is_none());
    }

    #[test]
    fn cache_expired_entry_not_returned() {
        let mut cache = VerifyCache::empty();

        cache.entries.insert(
            "https://old.com".into(),
            CacheEntry {
                status: "ok".into(),
                checked_at: 0, // epoch = very expired
                http_status: None,
                detail: None,
            },
        );

        assert!(cache.get("https://old.com").is_none());
    }

    #[test]
    fn cache_warn_within_ttl() {
        let mut cache = VerifyCache::empty();

        cache.put("https://example.com", CheckStatus::Warn, Some("timeout"));
        assert!(cache.get("https://example.com").is_some());
    }

    #[test]
    fn cache_unknown_url_returns_none() {
        let cache = VerifyCache::empty();

        assert!(cache.get("https://unknown.com").is_none());
    }

    #[test]
    fn extract_http_status_parses() {
        assert_eq!(extract_http_status(Some("HTTP 404 Not Found")), Some(404));
        assert_eq!(extract_http_status(Some("HTTP 200")), Some(200));
        assert_eq!(extract_http_status(Some("timeout")), None);
        assert_eq!(extract_http_status(None), None);
    }

    #[test]
    fn cache_boundary_enforced() {
        let mut cache = VerifyCache::empty();

        for i in 0..MAX_CACHE_ENTRIES {
            cache.put(&format!("https://example.com/{i}"), CheckStatus::Ok, None);
        }

        // One more should be rejected: every slot is held by a fresh entry.
        cache.put("https://overflow.com", CheckStatus::Ok, None);
        assert!(cache.get("https://overflow.com").is_none());
        assert_eq!(cache.entries.len(), MAX_CACHE_ENTRIES);
    }

    #[test]
    fn cache_update_existing_within_boundary() {
        let mut cache = VerifyCache::empty();

        for i in 0..MAX_CACHE_ENTRIES {
            cache.put(&format!("https://example.com/{i}"), CheckStatus::Ok, None);
        }

        // Updating existing should work even at capacity
        cache.put(
            "https://example.com/0",
            CheckStatus::Error,
            Some("HTTP 500"),
        );
        assert_eq!(cache.entries.len(), MAX_CACHE_ENTRIES);
    }

    #[test]
    fn cache_round_trips_through_file() {
        let path = temp_cache_path("round_trip");
        // Guard against a leftover file from an earlier run with the same pid.
        let _ = std::fs::remove_file(&path);

        let mut cache = VerifyCache::load(&path).expect("missing file loads as empty");
        cache.put("https://example.com", CheckStatus::Ok, Some("HTTP 200 OK"));
        cache.put("https://broken.example", CheckStatus::Error, Some("HTTP 500"));
        cache.save().expect("temp dir is writable");

        let reloaded = VerifyCache::load(&path).expect("saved file is readable");
        let _ = std::fs::remove_file(&path);

        let entry = reloaded
            .get("https://example.com")
            .expect("ok entry is persisted");
        assert_eq!(entry.http_status, Some(200));
        assert_eq!(entry.detail.as_deref(), Some("HTTP 200 OK"));
        // Error results are pruned on save, so they never reach the file.
        assert!(!reloaded.entries.contains_key("https://broken.example"));
    }

    #[test]
    fn load_treats_invalid_json_as_empty() {
        let path = temp_cache_path("invalid_json");
        std::fs::write(&path, "not json").expect("temp dir is writable");

        let cache = VerifyCache::load(&path).expect("unparseable file is not an error");
        let _ = std::fs::remove_file(&path);

        assert!(cache.entries.is_empty());
    }
}
247            Some("HTTP 500"),
248        );
249        assert_eq!(cache.entries.len(), MAX_CACHE_ENTRIES);
250    }
251}