Skip to main content

tatara_lisp_source/
cache.rs

//! Cache abstraction — separates "we already fetched this URL" from the
//! fetcher itself. Hosts can plug in an in-memory, file-backed, or
//! cluster-wide cache.
4
5use std::collections::HashMap;
6use std::path::PathBuf;
7use std::sync::Mutex;
8
/// Storage contract for previously fetched bytes.
///
/// Keys are stable per-URL strings (see [`crate::Source::cache_key`]);
/// values are the raw bytes stored under that key.
pub trait Cache {
    /// Returns an owned copy of the bytes stored under `key`, or `None`
    /// when nothing is available for it.
    fn get(&self, key: &str) -> Option<Vec<u8>>;

    /// Stores `value` under `key`.
    ///
    /// There is no return value, so an implementation has no channel for
    /// reporting a failed store back to the caller.
    fn put(&mut self, key: String, value: Vec<u8>);
}
15
/// Cache that keeps every entry in process memory — handy for one-off
/// scripts and tests.
#[derive(Default, Debug)]
pub struct MemoryCache {
    // Key → stored bytes, guarded by a mutex.
    inner: Mutex<HashMap<String, Vec<u8>>>,
}
21
22impl Cache for MemoryCache {
23    fn get(&self, key: &str) -> Option<Vec<u8>> {
24        self.inner.lock().ok()?.get(key).cloned()
25    }
26
27    fn put(&mut self, key: String, value: Vec<u8>) {
28        if let Ok(mut g) = self.inner.lock() {
29            g.insert(key, value);
30        }
31    }
32}
33
/// File-system-backed cache — used by `tatara-script` host-side.
///
/// Layout:
/// ```text
/// <root>/
/// └── sources/<hash-of-key>/data   raw bytes
/// ```
///
/// `<hash-of-key>` is `crate::blake3_hex` applied to the cache key itself,
/// not to the stored content. No manifest file is read or written: the key
/// alone determines where an entry lives on disk.
///
/// On lookup, the per-process hot map is consulted first; on a miss the
/// key is hashed and `sources/<hash-of-key>/data` is read. On insert, the
/// bytes are stored in the hot map and written to that same path.
#[derive(Debug)]
pub struct FileCache {
    /// Directory under which the `sources/` tree lives.
    pub root: PathBuf,
    // Hot cache for the current process; filled by `put` only, because
    // `get` takes `&self` and cannot insert disk hits.
    in_memory: HashMap<String, Vec<u8>>,
}
50
51impl FileCache {
52    /// Construct a FileCache backed by `<root>/sources/`.
53    pub fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
54        let root = root.into();
55        std::fs::create_dir_all(root.join("sources"))?;
56        Ok(Self {
57            root,
58            in_memory: HashMap::new(),
59        })
60    }
61
62    fn data_path(&self, blake3: &str) -> PathBuf {
63        self.root.join("sources").join(blake3).join("data")
64    }
65}
66
67impl Cache for FileCache {
68    fn get(&self, key: &str) -> Option<Vec<u8>> {
69        if let Some(bytes) = self.in_memory.get(key) {
70            return Some(bytes.clone());
71        }
72        // Fall back to disk: the manifest maps URL→blake3, but for
73        // simplicity FileCache hashes the key itself rather than a
74        // separate manifest. blake3 of the key acts as the file-name.
75        let key_hash = crate::blake3_hex(key.as_bytes());
76        let p = self.data_path(&key_hash);
77        std::fs::read(p).ok()
78    }
79
80    fn put(&mut self, key: String, value: Vec<u8>) {
81        self.in_memory.insert(key.clone(), value.clone());
82        let key_hash = crate::blake3_hex(key.as_bytes());
83        let p = self.data_path(&key_hash);
84        if let Some(parent) = p.parent() {
85            let _ = std::fs::create_dir_all(parent);
86        }
87        let _ = std::fs::write(p, value);
88    }
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// A value put into a `MemoryCache` is returned by a later `get`.
    #[test]
    fn memory_cache_round_trip() {
        let mut cache = MemoryCache::default();
        assert!(cache.get("k").is_none());

        cache.put(String::from("k"), b"hello".to_vec());
        assert_eq!(cache.get("k"), Some(b"hello".to_vec()));
    }

    /// A value put into a `FileCache` is returned by the same instance.
    #[test]
    fn file_cache_round_trip() {
        let tmp = tempfile::tempdir().unwrap();
        let mut cache = FileCache::new(tmp.path()).unwrap();
        assert!(cache.get("github:foo").is_none());

        cache.put(String::from("github:foo"), b"sexp bytes".to_vec());
        assert_eq!(cache.get("github:foo"), Some(b"sexp bytes".to_vec()));
    }

    /// A second `FileCache` over the same root sees what the first stored.
    #[test]
    fn file_cache_persists_across_instances() {
        let tmp = tempfile::tempdir().unwrap();

        let mut writer = FileCache::new(tmp.path()).unwrap();
        writer.put(String::from("github:bar"), b"hello".to_vec());
        drop(writer);

        // The reader starts with an empty hot map, so the hit must come
        // from the on-disk file.
        let reader = FileCache::new(tmp.path()).unwrap();
        assert_eq!(reader.get("github:bar"), Some(b"hello".to_vec()));
    }
}