Skip to main content

shadow_core/store/
fs.rs

1//! Content-addressed trace store on the local filesystem (SPEC §8).
2//!
3//! Layout (git-objects-style sharding):
4//!
5//! ```text
6//! <root>/
7//! ├── ab/
8//! │   └── 1234…ef.agentlog          # trace root id = sha256:ab1234…ef
9//! └── cd/
10//!     └── 5678…90.agentlog
11//! ```
12//!
13//! The trace root's content id is the file's logical name; the first two
14//! hex characters of the digest become the shard directory.
15
16use std::fs;
17use std::io::{BufReader, BufWriter, Write};
18use std::path::{Path, PathBuf};
19
20use thiserror::Error;
21
22use crate::agentlog::hash::{HEX_LEN, ID_PREFIX};
23use crate::agentlog::{parser, writer, Record};
24
/// Errors from [`Store`].
///
/// Every variant's display message ends with a `hint:` line that suggests a
/// remedy to the person reading the error.
#[derive(Debug, Error)]
pub enum StoreError {
    /// Attempted to store an empty trace (no records).
    ///
    /// Produced by [`Store::put`] when the slice has no first record.
    #[error(
        "cannot store an empty trace\nhint: a trace needs at least one record (the metadata root)"
    )]
    Empty,

    /// The trace id does not match SPEC §6 format (`sha256:<64 hex>`).
    ///
    /// The payload is the offending id, verbatim, as passed by the caller.
    #[error("invalid trace id: {0}\nhint: expected `sha256:` followed by 64 lowercase hex chars (SPEC §6)")]
    BadId(String),

    /// Underlying I/O failure.
    ///
    /// `#[from]` lets `?` convert any [`std::io::Error`] into this variant,
    /// so the original error (including its `kind()`) is preserved.
    #[error(
        "io error: {0}\nhint: check permissions on the store directory and available disk space"
    )]
    Io(#[from] std::io::Error),

    /// On-disk parse error while reading a trace back.
    ///
    /// `#[from]` lets `?` convert a [`parser::ParseError`] into this variant.
    #[error("parse error while reading trace: {0}\nhint: the on-disk trace may be corrupt; delete it and re-record if you have a source")]
    Parse(#[from] parser::ParseError),
}
48
/// Result alias for store operations.
///
/// Fixes the error type to [`StoreError`] so signatures in this module can
/// write `Result<T>`.
pub type Result<T> = std::result::Result<T, StoreError>;
51
/// Content-addressed trace store.
///
/// `root` should be an existing directory (typically `.shadow/traces/`).
/// This struct does NOT create the root on construction — callers can
/// choose whether to pre-create it. Note that [`Store::put`] creates the
/// shard directory (and any missing ancestors) on demand, and
/// [`Store::list`] treats a missing root as an empty store.
///
/// The handle is just a path: cloning it is cheap and does not touch the
/// filesystem.
#[derive(Debug, Clone)]
pub struct Store {
    /// Directory under which the two-character shard directories live.
    root: PathBuf,
}
60
61impl Store {
62    /// Wrap an existing root directory. See SPEC §8 for the layout.
63    pub fn new(root: impl Into<PathBuf>) -> Self {
64        Self { root: root.into() }
65    }
66
67    /// The root directory wrapped by this store.
68    pub fn root(&self) -> &Path {
69        &self.root
70    }
71
72    /// Put a trace. Returns the trace's root id (the first record's id).
73    ///
74    /// Writes atomically: the final path does not appear until the write
75    /// has fully completed and flushed to disk.
76    pub fn put(&self, trace: &[Record]) -> Result<String> {
77        let root_record = trace.first().ok_or(StoreError::Empty)?;
78        let trace_id = root_record.id.clone();
79        let dest = self.path_for(&trace_id)?;
80        if let Some(parent) = dest.parent() {
81            fs::create_dir_all(parent)?;
82        }
83        // Write to `<dest>.tmp.<pid>`, fsync, rename. A crash mid-write
84        // leaves at most a .tmp. file, not a half-written real file.
85        let tmp = dest.with_extension("agentlog.tmp");
86        {
87            let file = fs::File::create(&tmp)?;
88            let mut w = BufWriter::new(file);
89            writer::write_all(&mut w, trace)?;
90            w.flush()?;
91        }
92        fs::rename(&tmp, &dest)?;
93        Ok(trace_id)
94    }
95
96    /// Read a trace by its root content id.
97    pub fn get(&self, trace_id: &str) -> Result<Vec<Record>> {
98        let path = self.path_for(trace_id)?;
99        let file = fs::File::open(&path)?;
100        let records = parser::parse_all(BufReader::new(file))?;
101        Ok(records)
102    }
103
104    /// Whether a trace with this id is stored.
105    pub fn exists(&self, trace_id: &str) -> bool {
106        self.path_for(trace_id)
107            .map(|p| p.is_file())
108            .unwrap_or(false)
109    }
110
111    /// Iterate over all trace ids currently in the store.
112    ///
113    /// Walk order is undefined; callers that need a deterministic order
114    /// should collect and sort.
115    pub fn list(&self) -> Result<Vec<String>> {
116        let mut ids = Vec::new();
117        if !self.root.is_dir() {
118            return Ok(ids);
119        }
120        for shard in fs::read_dir(&self.root)? {
121            let shard = shard?;
122            if !shard.file_type()?.is_dir() {
123                continue;
124            }
125            let shard_name = shard.file_name().to_string_lossy().to_string();
126            if shard_name.len() != 2 || !shard_name.chars().all(|c| c.is_ascii_hexdigit()) {
127                continue;
128            }
129            for entry in fs::read_dir(shard.path())? {
130                let entry = entry?;
131                let name = entry.file_name().to_string_lossy().to_string();
132                if let Some(rest) = name.strip_suffix(".agentlog") {
133                    if rest.len() == HEX_LEN - 2 && rest.chars().all(|c| c.is_ascii_hexdigit()) {
134                        ids.push(format!("{ID_PREFIX}{shard_name}{rest}"));
135                    }
136                }
137            }
138        }
139        Ok(ids)
140    }
141
142    /// Compute the on-disk path for a given trace id. Does NOT create
143    /// directories or check whether the file exists.
144    pub fn path_for(&self, trace_id: &str) -> Result<PathBuf> {
145        if !trace_id.starts_with(ID_PREFIX) {
146            return Err(StoreError::BadId(trace_id.to_string()));
147        }
148        let hex = &trace_id[ID_PREFIX.len()..];
149        if hex.len() != HEX_LEN || !hex.chars().all(|c| c.is_ascii_hexdigit()) {
150            return Err(StoreError::BadId(trace_id.to_string()));
151        }
152        let (shard, rest) = hex.split_at(2);
153        Ok(self.root.join(shard).join(format!("{rest}.agentlog")))
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;
    use tempfile::TempDir;

    /// Two-record fixture: a metadata root and a chat request parented to it.
    fn sample_trace() -> Vec<Record> {
        let metadata = Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow", "version": "0.1.0"}}),
            "2026-04-21T10:00:00Z",
            None,
        );
        let parent_id = metadata.id.clone();
        let request = Record::new(
            Kind::ChatRequest,
            json!({"model": "claude-opus-4-7", "messages": [], "params": {}}),
            "2026-04-21T10:00:00.100Z",
            Some(parent_id),
        );
        vec![metadata, request]
    }

    /// A store over a fresh temp directory. The `TempDir` is returned so the
    /// directory outlives the test body.
    fn new_store() -> (Store, TempDir) {
        let tmp = tempfile::tempdir().unwrap();
        let store = Store::new(tmp.path());
        (store, tmp)
    }

    #[test]
    fn put_then_get_roundtrips() {
        let (store, _guard) = new_store();
        let original = sample_trace();
        let stored_id = store.put(&original).unwrap();
        assert_eq!(stored_id, original[0].id);
        assert_eq!(store.get(&stored_id).unwrap(), original);
    }

    #[test]
    fn put_creates_sharded_path() {
        let (store, dir) = new_store();
        let id = store.put(&sample_trace()).unwrap();
        let on_disk = store.path_for(&id).unwrap();
        assert!(on_disk.is_file());
        // The first component below the root is the two-character shard.
        let relative = on_disk.strip_prefix(dir.path()).unwrap();
        let shard = relative.iter().next().unwrap().to_string_lossy();
        assert_eq!(shard.len(), 2);
    }

    #[test]
    fn put_is_idempotent() {
        let (store, _guard) = new_store();
        let trace = sample_trace();
        let first = store.put(&trace).unwrap();
        let second = store.put(&trace).unwrap();
        assert_eq!(first, second);
        // Storing twice must leave exactly one entry in the shard directory.
        let shard_dir = store
            .path_for(&first)
            .unwrap()
            .parent()
            .unwrap()
            .to_path_buf();
        assert_eq!(fs::read_dir(shard_dir).unwrap().count(), 1);
    }

    #[test]
    fn exists_reports_presence() {
        let (store, _guard) = new_store();
        let trace = sample_trace();
        let root_id = &trace[0].id;
        assert!(!store.exists(root_id));
        store.put(&trace).unwrap();
        assert!(store.exists(root_id));
    }

    #[test]
    fn list_returns_all_stored_traces() {
        let (store, _guard) = new_store();
        // Single-record traces that differ only in payload, hence in id.
        let single = |name: &str| {
            vec![Record::new(
                Kind::Metadata,
                json!({"sdk": {"name": name}}),
                "2026-01-01T00:00:00Z",
                None,
            )]
        };
        let id_a = store.put(&single("a")).unwrap();
        let id_b = store.put(&single("b")).unwrap();

        let mut listed = store.list().unwrap();
        listed.sort();
        let mut expected = vec![id_a, id_b];
        expected.sort();
        assert_eq!(listed, expected);
    }

    #[test]
    fn list_on_nonexistent_root_returns_empty() {
        let store = Store::new("/this/path/should/not/exist/for/tests");
        assert!(store.list().unwrap().is_empty());
    }

    #[test]
    fn path_for_rejects_bad_ids() {
        let (store, _guard) = new_store();
        let non_hex = format!("sha256:{}", "z".repeat(64));
        for bad in &["abc", "md5:aaaa", non_hex.as_str()] {
            assert!(matches!(store.path_for(bad), Err(StoreError::BadId(_))));
        }
    }

    #[test]
    fn put_empty_trace_errors() {
        let (store, _guard) = new_store();
        let outcome = store.put(&[]);
        assert!(matches!(outcome, Err(StoreError::Empty)));
    }

    #[test]
    fn get_missing_trace_errors() {
        let (store, _guard) = new_store();
        let absent = format!("sha256:{}", "a".repeat(64));
        let io_err = match store.get(&absent) {
            Err(StoreError::Io(e)) => e,
            other => panic!("expected Io/NotFound, got {other:?}"),
        };
        assert_eq!(io_err.kind(), std::io::ErrorKind::NotFound);
    }

    #[test]
    fn list_ignores_non_trace_files() {
        let (store, dir) = new_store();
        // A shard-shaped directory holding a file without the trace suffix.
        let decoy_shard = dir.path().join("ab");
        fs::create_dir_all(&decoy_shard).unwrap();
        fs::write(decoy_shard.join("not-a-trace.txt"), "oops").unwrap();
        // A directory whose name is not a valid shard.
        fs::create_dir_all(dir.path().join("notashard")).unwrap();

        let stored = store.put(&sample_trace()).unwrap();
        assert_eq!(store.list().unwrap(), vec![stored]);
    }
}