Skip to main content

arbor_graph/
store.rs

1use crate::builder::GraphBuilder;
2use crate::graph::ArborGraph;
3use arbor_core::CodeNode;
4use sled::{Batch, Db};
5use std::path::Path;
6use thiserror::Error;
7
/// Format version stamped into every cache under the `meta:version` key.
///
/// Bump this string whenever the stored schema changes so that caches
/// written by an older layout are detected when they are opened.
const CACHE_VERSION: &str = "arbor-1.3";
10
11#[derive(Error, Debug)]
12pub enum StoreError {
13    #[error("Database error: {0}")]
14    Sled(#[from] sled::Error),
15    #[error("Serialization error: {0}")]
16    Bincode(#[from] bincode::Error),
17    #[error("Corrupted data: {0}")]
18    Corrupted(String),
19    #[error("Cache version mismatch: expected {expected}, found {found}")]
20    VersionMismatch { expected: String, found: String },
21}
22
23pub struct GraphStore {
24    db: Db,
25}
26
27impl GraphStore {
28    /// Opens or creates a graph store at the specified path.
29    /// Returns an error if the cache version doesn't match.
30    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, StoreError> {
31        let db = sled::open(path)?;
32        let store = Self { db };
33
34        // Check cache version
35        if let Some(version_bytes) = store.db.get("meta:version")? {
36            let version: String = bincode::deserialize(&version_bytes)?;
37            if version != CACHE_VERSION {
38                return Err(StoreError::VersionMismatch {
39                    expected: CACHE_VERSION.to_string(),
40                    found: version,
41                });
42            }
43        } else {
44            // New cache, set version
45            let version_bytes = bincode::serialize(&CACHE_VERSION.to_string())?;
46            store.db.insert("meta:version", version_bytes)?;
47        }
48
49        Ok(store)
50    }
51
52    /// Opens a store, clearing it if version mismatches.
53    pub fn open_or_reset<P: AsRef<Path>>(path: P) -> Result<Self, StoreError> {
54        match Self::open(path.as_ref()) {
55            Ok(store) => Ok(store),
56            Err(StoreError::VersionMismatch { .. }) => {
57                // Clear and reopen
58                let db = sled::open(path.as_ref())?;
59                db.clear()?;
60                let version_bytes = bincode::serialize(&CACHE_VERSION.to_string())?;
61                db.insert("meta:version", version_bytes)?;
62                db.flush()?;
63                Ok(Self { db })
64            }
65            Err(e) => Err(e),
66        }
67    }
68
69    /// Gets the stored mtime for a file.
70    pub fn get_mtime(&self, file_path: &str) -> Result<Option<u64>, StoreError> {
71        let key = format!("m:{}", file_path);
72        match self.db.get(&key)? {
73            Some(bytes) => {
74                let mtime: u64 = bincode::deserialize(&bytes)?;
75                Ok(Some(mtime))
76            }
77            None => Ok(None),
78        }
79    }
80
81    /// Gets the stored nodes for a file.
82    pub fn get_file_nodes(&self, file_path: &str) -> Result<Option<Vec<CodeNode>>, StoreError> {
83        let file_key = format!("f:{}", file_path);
84        match self.db.get(&file_key)? {
85            Some(index_bytes) => {
86                let node_ids: Vec<String> = bincode::deserialize(&index_bytes)?;
87                let mut nodes = Vec::with_capacity(node_ids.len());
88                for id in node_ids {
89                    let node_key = format!("n:{}", id);
90                    if let Some(node_bytes) = self.db.get(&node_key)? {
91                        let node: CodeNode = bincode::deserialize(&node_bytes)?;
92                        nodes.push(node);
93                    }
94                }
95                Ok(Some(nodes))
96            }
97            None => Ok(None),
98        }
99    }
100
101    /// Updates the nodes and mtime for a specific file.
102    ///
103    /// This operation is atomic: it removes old nodes associated with the file
104    /// and inserts the new ones.
105    pub fn update_file(
106        &self,
107        file_path: &str,
108        nodes: &[CodeNode],
109        mtime: u64,
110    ) -> Result<(), StoreError> {
111        let file_key = format!("f:{}", file_path);
112        let mtime_key = format!("m:{}", file_path);
113        let mut batch = Batch::default();
114
115        // 1. Get old nodes for this file
116        if let Some(old_bytes) = self.db.get(&file_key)? {
117            let old_ids: Vec<String> = bincode::deserialize(&old_bytes)?;
118            for id in old_ids {
119                batch.remove(format!("n:{}", id).as_bytes());
120            }
121        }
122
123        // 2. Insert new nodes
124        let mut new_ids = Vec::with_capacity(nodes.len());
125        for node in nodes {
126            let node_key = format!("n:{}", node.id);
127            let bytes = bincode::serialize(node)?;
128            batch.insert(node_key.as_bytes(), bytes);
129            new_ids.push(node.id.clone());
130        }
131
132        // 3. Update file index
133        let index_bytes = bincode::serialize(&new_ids)?;
134        batch.insert(file_key.as_bytes(), index_bytes);
135
136        // 4. Update mtime
137        let mtime_bytes = bincode::serialize(&mtime)?;
138        batch.insert(mtime_key.as_bytes(), mtime_bytes);
139
140        // 5. Commit batch
141        self.db.apply_batch(batch)?;
142        self.db.flush()?;
143        Ok(())
144    }
145
146    /// Removes a file from the cache (for deleted files).
147    pub fn remove_file(&self, file_path: &str) -> Result<(), StoreError> {
148        let file_key = format!("f:{}", file_path);
149        let mtime_key = format!("m:{}", file_path);
150        let mut batch = Batch::default();
151
152        // Remove nodes
153        if let Some(old_bytes) = self.db.get(&file_key)? {
154            let old_ids: Vec<String> = bincode::deserialize(&old_bytes)?;
155            for id in old_ids {
156                batch.remove(format!("n:{}", id).as_bytes());
157            }
158        }
159
160        batch.remove(file_key.as_bytes());
161        batch.remove(mtime_key.as_bytes());
162
163        self.db.apply_batch(batch)?;
164        self.db.flush()?;
165        Ok(())
166    }
167
168    /// Lists all cached file paths.
169    pub fn list_cached_files(&self) -> Result<Vec<String>, StoreError> {
170        let mut files = Vec::new();
171        let prefix = b"f:";
172        for item in self.db.scan_prefix(prefix) {
173            let (key, _) = item?;
174            let key_str = String::from_utf8_lossy(&key);
175            if let Some(file_path) = key_str.strip_prefix("f:") {
176                files.push(file_path.to_string());
177            }
178        }
179        Ok(files)
180    }
181
182    /// Loads the entire graph from the store.
183    ///
184    /// This iterates over all stored nodes and reconstructs the ArborGraph
185    /// using the GraphBuilder (which re-links edges).
186    pub fn load_graph(&self) -> Result<ArborGraph, StoreError> {
187        let mut builder = GraphBuilder::new();
188        let mut nodes = Vec::new();
189
190        // Iterate over all keys starting with "n:"
191        let prefix = b"n:";
192        for item in self.db.scan_prefix(prefix) {
193            let (_key, value) = item?;
194            let node: CodeNode = bincode::deserialize(&value)?;
195            nodes.push(node);
196        }
197
198        if nodes.is_empty() {
199            // Return empty graph
200            return Ok(ArborGraph::new());
201        }
202
203        // Reconstruct graph
204        builder.add_nodes(nodes);
205        // resolve_edges() is called by build()
206        let graph = builder.build();
207
208        Ok(graph)
209    }
210
211    /// Clears the stored graph.
212    pub fn clear(&self) -> Result<(), StoreError> {
213        self.db.clear()?;
214        // Re-set version after clear
215        let version_bytes = bincode::serialize(&CACHE_VERSION.to_string())?;
216        self.db.insert("meta:version", version_bytes)?;
217        self.db.flush()?;
218        Ok(())
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use arbor_core::NodeKind;
    use tempfile::tempdir;

    /// Replacing a file's nodes drops the ones no longer present and updates the mtime.
    #[test]
    fn test_incremental_updates() {
        let dir = tempdir().unwrap();
        let store = GraphStore::open(dir.path()).unwrap();

        let node1 = CodeNode::new("foo", "foo", NodeKind::Function, "test.rs");
        let node2 = CodeNode::new("bar", "bar", NodeKind::Function, "test.rs");

        // Initial update with mtime
        store
            .update_file("test.rs", &[node1.clone(), node2.clone()], 1000)
            .unwrap();

        // Verify load
        let graph = store.load_graph().unwrap();
        assert_eq!(graph.node_count(), 2);

        // Verify mtime was stored
        assert_eq!(store.get_mtime("test.rs").unwrap(), Some(1000));

        // Update with one node removed
        store
            .update_file("test.rs", &[node1.clone()], 2000)
            .unwrap();
        let graph2 = store.load_graph().unwrap();
        assert_eq!(graph2.node_count(), 1);
        assert!(!graph2.find_by_name("foo").is_empty());
        assert!(graph2.find_by_name("bar").is_empty());

        // Verify mtime was updated
        assert_eq!(store.get_mtime("test.rs").unwrap(), Some(2000));
    }

    /// Reopening a store written by the same version succeeds.
    #[test]
    fn test_cache_version() {
        let dir = tempdir().unwrap();

        // First open sets version
        let store = GraphStore::open(dir.path()).unwrap();
        drop(store);

        // Second open should succeed with same version
        let store2 = GraphStore::open(dir.path()).unwrap();
        drop(store2);
    }

    /// A stale version marker makes `open` fail and `open_or_reset` wipe the cache.
    #[test]
    fn test_open_or_reset_clears_stale_cache() {
        let dir = tempdir().unwrap();

        {
            let store = GraphStore::open(dir.path()).unwrap();
            let node = CodeNode::new("foo", "foo", NodeKind::Function, "test.rs");
            store.update_file("test.rs", &[node], 1000).unwrap();

            // Overwrite the marker with a version this build does not accept.
            let stale = bincode::serialize(&"arbor-0.0".to_string()).unwrap();
            store.db.insert("meta:version", stale).unwrap();
            store.db.flush().unwrap();
        }

        // Plain open must reject the stale cache.
        assert!(matches!(
            GraphStore::open(dir.path()),
            Err(StoreError::VersionMismatch { .. })
        ));

        // open_or_reset must succeed and leave an empty, usable cache.
        let store = GraphStore::open_or_reset(dir.path()).unwrap();
        assert!(store.list_cached_files().unwrap().is_empty());
        assert!(store.get_mtime("test.rs").unwrap().is_none());
        assert!(store.get_file_nodes("test.rs").unwrap().is_none());
        drop(store);

        // The reset cache carries the current version again.
        let reopened = GraphStore::open(dir.path()).unwrap();
        drop(reopened);
    }

    /// Removing a file deletes both its mtime and its node index.
    #[test]
    fn test_remove_file() {
        let dir = tempdir().unwrap();
        let store = GraphStore::open(dir.path()).unwrap();

        let node = CodeNode::new("foo", "foo", NodeKind::Function, "test.rs");
        store.update_file("test.rs", &[node], 1000).unwrap();

        // Verify file exists
        assert!(store.get_mtime("test.rs").unwrap().is_some());
        assert!(store.get_file_nodes("test.rs").unwrap().is_some());

        // Remove file
        store.remove_file("test.rs").unwrap();

        // Verify file is gone
        assert!(store.get_mtime("test.rs").unwrap().is_none());
        assert!(store.get_file_nodes("test.rs").unwrap().is_none());
    }

    /// Every file that was updated shows up in the cached-file listing.
    #[test]
    fn test_list_cached_files() {
        let dir = tempdir().unwrap();
        let store = GraphStore::open(dir.path()).unwrap();

        let node1 = CodeNode::new("foo", "foo", NodeKind::Function, "a.rs");
        let node2 = CodeNode::new("bar", "bar", NodeKind::Function, "b.rs");

        store.update_file("a.rs", &[node1], 1000).unwrap();
        store.update_file("b.rs", &[node2], 2000).unwrap();

        let files = store.list_cached_files().unwrap();
        assert_eq!(files.len(), 2);
        assert!(files.contains(&"a.rs".to_string()));
        assert!(files.contains(&"b.rs".to_string()));
    }
}