Skip to main content

graphy_core/
storage.rs

1use std::path::Path;
2
3use redb::{Database, TableDefinition};
4use sha2::{Digest, Sha256};
5use tracing::info;
6
7use crate::error::GraphyError;
8use crate::graph::CodeGraph;
9
/// redb table that holds the serialized graph bytes, keyed by string.
const GRAPH_TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("graph");
/// redb table that holds string metadata about the stored graph
/// (format version and integrity checksum).
const META_TABLE: TableDefinition<&str, &str> = TableDefinition::new("meta");

/// Key in `GRAPH_TABLE` under which the whole serialized `CodeGraph` is stored.
const GRAPH_KEY: &str = "code_graph";
/// Key in `META_TABLE` for the hex-encoded SHA-256 checksum of the graph bytes.
const CHECKSUM_KEY: &str = "checksum";
/// Key in `META_TABLE` for the storage format version.
const VERSION_KEY: &str = "version";
/// Version written by `save_graph`; `load_graph` rejects databases whose
/// stored version differs from this value.
const CURRENT_VERSION: &str = "0.2.0";
17
18/// Compute a SHA-256 checksum of the serialized bytes for integrity verification.
19fn compute_checksum(data: &[u8]) -> String {
20    let hash = Sha256::digest(data);
21    format!("{:064x}", hash)
22}
23
24/// Save the CodeGraph to a redb database file.
25pub fn save_graph(graph: &CodeGraph, path: &Path) -> Result<(), GraphyError> {
26    let encoded = bincode::serialize(graph)
27        .map_err(|e| GraphyError::Storage(format!("Serialization failed: {e}")))?;
28
29    let checksum = compute_checksum(&encoded);
30
31    let db = Database::create(path)
32        .map_err(|e| GraphyError::Storage(format!("Database create failed: {e}")))?;
33
34    let txn = db
35        .begin_write()
36        .map_err(|e| GraphyError::Storage(format!("Transaction begin failed: {e}")))?;
37    {
38        let mut table = txn
39            .open_table(GRAPH_TABLE)
40            .map_err(|e| GraphyError::Storage(format!("Open table failed: {e}")))?;
41        table
42            .insert(GRAPH_KEY, encoded.as_slice())
43            .map_err(|e| GraphyError::Storage(format!("Insert failed: {e}")))?;
44    }
45    {
46        let mut meta = txn
47            .open_table(META_TABLE)
48            .map_err(|e| GraphyError::Storage(format!("Open meta table failed: {e}")))?;
49        meta.insert(VERSION_KEY, CURRENT_VERSION)
50            .map_err(|e| GraphyError::Storage(format!("Insert version failed: {e}")))?;
51        meta.insert(CHECKSUM_KEY, checksum.as_str())
52            .map_err(|e| GraphyError::Storage(format!("Insert checksum failed: {e}")))?;
53    }
54    txn.commit()
55        .map_err(|e| GraphyError::Storage(format!("Commit failed: {e}")))?;
56
57    Ok(())
58}
59
60/// Load the CodeGraph from a redb database file.
61pub fn load_graph(path: &Path) -> Result<CodeGraph, GraphyError> {
62    if !path.exists() {
63        return Ok(CodeGraph::new());
64    }
65
66    let db = Database::open(path)
67        .map_err(|e| GraphyError::Storage(format!("Database open failed: {e}")))?;
68
69    let txn = db
70        .begin_read()
71        .map_err(|e| GraphyError::Storage(format!("Read transaction failed: {e}")))?;
72
73    // Check version compatibility before attempting deserialization.
74    // If the stored version doesn't match CURRENT_VERSION, the schema may have
75    // changed (new NodeKind variants, new fields, etc.) and deserialization
76    // would likely fail or produce corrupt data.
77    if let Ok(meta) = txn.open_table(META_TABLE) {
78        if let Ok(Some(stored_version)) = meta.get(VERSION_KEY) {
79            let stored = stored_version.value().to_string();
80            if stored != CURRENT_VERSION {
81                return Err(GraphyError::Storage(format!(
82                    "Index version mismatch: stored={stored}, current={CURRENT_VERSION}. \
83                     Re-indexing required."
84                )));
85            }
86        }
87    }
88
89    let table = txn
90        .open_table(GRAPH_TABLE)
91        .map_err(|e| GraphyError::Storage(format!("Open table failed: {e}")))?;
92
93    let entry = table
94        .get(GRAPH_KEY)
95        .map_err(|e| GraphyError::Storage(format!("Get failed: {e}")))?
96        .ok_or_else(|| GraphyError::Storage("No graph data found".into()))?;
97
98    let bytes = entry.value();
99
100    // Verify SHA-256 integrity checksum if present
101    if let Ok(meta) = txn.open_table(META_TABLE) {
102        if let Ok(Some(stored)) = meta.get(CHECKSUM_KEY) {
103            let stored_checksum = stored.value().to_string();
104            let actual_checksum = compute_checksum(bytes);
105            if stored_checksum != actual_checksum {
106                return Err(GraphyError::Storage(
107                    "Integrity check failed: SHA-256 checksum mismatch. \
108                     The database may be corrupted. Try re-indexing."
109                        .into(),
110                ));
111            }
112        } else {
113            info!("No checksum found in database (pre-checksum format), skipping verification");
114        }
115    }
116
117    let graph: CodeGraph = bincode::deserialize(bytes)
118        .map_err(|e| GraphyError::Storage(format!("Deserialization failed: {e}")))?;
119
120    Ok(graph)
121}
122
/// Returns the conventional database location for a project:
/// `<project_root>/.graphy/index.redb`.
///
/// The path is only computed; nothing is created on disk.
pub fn default_db_path(project_root: &Path) -> std::path::PathBuf {
    let mut db_path = project_root.to_path_buf();
    db_path.push(".graphy");
    db_path.push("index.redb");
    db_path
}
127
#[cfg(test)]
mod tests {
    use super::*;
    use crate::gir::*;
    use std::path::PathBuf;

    /// Builds the single Python function node used as a fixture below.
    fn sample_node() -> GirNode {
        GirNode::new(
            "test_func".into(),
            NodeKind::Function,
            PathBuf::from("test.py"),
            Span::new(1, 0, 10, 0),
            Language::Python,
        )
    }

    /// A saved graph must come back with the same node.
    #[test]
    fn round_trip() {
        let mut graph = CodeGraph::new();
        let node = sample_node();
        let id = node.id;
        graph.add_node(node);

        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("test.redb");

        save_graph(&graph, &db_path).unwrap();
        let loaded = load_graph(&db_path).unwrap();

        assert_eq!(loaded.node_count(), 1);
        assert!(loaded.get_node(id).is_some());
        assert_eq!(loaded.get_node(id).unwrap().name, "test_func");
    }

    /// Replacing the graph bytes without updating the checksum must be detected.
    #[test]
    fn checksum_detects_corruption() {
        let mut graph = CodeGraph::new();
        graph.add_node(sample_node());

        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("test.redb");

        save_graph(&graph, &db_path).unwrap();

        // Tamper with the stored data — overwrite graph bytes but keep old checksum
        {
            let db = Database::create(&db_path).unwrap();
            let txn = db.begin_write().unwrap();
            {
                let mut table = txn.open_table(GRAPH_TABLE).unwrap();
                table.insert(GRAPH_KEY, &[0u8, 1, 2, 3] as &[u8]).unwrap();
            }
            txn.commit().unwrap();
        }

        let result = load_graph(&db_path);
        assert!(result.is_err());
        let err_msg = format!("{}", result.unwrap_err());
        assert!(err_msg.contains("Integrity check failed"));
    }

    /// `compute_checksum` hashes the raw input bytes, so it must reproduce the
    /// published SHA-256 digest of a known input.
    #[test]
    fn checksum_is_sha256() {
        let checksum = compute_checksum(b"hello world");
        // SHA-256 produces a 64-char hex string
        assert_eq!(checksum.len(), 64);
        // Known SHA-256 of the ASCII bytes "hello world"
        assert_eq!(
            checksum,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
        // Different inputs produce different checksums
        assert_ne!(compute_checksum(b"hello world"), compute_checksum(b"hello world!"));
    }

    /// A database whose graph table exists but is empty must be rejected.
    #[test]
    fn missing_graph_data_returns_error() {
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("corrupt.redb");

        // Create DB with graph table present but NO graph key inserted
        {
            let db = Database::create(&db_path).unwrap();
            let txn = db.begin_write().unwrap();
            {
                let mut meta = txn.open_table(META_TABLE).unwrap();
                meta.insert(VERSION_KEY, CURRENT_VERSION).unwrap();
            }
            {
                // Open graph table to create it, but don't insert any data
                let _table = txn.open_table(GRAPH_TABLE).unwrap();
            }
            txn.commit().unwrap();
        }

        let result = load_graph(&db_path);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("No graph data found"));
    }

    /// A stored version other than `CURRENT_VERSION` must be refused before
    /// any graph data is read.
    #[test]
    fn version_mismatch_returns_error() {
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("old.redb");

        {
            let db = Database::create(&db_path).unwrap();
            let txn = db.begin_write().unwrap();
            {
                let mut meta = txn.open_table(META_TABLE).unwrap();
                meta.insert(VERSION_KEY, "0.0.0").unwrap();
            }
            txn.commit().unwrap();
        }

        let result = load_graph(&db_path);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Index version mismatch"));
    }

    /// Loading from a path that does not exist yields an empty graph, not an error.
    #[test]
    fn missing_file_returns_empty_graph() {
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("does_not_exist.redb");

        let loaded = load_graph(&db_path).unwrap();
        assert_eq!(loaded.node_count(), 0);
    }

    /// An empty graph survives a save/load cycle.
    #[test]
    fn round_trip_empty_graph() {
        let graph = CodeGraph::new();
        let dir = tempfile::tempdir().unwrap();
        let db_path = dir.path().join("empty.redb");

        save_graph(&graph, &db_path).unwrap();
        let loaded = load_graph(&db_path).unwrap();
        assert_eq!(loaded.node_count(), 0);
        assert_eq!(loaded.edge_count(), 0);
    }
}