infiniloom_engine/index/
storage.rs

1//! Index storage and serialization.
2//!
3//! Handles saving and loading the symbol index and dependency graph
4//! using bincode for fast binary serialization.
5
6use super::types::{DepGraph, SymbolIndex};
7use bincode::Options;
8use crate::bincode_safe::deserialize_from_with_limit;
9use std::fs::{self, File};
10use std::io::{BufReader, BufWriter, Write};
11use std::path::{Path, PathBuf};
12use thiserror::Error;
13
/// Name of the index storage directory, created directly under the repository root.
pub const INDEX_DIR: &str = ".infiniloom";

/// File inside [`INDEX_DIR`] holding the bincode-serialized symbol index.
pub const INDEX_FILE: &str = "index.bin";
/// File inside [`INDEX_DIR`] holding the bincode-serialized dependency graph.
pub const GRAPH_FILE: &str = "graph.bin";
/// File inside [`INDEX_DIR`] holding the human-readable JSON metadata.
pub const META_FILE: &str = "meta.json";
/// Configuration file name.
///
/// NOTE(review): not read or written anywhere in this module; presumably consumed by
/// other code that looks inside the index directory. Confirm against callers.
pub const CONFIG_FILE: &str = "config.toml";
22
/// Errors that can occur during index storage operations.
///
/// The `#[from]` variants let the storage methods propagate the underlying
/// error with `?` without manual wrapping.
#[derive(Error, Debug)]
pub enum StorageError {
    /// A filesystem operation (create, read, write, rename, remove, stat) failed.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// bincode reported an error while encoding or decoding the index or graph.
    #[error("Serialization error: {0}")]
    Serialize(#[from] bincode::Error),

    /// The metadata could not be converted to or parsed from JSON.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// A file that a `load_*` method needs does not exist; carries the missing path.
    #[error("Index not found at {0}")]
    NotFound(PathBuf),

    /// The loaded symbol index was written with a different format version
    /// than `SymbolIndex::CURRENT_VERSION`.
    #[error("Index version mismatch: found {found}, expected {expected}")]
    VersionMismatch { found: u32, expected: u32 },

    /// The index directory is unusable.
    ///
    /// NOTE(review): never constructed in this module; presumably raised by
    /// callers that validate the directory. Confirm.
    #[error("Invalid index directory: {0}")]
    InvalidDirectory(String),
}
44
/// Metadata about the index (human-readable JSON).
///
/// Stored as pretty-printed JSON in `meta.json` next to the binary index files.
/// `IndexStorage::save_all` builds it from the symbol index it has just written.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexMeta {
    /// Index format version (copied from the symbol index's `version`)
    pub version: u32,
    /// Repository name
    pub repo_name: String,
    /// Git commit hash when index was built (`None` if not recorded)
    pub commit_hash: Option<String>,
    /// Timestamp of index creation (Unix epoch seconds)
    pub created_at: u64,
    /// Number of files indexed
    pub file_count: usize,
    /// Number of symbols indexed
    pub symbol_count: usize,
    /// Total size of index files in bytes.
    ///
    /// As computed by `save_all`, this is the size of `index.bin` plus
    /// `graph.bin`; the metadata file itself is not counted.
    pub index_size_bytes: u64,
}
63
/// Index storage manager.
///
/// Owns the path of a repository's `.infiniloom` directory and provides the
/// save/load/delete operations for the files kept inside it.
pub struct IndexStorage {
    /// Path to the index directory (`<repo_root>/.infiniloom`)
    index_dir: PathBuf,
}
69
70impl IndexStorage {
71    /// Create a new storage manager for a repository
72    pub fn new(repo_root: impl AsRef<Path>) -> Self {
73        Self { index_dir: repo_root.as_ref().join(INDEX_DIR) }
74    }
75
76    /// Get path to the index directory
77    pub fn index_dir(&self) -> &Path {
78        &self.index_dir
79    }
80
81    /// Check if index exists
82    pub fn exists(&self) -> bool {
83        self.index_dir.join(INDEX_FILE).exists() && self.index_dir.join(GRAPH_FILE).exists()
84    }
85
86    /// Initialize the index directory structure
87    pub fn init(&self) -> Result<(), StorageError> {
88        // Create .infiniloom directory
89        fs::create_dir_all(&self.index_dir)?;
90
91        // Create .gitignore for temporary files only
92        let gitignore_path = self.index_dir.join(".gitignore");
93        if !gitignore_path.exists() {
94            fs::write(&gitignore_path, "*.tmp\n*.lock\n")?;
95        }
96
97        Ok(())
98    }
99
100    /// Save the symbol index to disk
101    pub fn save_index(&self, index: &SymbolIndex) -> Result<(), StorageError> {
102        self.init()?;
103
104        let path = self.index_dir.join(INDEX_FILE);
105        let tmp_path = self.index_dir.join(format!("{}.tmp", INDEX_FILE));
106
107        // Write to temp file first for atomicity
108        // Note: Must use bincode::options() to match deserialize_from_with_limit() in load()
109        let file = File::create(&tmp_path)?;
110        let mut writer = BufWriter::new(file);
111        bincode::options().serialize_into(&mut writer, index)?;
112        writer.flush()?;
113
114        // Atomic rename
115        fs::rename(&tmp_path, &path)?;
116
117        Ok(())
118    }
119
120    /// Load the symbol index from disk
121    pub fn load_index(&self) -> Result<SymbolIndex, StorageError> {
122        let path = self.index_dir.join(INDEX_FILE);
123
124        if !path.exists() {
125            return Err(StorageError::NotFound(path));
126        }
127
128        let file_size = fs::metadata(&path)?.len();
129        let file = File::open(&path)?;
130        let reader = BufReader::new(file);
131        let mut index: SymbolIndex = deserialize_from_with_limit(reader, file_size)?;
132
133        // Check version compatibility
134        if index.version != SymbolIndex::CURRENT_VERSION {
135            return Err(StorageError::VersionMismatch {
136                found: index.version,
137                expected: SymbolIndex::CURRENT_VERSION,
138            });
139        }
140
141        // Rebuild lookup tables
142        index.rebuild_lookups();
143
144        Ok(index)
145    }
146
147    /// Save the dependency graph to disk
148    pub fn save_graph(&self, graph: &DepGraph) -> Result<(), StorageError> {
149        self.init()?;
150
151        let path = self.index_dir.join(GRAPH_FILE);
152        let tmp_path = self.index_dir.join(format!("{}.tmp", GRAPH_FILE));
153
154        // Note: Must use bincode::options() to match deserialize_from_with_limit() in load()
155        let file = File::create(&tmp_path)?;
156        let mut writer = BufWriter::new(file);
157        bincode::options().serialize_into(&mut writer, graph)?;
158        writer.flush()?;
159
160        fs::rename(&tmp_path, &path)?;
161
162        Ok(())
163    }
164
165    /// Load the dependency graph from disk
166    pub fn load_graph(&self) -> Result<DepGraph, StorageError> {
167        let path = self.index_dir.join(GRAPH_FILE);
168
169        if !path.exists() {
170            return Err(StorageError::NotFound(path));
171        }
172
173        let file_size = fs::metadata(&path)?.len();
174        let file = File::open(&path)?;
175        let reader = BufReader::new(file);
176        let graph: DepGraph = deserialize_from_with_limit(reader, file_size)?;
177
178        Ok(graph)
179    }
180
181    /// Save index metadata (human-readable JSON)
182    pub fn save_meta(&self, meta: &IndexMeta) -> Result<(), StorageError> {
183        self.init()?;
184
185        let path = self.index_dir.join(META_FILE);
186        let json = serde_json::to_string_pretty(meta)?;
187        fs::write(&path, json)?;
188
189        Ok(())
190    }
191
192    /// Load index metadata
193    pub fn load_meta(&self) -> Result<IndexMeta, StorageError> {
194        let path = self.index_dir.join(META_FILE);
195
196        if !path.exists() {
197            return Err(StorageError::NotFound(path));
198        }
199
200        let content = fs::read_to_string(&path)?;
201        let meta: IndexMeta = serde_json::from_str(&content)?;
202
203        Ok(meta)
204    }
205
206    /// Save everything (index, graph, meta) atomically
207    pub fn save_all(
208        &self,
209        index: &SymbolIndex,
210        graph: &DepGraph,
211    ) -> Result<IndexMeta, StorageError> {
212        // Save index and graph
213        self.save_index(index)?;
214        self.save_graph(graph)?;
215
216        // Calculate sizes
217        let index_size = fs::metadata(self.index_dir.join(INDEX_FILE))?.len();
218        let graph_size = fs::metadata(self.index_dir.join(GRAPH_FILE))?.len();
219
220        // Create and save metadata
221        let meta = IndexMeta {
222            version: index.version,
223            repo_name: index.repo_name.clone(),
224            commit_hash: index.commit_hash.clone(),
225            created_at: index.created_at,
226            file_count: index.files.len(),
227            symbol_count: index.symbols.len(),
228            index_size_bytes: index_size + graph_size,
229        };
230
231        self.save_meta(&meta)?;
232
233        Ok(meta)
234    }
235
236    /// Load everything (index, graph)
237    pub fn load_all(&self) -> Result<(SymbolIndex, DepGraph), StorageError> {
238        let index = self.load_index()?;
239        let graph = self.load_graph()?;
240        Ok((index, graph))
241    }
242
243    /// Get size of stored index files
244    pub fn storage_size(&self) -> u64 {
245        let mut total = 0u64;
246
247        for name in [INDEX_FILE, GRAPH_FILE, META_FILE] {
248            if let Ok(metadata) = fs::metadata(self.index_dir.join(name)) {
249                total += metadata.len();
250            }
251        }
252
253        total
254    }
255
256    /// Delete the index
257    pub fn delete(&self) -> Result<(), StorageError> {
258        if self.index_dir.exists() {
259            fs::remove_dir_all(&self.index_dir)?;
260        }
261        Ok(())
262    }
263}
264
265// Note: Memory-mapped index loader can be added as a future optimization
266// for very large repositories. For now, the standard file-based loader
267// is sufficient and provides good performance.
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::types::{
        FileEntry, FileId, IndexSymbol, IndexSymbolKind, Language, Span, SymbolId, Visibility,
    };
    use tempfile::TempDir;

    /// Storage manager rooted in its own temporary directory.
    ///
    /// The `TempDir` guard is returned as well so the directory stays alive
    /// for the duration of the calling test.
    fn temp_storage() -> (TempDir, IndexStorage) {
        let dir = TempDir::new().unwrap();
        let storage = IndexStorage::new(dir.path());
        (dir, storage)
    }

    /// Index containing one Rust file with a single `main` function symbol.
    fn sample_index() -> SymbolIndex {
        let file = FileEntry {
            id: FileId::new(0),
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [1; 32],
            symbols: 0..1,
            imports: vec![],
            lines: 100,
            tokens: 500,
        };
        let symbol = IndexSymbol {
            id: SymbolId::new(0),
            name: "main".to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span: Span::new(1, 0, 10, 0),
            signature: Some("fn main()".to_owned()),
            parent: None,
            visibility: Visibility::Public,
            docstring: None,
        };

        let mut index = SymbolIndex::new();
        index.repo_name = "test-repo".to_owned();
        index.created_at = 12345;
        index.files.push(file);
        index.symbols.push(symbol);
        index
    }

    /// Graph with a single file-level import edge (file 0 -> file 1).
    fn sample_graph() -> DepGraph {
        let mut graph = DepGraph::new();
        graph.add_file_import(0, 1);
        graph
    }

    #[test]
    fn test_storage_roundtrip() {
        let (_dir, storage) = temp_storage();
        let index = sample_index();
        let graph = sample_graph();

        // Persist index, graph and metadata in one go.
        storage.save_all(&index, &graph).unwrap();

        // The files must now be on disk and non-empty.
        assert!(storage.exists());
        assert!(storage.storage_size() > 0);

        // Reading them back yields the data that was stored.
        let (loaded_index, loaded_graph) = storage.load_all().unwrap();
        assert_eq!(loaded_index.repo_name, "test-repo");
        assert_eq!(loaded_index.files.len(), 1);
        assert_eq!(loaded_index.symbols.len(), 1);
        assert_eq!(loaded_graph.file_imports.len(), 1);

        // Lookup tables are usable after loading.
        assert!(loaded_index.get_file("src/main.rs").is_some());
    }

    #[test]
    fn test_meta_roundtrip() {
        let (_dir, storage) = temp_storage();
        storage.init().unwrap();

        let original = IndexMeta {
            version: 1,
            repo_name: "test".to_owned(),
            commit_hash: Some("abc123".to_owned()),
            created_at: 12345,
            file_count: 10,
            symbol_count: 100,
            index_size_bytes: 1024,
        };
        storage.save_meta(&original).unwrap();

        let restored = storage.load_meta().unwrap();
        assert_eq!(restored.repo_name, "test");
        assert_eq!(restored.file_count, 10);
    }

    #[test]
    fn test_not_found() {
        let (_dir, storage) = temp_storage();

        // Nothing has been saved, so there is no index to find or load.
        assert!(!storage.exists());
        let load_result = storage.load_index();
        assert!(matches!(load_result, Err(StorageError::NotFound(_))));
    }
}