Skip to main content

infiniloom_engine/index/
storage.rs

1//! Index storage and serialization.
2//!
3//! Handles saving and loading the symbol index and dependency graph
4//! using bincode for fast binary serialization.
5
6use super::types::{DepGraph, SymbolIndex};
7use crate::bincode_safe::{deserialize_from_with_limit, serialize_into};
8use std::fs::{self, File};
9use std::io::{BufReader, BufWriter, Write};
10use std::path::{Path, PathBuf};
11use thiserror::Error;
12
/// Index storage directory name
pub const INDEX_DIR: &str = ".infiniloom";

/// File name of the serialized symbol index inside the index directory
pub const INDEX_FILE: &str = "index.bin";
/// File name of the serialized dependency graph inside the index directory
pub const GRAPH_FILE: &str = "graph.bin";
/// File name of the human-readable JSON metadata inside the index directory
pub const META_FILE: &str = "meta.json";
/// Configuration file name.
/// NOTE(review): not read or written anywhere in the visible part of this
/// module; presumably consumed by another module — confirm.
pub const CONFIG_FILE: &str = "config.toml";
21
/// Errors that can occur during index storage operations
///
/// The `#[from]` variants let `?` convert the underlying library errors
/// automatically inside the storage methods.
#[derive(Error, Debug)]
pub enum StorageError {
    /// A filesystem operation (create, open, write, rename, remove, ...) failed.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// bincode failed to encode a value.
    #[error("Serialization error: {0}")]
    Encode(#[from] bincode::error::EncodeError),

    /// bincode failed to decode a value. The message points users at a
    /// forced rebuild, since an index written by an older version is a
    /// likely cause.
    #[error("Deserialization error (index may have been created by an older version of infiniloom; rebuild with `infiniloom index --force`): {0}")]
    Decode(#[from] bincode::error::DecodeError),

    /// The JSON metadata could not be serialized or parsed.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// The requested file does not exist; carries the path that was looked up.
    #[error("Index not found at {0}")]
    NotFound(PathBuf),

    /// The loaded symbol index reports a version other than
    /// `SymbolIndex::CURRENT_VERSION`.
    #[error("Index version mismatch: found {found}, expected {expected}")]
    VersionMismatch { found: u32, expected: u32 },

    /// The index directory is unusable.
    /// NOTE(review): never constructed in the visible part of this module.
    #[error("Invalid index directory: {0}")]
    InvalidDirectory(String),
}
46
/// Metadata about the index (human-readable JSON)
///
/// Stored as pretty-printed JSON in `META_FILE`, separately from the binary
/// index and graph files.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexMeta {
    /// Index version (copied from the symbol index when written by `save_all`)
    pub version: u32,
    /// Repository name
    pub repo_name: String,
    /// Git commit hash when index was built
    pub commit_hash: Option<String>,
    /// Timestamp of index creation (Unix epoch seconds)
    pub created_at: u64,
    /// Number of files indexed
    pub file_count: usize,
    /// Number of symbols indexed
    pub symbol_count: usize,
    /// Total size of index files in bytes. When produced by `save_all` this is
    /// the size of the index file plus the graph file; the metadata file
    /// itself is not counted.
    pub index_size_bytes: u64,
}
65
/// Index storage manager
///
/// Holds the path of the on-disk index directory and provides the save/load
/// operations for the symbol index, dependency graph and metadata kept there.
pub struct IndexStorage {
    /// Path to the index directory (.infiniloom)
    index_dir: PathBuf,
}
71
72impl IndexStorage {
73    /// Create a new storage manager for a repository
74    pub fn new(repo_root: impl AsRef<Path>) -> Self {
75        Self { index_dir: repo_root.as_ref().join(INDEX_DIR) }
76    }
77
78    /// Get path to the index directory
79    pub fn index_dir(&self) -> &Path {
80        &self.index_dir
81    }
82
83    /// Check if index exists
84    pub fn exists(&self) -> bool {
85        self.index_dir.join(INDEX_FILE).exists() && self.index_dir.join(GRAPH_FILE).exists()
86    }
87
88    /// Initialize the index directory structure
89    pub fn init(&self) -> Result<(), StorageError> {
90        // Create .infiniloom directory
91        fs::create_dir_all(&self.index_dir)?;
92
93        // Create .gitignore for temporary files only
94        let gitignore_path = self.index_dir.join(".gitignore");
95        if !gitignore_path.exists() {
96            fs::write(&gitignore_path, "*.tmp\n*.lock\n")?;
97        }
98
99        Ok(())
100    }
101
102    /// Save the symbol index to disk
103    pub fn save_index(&self, index: &SymbolIndex) -> Result<(), StorageError> {
104        self.init()?;
105
106        let path = self.index_dir.join(INDEX_FILE);
107        let tmp_path = self.index_dir.join(format!("{}.tmp", INDEX_FILE));
108
109        // Write to temp file first for atomicity
110        let file = File::create(&tmp_path)?;
111        let mut writer = BufWriter::new(file);
112        serialize_into(&mut writer, index)?;
113        writer.flush()?;
114
115        // Atomic rename
116        fs::rename(&tmp_path, &path)?;
117
118        Ok(())
119    }
120
121    /// Load the symbol index from disk
122    pub fn load_index(&self) -> Result<SymbolIndex, StorageError> {
123        let path = self.index_dir.join(INDEX_FILE);
124
125        if !path.exists() {
126            return Err(StorageError::NotFound(path));
127        }
128
129        let file = File::open(&path)?;
130        let reader = BufReader::new(file);
131        let mut index: SymbolIndex = deserialize_from_with_limit(reader)?;
132
133        // Check version compatibility
134        if index.version != SymbolIndex::CURRENT_VERSION {
135            return Err(StorageError::VersionMismatch {
136                found: index.version,
137                expected: SymbolIndex::CURRENT_VERSION,
138            });
139        }
140
141        // Rebuild lookup tables
142        index.rebuild_lookups();
143
144        Ok(index)
145    }
146
147    /// Save the dependency graph to disk
148    pub fn save_graph(&self, graph: &DepGraph) -> Result<(), StorageError> {
149        self.init()?;
150
151        let path = self.index_dir.join(GRAPH_FILE);
152        let tmp_path = self.index_dir.join(format!("{}.tmp", GRAPH_FILE));
153
154        let file = File::create(&tmp_path)?;
155        let mut writer = BufWriter::new(file);
156        serialize_into(&mut writer, graph)?;
157        writer.flush()?;
158
159        fs::rename(&tmp_path, &path)?;
160
161        Ok(())
162    }
163
164    /// Load the dependency graph from disk
165    pub fn load_graph(&self) -> Result<DepGraph, StorageError> {
166        let path = self.index_dir.join(GRAPH_FILE);
167
168        if !path.exists() {
169            return Err(StorageError::NotFound(path));
170        }
171
172        let file = File::open(&path)?;
173        let reader = BufReader::new(file);
174        let mut graph: DepGraph = deserialize_from_with_limit(reader)?;
175
176        // Rebuild adjacency maps (skipped by serde)
177        graph.rebuild_adjacency_maps();
178
179        Ok(graph)
180    }
181
182    /// Save index metadata (human-readable JSON)
183    pub fn save_meta(&self, meta: &IndexMeta) -> Result<(), StorageError> {
184        self.init()?;
185
186        let path = self.index_dir.join(META_FILE);
187        let json = serde_json::to_string_pretty(meta)?;
188        fs::write(&path, json)?;
189
190        Ok(())
191    }
192
193    /// Load index metadata
194    pub fn load_meta(&self) -> Result<IndexMeta, StorageError> {
195        let path = self.index_dir.join(META_FILE);
196
197        if !path.exists() {
198            return Err(StorageError::NotFound(path));
199        }
200
201        let content = fs::read_to_string(&path)?;
202        let meta: IndexMeta = serde_json::from_str(&content)?;
203
204        Ok(meta)
205    }
206
207    /// Save everything (index, graph, meta) atomically
208    pub fn save_all(
209        &self,
210        index: &SymbolIndex,
211        graph: &DepGraph,
212    ) -> Result<IndexMeta, StorageError> {
213        // Save index and graph
214        self.save_index(index)?;
215        self.save_graph(graph)?;
216
217        // Calculate sizes
218        let index_size = fs::metadata(self.index_dir.join(INDEX_FILE))?.len();
219        let graph_size = fs::metadata(self.index_dir.join(GRAPH_FILE))?.len();
220
221        // Create and save metadata
222        let meta = IndexMeta {
223            version: index.version,
224            repo_name: index.repo_name.clone(),
225            commit_hash: index.commit_hash.clone(),
226            created_at: index.created_at,
227            file_count: index.files.len(),
228            symbol_count: index.symbols.len(),
229            index_size_bytes: index_size + graph_size,
230        };
231
232        self.save_meta(&meta)?;
233
234        Ok(meta)
235    }
236
237    /// Load everything (index, graph)
238    pub fn load_all(&self) -> Result<(SymbolIndex, DepGraph), StorageError> {
239        let index = self.load_index()?;
240        let graph = self.load_graph()?;
241        Ok((index, graph))
242    }
243
244    /// Get size of stored index files
245    pub fn storage_size(&self) -> u64 {
246        let mut total = 0u64;
247
248        for name in [INDEX_FILE, GRAPH_FILE, META_FILE] {
249            if let Ok(metadata) = fs::metadata(self.index_dir.join(name)) {
250                total += metadata.len();
251            }
252        }
253
254        total
255    }
256
257    /// Delete the index
258    pub fn delete(&self) -> Result<(), StorageError> {
259        if self.index_dir.exists() {
260            fs::remove_dir_all(&self.index_dir)?;
261        }
262        Ok(())
263    }
264}
265
266// Note: Memory-mapped index loader can be added as a future optimization
267// for very large repositories. For now, the standard file-based loader
268// is sufficient and provides good performance.
269
// Unit tests for `IndexStorage`. Each test works inside its own temporary
// directory, so nothing is written to the real repository.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::types::{
        FileEntry, FileId, IndexSymbol, IndexSymbolKind, Language, Span, SymbolId, Visibility,
    };
    use tempfile::TempDir;

    /// Saves an index with one file and one symbol plus a one-edge graph, then
    /// loads both back and checks the contents and the rebuilt lookup structures.
    #[test]
    fn test_storage_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let storage = IndexStorage::new(tmp.path());

        // Create test index
        let mut index = SymbolIndex::new();
        index.repo_name = "test-repo".to_owned();
        index.created_at = 12345;
        index.files.push(FileEntry {
            id: FileId::new(0),
            path: "src/main.rs".to_owned(),
            language: Language::Rust,
            content_hash: [1; 32],
            symbols: 0..1,
            imports: vec![],
            lines: 100,
            tokens: 500,
        });
        index.symbols.push(IndexSymbol {
            id: SymbolId::new(0),
            name: "main".to_owned(),
            kind: IndexSymbolKind::Function,
            file_id: FileId::new(0),
            span: Span::new(1, 0, 10, 0),
            signature: Some("fn main()".to_owned()),
            parent: None,
            visibility: Visibility::Public,
            docstring: None,
        });

        // Create test graph
        let mut graph = DepGraph::new();
        graph.add_file_import(0, 1);

        // Save
        storage.save_all(&index, &graph).unwrap();

        // Verify files exist
        assert!(storage.exists());
        assert!(storage.storage_size() > 0);

        // Load and verify
        let (loaded_index, loaded_graph) = storage.load_all().unwrap();
        assert_eq!(loaded_index.repo_name, "test-repo");
        assert_eq!(loaded_index.files.len(), 1);
        assert_eq!(loaded_index.symbols.len(), 1);
        assert_eq!(loaded_graph.file_imports.len(), 1);

        // Verify adjacency maps are rebuilt after deserialization
        let importers = loaded_graph.get_importers(1);
        assert_eq!(importers.len(), 1);
        assert_eq!(importers[0], 0);

        // Verify lookups work
        assert!(loaded_index.get_file("src/main.rs").is_some());
    }

    /// Writes metadata as JSON and reads it back, checking a couple of fields.
    #[test]
    fn test_meta_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let storage = IndexStorage::new(tmp.path());
        storage.init().unwrap();

        let meta = IndexMeta {
            version: 1,
            repo_name: "test".to_owned(),
            commit_hash: Some("abc123".to_owned()),
            created_at: 12345,
            file_count: 10,
            symbol_count: 100,
            index_size_bytes: 1024,
        };

        storage.save_meta(&meta).unwrap();
        let loaded = storage.load_meta().unwrap();

        assert_eq!(loaded.repo_name, "test");
        assert_eq!(loaded.file_count, 10);
    }

    /// With nothing saved, `exists` is false and loading the index reports
    /// `StorageError::NotFound`.
    #[test]
    fn test_not_found() {
        let tmp = TempDir::new().unwrap();
        let storage = IndexStorage::new(tmp.path());

        assert!(!storage.exists());
        assert!(matches!(storage.load_index(), Err(StorageError::NotFound(_))));
    }
}