Skip to main content

seekr_code/index/
incremental.rs

1//! Incremental index updates.
2//!
3//! Detects file changes via mtime + content blake3 hash comparison.
4//! Only re-processes changed files' chunks, avoiding full rebuild.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use serde::{Deserialize, Serialize};
11
12use crate::error::IndexError;
13
14/// State of a previously indexed file.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct FileState {
17    /// Last modification time.
18    pub mtime: SystemTime,
19
20    /// Blake3 hash of file content.
21    pub content_hash: String,
22
23    /// Chunk IDs produced from this file.
24    pub chunk_ids: Vec<u64>,
25}
26
27/// The incremental state, tracking which files have been indexed.
28#[derive(Debug, Default, Serialize, Deserialize)]
29pub struct IncrementalState {
30    /// Map from file path to its last indexed state.
31    pub files: HashMap<PathBuf, FileState>,
32}
33
/// Result of comparing a list of current files against the recorded state.
///
/// The default value has all three lists empty.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FileChanges {
    /// Files that are new or whose content differs from the recorded state.
    pub changed: Vec<PathBuf>,

    /// Files present in the recorded state but absent from the current list.
    pub deleted: Vec<PathBuf>,

    /// Files whose recorded state still matches what is on disk.
    pub unchanged: Vec<PathBuf>,
}
46
47impl IncrementalState {
48    /// Load incremental state from disk.
49    pub fn load(path: &Path) -> Result<Self, IndexError> {
50        if !path.exists() {
51            return Ok(Self::default());
52        }
53
54        let data = std::fs::read(path)?;
55        serde_json::from_slice(&data).map_err(|e| {
56            IndexError::Serialization(format!("Failed to load incremental state: {}", e))
57        })
58    }
59
60    /// Save incremental state to disk.
61    pub fn save(&self, path: &Path) -> Result<(), IndexError> {
62        if let Some(parent) = path.parent() {
63            std::fs::create_dir_all(parent)?;
64        }
65
66        let data = serde_json::to_vec_pretty(self)
67            .map_err(|e| IndexError::Serialization(e.to_string()))?;
68        std::fs::write(path, data)?;
69        Ok(())
70    }
71
72    /// Detect changes between the current file system state and the last index.
73    pub fn detect_changes(&self, current_files: &[PathBuf]) -> FileChanges {
74        let mut changed = Vec::new();
75        let mut unchanged = Vec::new();
76
77        let current_set: std::collections::HashSet<&PathBuf> = current_files.iter().collect();
78
79        for file in current_files {
80            if let Some(prev_state) = self.files.get(file) {
81                // Check if file has changed
82                let mtime = std::fs::metadata(file).and_then(|m| m.modified()).ok();
83
84                if mtime != Some(prev_state.mtime) {
85                    // Mtime changed, verify with content hash
86                    if let Ok(content) = std::fs::read(file) {
87                        let hash = blake3::hash(&content).to_hex().to_string();
88                        if hash != prev_state.content_hash {
89                            changed.push(file.clone());
90                        } else {
91                            unchanged.push(file.clone());
92                        }
93                    } else {
94                        changed.push(file.clone());
95                    }
96                } else {
97                    unchanged.push(file.clone());
98                }
99            } else {
100                // New file
101                changed.push(file.clone());
102            }
103        }
104
105        // Detect deleted files
106        let deleted: Vec<PathBuf> = self
107            .files
108            .keys()
109            .filter(|f| !current_set.contains(f))
110            .cloned()
111            .collect();
112
113        FileChanges {
114            changed,
115            deleted,
116            unchanged,
117        }
118    }
119
120    /// Update the state for a file that has been indexed.
121    pub fn update_file(&mut self, path: PathBuf, content: &[u8], chunk_ids: Vec<u64>) {
122        let hash = blake3::hash(content).to_hex().to_string();
123        let mtime = std::fs::metadata(&path)
124            .and_then(|m| m.modified())
125            .unwrap_or(SystemTime::UNIX_EPOCH);
126
127        self.files.insert(
128            path,
129            FileState {
130                mtime,
131                content_hash: hash,
132                chunk_ids,
133            },
134        );
135    }
136
137    /// Remove a file from the incremental state.
138    pub fn remove_file(&mut self, path: &Path) -> Option<FileState> {
139        self.files.remove(path)
140    }
141
142    /// Get chunk IDs associated with a file.
143    pub fn chunk_ids_for_file(&self, path: &Path) -> Vec<u64> {
144        self.files
145            .get(path)
146            .map(|state| state.chunk_ids.clone())
147            .unwrap_or_default()
148    }
149
150    /// Get all chunk IDs from deleted files.
151    pub fn chunk_ids_to_remove(&self, deleted_files: &[PathBuf]) -> Vec<u64> {
152        deleted_files
153            .iter()
154            .flat_map(|path| self.chunk_ids_for_file(path))
155            .collect()
156    }
157
158    /// Merge changes: remove deleted file entries, return IDs to remove from index.
159    pub fn apply_deletions(&mut self, deleted_files: &[PathBuf]) -> Vec<u64> {
160        let mut removed_ids = Vec::new();
161        for path in deleted_files {
162            if let Some(state) = self.remove_file(path) {
163                removed_ids.extend(state.chunk_ids);
164            }
165        }
166        removed_ids
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// A state written with `save` is read back by `load` with its entry intact.
    #[test]
    fn test_incremental_state_save_load() {
        let scratch = tempfile::tempdir().unwrap();
        let state_path = scratch.path().join("state.json");
        let tracked = PathBuf::from("/test/file.rs");

        let mut original = IncrementalState::default();
        original.update_file(tracked.clone(), b"fn main() {}", vec![1, 2, 3]);
        original.save(&state_path).unwrap();

        let reloaded = IncrementalState::load(&state_path).unwrap();
        assert_eq!(reloaded.files.len(), 1);
        assert!(reloaded.files.contains_key(&tracked));
    }

    /// A path unknown to an empty state is reported as changed and nothing else.
    #[test]
    fn test_detect_new_file() {
        let candidates = [PathBuf::from("/new/file.rs")];
        let changes = IncrementalState::default().detect_changes(&candidates);

        assert_eq!(changes.changed.len(), 1);
        assert!(changes.deleted.is_empty());
        assert!(changes.unchanged.is_empty());
    }
}