Skip to main content

seekr_code/index/
incremental.rs

1//! Incremental index updates.
2//!
3//! Detects file changes via mtime + content blake3 hash comparison.
4//! Only re-processes changed files' chunks, avoiding full rebuild.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use serde::{Deserialize, Serialize};
11
12use crate::error::IndexError;
13
14/// State of a previously indexed file.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct FileState {
17    /// Last modification time.
18    pub mtime: SystemTime,
19
20    /// Blake3 hash of file content.
21    pub content_hash: String,
22
23    /// Chunk IDs produced from this file.
24    pub chunk_ids: Vec<u64>,
25}
26
27/// The incremental state, tracking which files have been indexed.
28#[derive(Debug, Default, Serialize, Deserialize)]
29pub struct IncrementalState {
30    /// Map from file path to its last indexed state.
31    pub files: HashMap<PathBuf, FileState>,
32}
33
/// Classification of file changes.
///
/// Produced by comparing the set of files currently present against the
/// previously recorded index state. Derives `Clone`, `Default`, `PartialEq`
/// and `Eq` so results can be built incrementally and compared directly in
/// callers and tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FileChanges {
    /// Files that are new or have been modified.
    pub changed: Vec<PathBuf>,

    /// Files that have been deleted since last index.
    pub deleted: Vec<PathBuf>,

    /// Files that are unchanged.
    pub unchanged: Vec<PathBuf>,
}
46
47impl IncrementalState {
48    /// Load incremental state from disk.
49    pub fn load(path: &Path) -> Result<Self, IndexError> {
50        if !path.exists() {
51            return Ok(Self::default());
52        }
53
54        let data = std::fs::read(path)?;
55        serde_json::from_slice(&data)
56            .map_err(|e| IndexError::Serialization(format!("Failed to load incremental state: {}", e)))
57    }
58
59    /// Save incremental state to disk.
60    pub fn save(&self, path: &Path) -> Result<(), IndexError> {
61        if let Some(parent) = path.parent() {
62            std::fs::create_dir_all(parent)?;
63        }
64
65        let data = serde_json::to_vec_pretty(self)
66            .map_err(|e| IndexError::Serialization(e.to_string()))?;
67        std::fs::write(path, data)?;
68        Ok(())
69    }
70
71    /// Detect changes between the current file system state and the last index.
72    pub fn detect_changes(&self, current_files: &[PathBuf]) -> FileChanges {
73        let mut changed = Vec::new();
74        let mut unchanged = Vec::new();
75
76        let current_set: std::collections::HashSet<&PathBuf> = current_files.iter().collect();
77
78        for file in current_files {
79            if let Some(prev_state) = self.files.get(file) {
80                // Check if file has changed
81                let mtime = std::fs::metadata(file)
82                    .and_then(|m| m.modified())
83                    .ok();
84
85                if mtime != Some(prev_state.mtime) {
86                    // Mtime changed, verify with content hash
87                    if let Ok(content) = std::fs::read(file) {
88                        let hash = blake3::hash(&content).to_hex().to_string();
89                        if hash != prev_state.content_hash {
90                            changed.push(file.clone());
91                        } else {
92                            unchanged.push(file.clone());
93                        }
94                    } else {
95                        changed.push(file.clone());
96                    }
97                } else {
98                    unchanged.push(file.clone());
99                }
100            } else {
101                // New file
102                changed.push(file.clone());
103            }
104        }
105
106        // Detect deleted files
107        let deleted: Vec<PathBuf> = self
108            .files
109            .keys()
110            .filter(|f| !current_set.contains(f))
111            .cloned()
112            .collect();
113
114        FileChanges {
115            changed,
116            deleted,
117            unchanged,
118        }
119    }
120
121    /// Update the state for a file that has been indexed.
122    pub fn update_file(&mut self, path: PathBuf, content: &[u8], chunk_ids: Vec<u64>) {
123        let hash = blake3::hash(content).to_hex().to_string();
124        let mtime = std::fs::metadata(&path)
125            .and_then(|m| m.modified())
126            .unwrap_or(SystemTime::UNIX_EPOCH);
127
128        self.files.insert(
129            path,
130            FileState {
131                mtime,
132                content_hash: hash,
133                chunk_ids,
134            },
135        );
136    }
137
138    /// Remove a file from the incremental state.
139    pub fn remove_file(&mut self, path: &Path) -> Option<FileState> {
140        self.files.remove(path)
141    }
142
143    /// Get chunk IDs associated with a file.
144    pub fn chunk_ids_for_file(&self, path: &Path) -> Vec<u64> {
145        self.files
146            .get(path)
147            .map(|state| state.chunk_ids.clone())
148            .unwrap_or_default()
149    }
150
151    /// Get all chunk IDs from deleted files.
152    pub fn chunk_ids_to_remove(&self, deleted_files: &[PathBuf]) -> Vec<u64> {
153        deleted_files
154            .iter()
155            .flat_map(|path| self.chunk_ids_for_file(path))
156            .collect()
157    }
158
159    /// Merge changes: remove deleted file entries, return IDs to remove from index.
160    pub fn apply_deletions(&mut self, deleted_files: &[PathBuf]) -> Vec<u64> {
161        let mut removed_ids = Vec::new();
162        for path in deleted_files {
163            if let Some(state) = self.remove_file(path) {
164                removed_ids.extend(state.chunk_ids);
165            }
166        }
167        removed_ids
168    }
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// A state with one tracked file survives a save/load round trip.
    #[test]
    fn test_incremental_state_save_load() {
        let tmp = tempfile::tempdir().unwrap();
        let state_file = tmp.path().join("state.json");
        let tracked = PathBuf::from("/test/file.rs");

        let mut original = IncrementalState::default();
        original.update_file(tracked.clone(), b"fn main() {}", vec![1, 2, 3]);
        original.save(&state_file).unwrap();

        let restored = IncrementalState::load(&state_file).unwrap();
        assert_eq!(restored.files.len(), 1);
        assert!(restored.files.contains_key(&tracked));
    }

    /// A path unknown to an empty state is reported as changed only.
    #[test]
    fn test_detect_new_file() {
        let candidates = [PathBuf::from("/new/file.rs")];
        let FileChanges {
            changed,
            deleted,
            unchanged,
        } = IncrementalState::default().detect_changes(&candidates);

        assert_eq!(changed.len(), 1);
        assert!(deleted.is_empty());
        assert!(unchanged.is_empty());
    }
}