rusty_files/indexer/
incremental.rs

1use crate::core::config::SearchConfig;
2use crate::core::error::Result;
3use crate::core::types::ProgressCallback;
4use crate::filters::ExclusionFilter;
5use crate::indexer::builder::IndexBuilder;
6use crate::indexer::metadata::MetadataExtractor;
7use crate::storage::Database;
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12pub struct IncrementalIndexer {
13    database: Arc<Database>,
14    config: Arc<SearchConfig>,
15    _builder: Arc<IndexBuilder>,
16}
17
18impl IncrementalIndexer {
19    pub fn new(
20        database: Arc<Database>,
21        config: Arc<SearchConfig>,
22        exclusion_filter: Arc<ExclusionFilter>,
23    ) -> Self {
24        let builder = Arc::new(IndexBuilder::new(
25            Arc::clone(&database),
26            Arc::clone(&config),
27            exclusion_filter,
28        ));
29
30        Self {
31            database,
32            config,
33            _builder: builder,
34        }
35    }
36
37    pub fn update<P: AsRef<Path>>(
38        &self,
39        root: P,
40        progress_callback: Option<ProgressCallback>,
41    ) -> Result<UpdateStats> {
42        let root = root.as_ref();
43
44        let existing_files = self.get_indexed_files(root)?;
45        let current_files = self.scan_current_files(root)?;
46
47        let mut stats = UpdateStats::default();
48
49        for path in &current_files {
50            if !existing_files.contains(path) {
51                if let Ok(entry) = MetadataExtractor::extract(path) {
52                    self.database.insert_file(&entry)?;
53                    stats.added += 1;
54                }
55            } else if self.needs_update(path)? {
56                if let Ok(entry) = MetadataExtractor::extract(path) {
57                    self.database.insert_file(&entry)?;
58                    stats.updated += 1;
59                }
60            }
61        }
62
63        for path in &existing_files {
64            if !current_files.contains(path) {
65                self.database.delete_by_path(path)?;
66                stats.removed += 1;
67            }
68        }
69
70        if let Some(callback) = progress_callback {
71            callback(crate::core::types::Progress::new(
72                stats.total(),
73                stats.total(),
74                format!("Update complete: {} changes", stats.total()),
75            ));
76        }
77
78        Ok(stats)
79    }
80
81    pub fn update_file<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
82        let path = path.as_ref();
83
84        if !path.exists() {
85            self.database.delete_by_path(path)?;
86            return Ok(true);
87        }
88
89        let entry = MetadataExtractor::extract(path)?;
90        self.database.insert_file(&entry)?;
91
92        Ok(true)
93    }
94
95    pub fn update_files(&self, paths: &[PathBuf]) -> Result<usize> {
96        let mut updated = 0;
97
98        for path in paths {
99            if self.update_file(path).is_ok() {
100                updated += 1;
101            }
102        }
103
104        Ok(updated)
105    }
106
107    fn get_indexed_files<P: AsRef<Path>>(&self, root: P) -> Result<HashSet<PathBuf>> {
108        let root = root.as_ref();
109        let mut files = HashSet::new();
110        let mut offset = 0;
111        let limit = 1000;
112
113        loop {
114            let batch = self.database.get_all_files(limit, offset)?;
115            if batch.is_empty() {
116                break;
117            }
118
119            for entry in batch {
120                if entry.path.starts_with(root) {
121                    files.insert(entry.path);
122                }
123            }
124
125            offset += limit;
126        }
127
128        Ok(files)
129    }
130
131    fn scan_current_files<P: AsRef<Path>>(&self, root: P) -> Result<HashSet<PathBuf>> {
132        use crate::indexer::walker::DirectoryWalker;
133
134        let walker = DirectoryWalker::new(
135            Arc::clone(&self.config),
136            Arc::new(ExclusionFilter::default()),
137        );
138
139        let paths = walker.walk_parallel(root)?;
140        Ok(paths.into_iter().collect())
141    }
142
143    fn needs_update<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
144        let path = path.as_ref();
145
146        if let Some(existing) = self.database.find_by_path(path)? {
147            if let Some(last_modified) = existing.modified_at {
148                return MetadataExtractor::is_modified_since(path, last_modified);
149            }
150        }
151
152        Ok(true)
153    }
154
155    pub fn verify_index<P: AsRef<Path>>(&self, root: P) -> Result<VerificationStats> {
156        let root = root.as_ref();
157        let indexed_files = self.get_indexed_files(root)?;
158
159        let mut stats = VerificationStats::default();
160        stats.total_indexed = indexed_files.len();
161
162        for path in indexed_files {
163            if !path.exists() {
164                stats.missing += 1;
165            } else if self.needs_update(&path)? {
166                stats.outdated += 1;
167            } else {
168                stats.valid += 1;
169            }
170        }
171
172        Ok(stats)
173    }
174}
175
/// Counts of the index changes made by one incremental update.
#[derive(Debug, Default, Clone)]
pub struct UpdateStats {
    /// Files inserted because they were not in the index before.
    pub added: usize,
    /// Files re-inserted because their index record was stale.
    pub updated: usize,
    /// Records deleted because the file was no longer found.
    pub removed: usize,
}

impl UpdateStats {
    /// Returns the combined number of additions, updates, and removals.
    pub fn total(&self) -> usize {
        [self.added, self.updated, self.removed].iter().sum()
    }
}
188
/// Result of checking indexed paths against the filesystem.
#[derive(Debug, Default, Clone)]
pub struct VerificationStats {
    /// Number of indexed paths that were examined.
    pub total_indexed: usize,
    /// Paths that exist and whose record is current.
    pub valid: usize,
    /// Paths that exist but whose record is stale.
    pub outdated: usize,
    /// Paths that no longer exist on disk.
    pub missing: usize,
}

impl VerificationStats {
    /// Returns the share of examined paths that are valid, as a percentage.
    ///
    /// When nothing was examined the result is `100.0`.
    pub fn health_percentage(&self) -> f64 {
        match self.total_indexed {
            0 => 100.0,
            total => (self.valid as f64 / total as f64) * 100.0,
        }
    }
}
205
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Builds an indexer backed by a fresh in-memory database, with hidden
    /// files enabled and no exclusion patterns.
    fn make_indexer() -> IncrementalIndexer {
        let database = Arc::new(Database::in_memory(10).unwrap());

        // Enable hidden files indexing since temp dirs often start with a dot
        let mut settings = SearchConfig::default();
        settings.index_hidden_files = true;

        // Use empty exclusion filter to avoid any pattern matching issues
        let no_exclusions = Arc::new(ExclusionFilter::from_patterns(&[]).unwrap());

        IncrementalIndexer::new(database, Arc::new(settings), no_exclusions)
    }

    /// A file created after the first update is picked up by the next one.
    #[test]
    fn test_incremental_update() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        fs::write(root.join("file1.txt"), "content1").unwrap();

        let indexer = make_indexer();

        let first_pass = indexer.update(root, None).unwrap();
        assert!(first_pass.added > 0, "Expected at least one file to be added");

        fs::write(root.join("file2.txt"), "content2").unwrap();

        let second_pass = indexer.update(root, None).unwrap();
        assert!(second_pass.added > 0, "Expected at least one file to be added on second update");
    }

    /// A file deleted from disk is reported as removed by the next update.
    #[test]
    fn test_file_removal_detection() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();
        let doomed = root.join("file.txt");
        fs::write(&doomed, "content").unwrap();

        let indexer = make_indexer();

        let first_pass = indexer.update(root, None).unwrap();
        assert!(first_pass.added > 0, "Expected at least one file to be added");

        fs::remove_file(&doomed).unwrap();

        let second_pass = indexer.update(root, None).unwrap();
        assert!(second_pass.removed > 0, "Expected at least one file to be removed");
    }
}
264}