// rusty_files/indexer/incremental.rs
use crate::core::config::SearchConfig;
2use crate::core::error::Result;
3use crate::core::types::ProgressCallback;
4use crate::filters::ExclusionFilter;
5use crate::indexer::builder::IndexBuilder;
6use crate::indexer::metadata::MetadataExtractor;
7use crate::storage::Database;
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
/// Keeps an existing file index in sync with the filesystem by applying
/// adds, updates, and removals instead of rebuilding the index from scratch.
pub struct IncrementalIndexer {
    // Shared handle to the metadata store rows are read from and written to.
    database: Arc<Database>,
    // Search configuration, also shared with the directory walker.
    config: Arc<SearchConfig>,
    // Full-index builder retained for ownership; not called anywhere in this
    // file (hence the leading underscore) — presumably kept for future use.
    _builder: Arc<IndexBuilder>,
}
17
18impl IncrementalIndexer {
19 pub fn new(
20 database: Arc<Database>,
21 config: Arc<SearchConfig>,
22 exclusion_filter: Arc<ExclusionFilter>,
23 ) -> Self {
24 let builder = Arc::new(IndexBuilder::new(
25 Arc::clone(&database),
26 Arc::clone(&config),
27 exclusion_filter,
28 ));
29
30 Self {
31 database,
32 config,
33 _builder: builder,
34 }
35 }
36
37 pub fn update<P: AsRef<Path>>(
38 &self,
39 root: P,
40 progress_callback: Option<ProgressCallback>,
41 ) -> Result<UpdateStats> {
42 let root = root.as_ref();
43
44 let existing_files = self.get_indexed_files(root)?;
45 let current_files = self.scan_current_files(root)?;
46
47 let mut stats = UpdateStats::default();
48
49 for path in ¤t_files {
50 if !existing_files.contains(path) {
51 if let Ok(entry) = MetadataExtractor::extract(path) {
52 self.database.insert_file(&entry)?;
53 stats.added += 1;
54 }
55 } else if self.needs_update(path)? {
56 if let Ok(entry) = MetadataExtractor::extract(path) {
57 self.database.insert_file(&entry)?;
58 stats.updated += 1;
59 }
60 }
61 }
62
63 for path in &existing_files {
64 if !current_files.contains(path) {
65 self.database.delete_by_path(path)?;
66 stats.removed += 1;
67 }
68 }
69
70 if let Some(callback) = progress_callback {
71 callback(crate::core::types::Progress::new(
72 stats.total(),
73 stats.total(),
74 format!("Update complete: {} changes", stats.total()),
75 ));
76 }
77
78 Ok(stats)
79 }
80
81 pub fn update_file<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
82 let path = path.as_ref();
83
84 if !path.exists() {
85 self.database.delete_by_path(path)?;
86 return Ok(true);
87 }
88
89 let entry = MetadataExtractor::extract(path)?;
90 self.database.insert_file(&entry)?;
91
92 Ok(true)
93 }
94
95 pub fn update_files(&self, paths: &[PathBuf]) -> Result<usize> {
96 let mut updated = 0;
97
98 for path in paths {
99 if self.update_file(path).is_ok() {
100 updated += 1;
101 }
102 }
103
104 Ok(updated)
105 }
106
107 fn get_indexed_files<P: AsRef<Path>>(&self, root: P) -> Result<HashSet<PathBuf>> {
108 let root = root.as_ref();
109 let mut files = HashSet::new();
110 let mut offset = 0;
111 let limit = 1000;
112
113 loop {
114 let batch = self.database.get_all_files(limit, offset)?;
115 if batch.is_empty() {
116 break;
117 }
118
119 for entry in batch {
120 if entry.path.starts_with(root) {
121 files.insert(entry.path);
122 }
123 }
124
125 offset += limit;
126 }
127
128 Ok(files)
129 }
130
131 fn scan_current_files<P: AsRef<Path>>(&self, root: P) -> Result<HashSet<PathBuf>> {
132 use crate::indexer::walker::DirectoryWalker;
133
134 let walker = DirectoryWalker::new(
135 Arc::clone(&self.config),
136 Arc::new(ExclusionFilter::default()),
137 );
138
139 let paths = walker.walk_parallel(root)?;
140 Ok(paths.into_iter().collect())
141 }
142
143 fn needs_update<P: AsRef<Path>>(&self, path: P) -> Result<bool> {
144 let path = path.as_ref();
145
146 if let Some(existing) = self.database.find_by_path(path)? {
147 if let Some(last_modified) = existing.modified_at {
148 return MetadataExtractor::is_modified_since(path, last_modified);
149 }
150 }
151
152 Ok(true)
153 }
154
155 pub fn verify_index<P: AsRef<Path>>(&self, root: P) -> Result<VerificationStats> {
156 let root = root.as_ref();
157 let indexed_files = self.get_indexed_files(root)?;
158
159 let mut stats = VerificationStats::default();
160 stats.total_indexed = indexed_files.len();
161
162 for path in indexed_files {
163 if !path.exists() {
164 stats.missing += 1;
165 } else if self.needs_update(&path)? {
166 stats.outdated += 1;
167 } else {
168 stats.valid += 1;
169 }
170 }
171
172 Ok(stats)
173 }
174}
175
/// Counts of the changes applied during one incremental update pass.
#[derive(Debug, Default, Clone)]
pub struct UpdateStats {
    pub added: usize,
    pub updated: usize,
    pub removed: usize,
}

impl UpdateStats {
    /// Total number of index mutations (adds + updates + removals).
    pub fn total(&self) -> usize {
        [self.added, self.updated, self.removed].into_iter().sum()
    }
}
188
/// Snapshot of index health produced by `verify_index`.
#[derive(Debug, Default, Clone)]
pub struct VerificationStats {
    pub total_indexed: usize,
    pub valid: usize,
    pub outdated: usize,
    pub missing: usize,
}

impl VerificationStats {
    /// Percentage of indexed entries still valid; an empty index is treated
    /// as fully healthy (100%).
    pub fn health_percentage(&self) -> f64 {
        match self.total_indexed {
            0 => 100.0,
            total => (self.valid as f64 / total as f64) * 100.0,
        }
    }
}
205
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Shared fixture: an indexer over a fresh in-memory database with
    /// hidden-file indexing enabled and no exclusion patterns.
    fn make_indexer() -> IncrementalIndexer {
        let db = Arc::new(Database::in_memory(10).unwrap());
        let config = Arc::new(SearchConfig {
            index_hidden_files: true,
            ..SearchConfig::default()
        });
        let filter = Arc::new(ExclusionFilter::from_patterns(&[]).unwrap());
        IncrementalIndexer::new(db, config, filter)
    }

    #[test]
    fn test_incremental_update() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        fs::write(root.join("file1.txt"), "content1").unwrap();

        let indexer = make_indexer();

        // First pass must pick up file1.
        let first = indexer.update(root, None).unwrap();
        assert!(first.added > 0, "Expected at least one file to be added");

        fs::write(root.join("file2.txt"), "content2").unwrap();

        // Second pass must pick up the newly created file2.
        let second = indexer.update(root, None).unwrap();
        assert!(second.added > 0, "Expected at least one file to be added on second update");
    }

    #[test]
    fn test_file_removal_detection() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();
        let file_path = root.join("file.txt");

        fs::write(&file_path, "content").unwrap();

        let indexer = make_indexer();

        // Index the file, then delete it and verify the removal is detected.
        let before = indexer.update(root, None).unwrap();
        assert!(before.added > 0, "Expected at least one file to be added");

        fs::remove_file(&file_path).unwrap();

        let after = indexer.update(root, None).unwrap();
        assert!(after.removed > 0, "Expected at least one file to be removed");
    }
}