1use anyhow::{anyhow, Result};
2use serde::{Deserialize, Serialize};
3use sha2::{Digest, Sha256};
4use std::collections::HashMap;
5use std::fs;
6use std::path::Path;
7use std::time::SystemTime;
8
9use crate::constants::FILE_META_DB_NAME;
10
/// Converts a filesystem path into the canonical string form used as a
/// lookup key: no Windows verbatim prefix and forward slashes only.
///
/// Non-UTF-8 sequences are replaced lossily (see [`Path::to_string_lossy`]).
/// See [`normalize_path_str`] for the exact rewriting rules.
pub fn normalize_path(path: &Path) -> String {
    normalize_path_str(&path.to_string_lossy())
}

/// Converts a path string into the canonical key form.
///
/// * `\\?\UNC\server\share\x` becomes `//server/share/x`. Dropping only the
///   `\\?\` part would leave the relative-looking `UNC/server/share/x`,
///   which no longer refers to the original file.
/// * Any other leading `\\?\` prefix is removed.
/// * Every remaining backslash is replaced with `/`.
///
/// Strings that are already normalized are returned unchanged.
pub fn normalize_path_str(path: &str) -> String {
    const VERBATIM_PREFIX: &str = r"\\?\";
    const VERBATIM_UNC_PREFIX: &str = r"\\?\UNC\";

    // The UNC form must be checked first because it also starts with `\\?\`.
    if let Some(share_path) = path.strip_prefix(VERBATIM_UNC_PREFIX) {
        return format!("//{}", share_path.replace('\\', "/"));
    }

    path.trim_start_matches(VERBATIM_PREFIX).replace('\\', "/")
}
26
/// Per-file record used to decide whether a tracked file has changed since
/// it was last indexed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMeta {
    /// Lowercase hex SHA-256 digest of the file contents, as produced by
    /// `FileMetaStore::compute_hash`.
    pub hash: String,
    /// Modification time in whole seconds since the Unix epoch.
    pub mtime: u64,
    /// File length in bytes as reported by `fs::metadata`.
    pub size: u64,
    /// Set to `chunk_ids.len()` when the record is written by
    /// `FileMetaStore::update_file`.
    pub chunk_count: usize,
    /// Chunk identifiers handed to `FileMetaStore::update_file` for this file.
    pub chunk_ids: Vec<u32>,
}
41
/// Table of [`FileMeta`] records plus the settings they were produced with.
///
/// The whole structure is written to and read from disk as JSON by the
/// `save` and `load_or_create` methods.
#[derive(Debug, Serialize, Deserialize)]
pub struct FileMetaStore {
    /// Records keyed by the normalized path string (see `normalize_path`).
    files: HashMap<String, FileMeta>,
    /// Model name the records belong to; `load_or_create` discards the
    /// loaded records when the requested name differs.
    pub model_name: String,
    /// Compared by `load_or_create` in the same way as `model_name`.
    /// NOTE(review): presumably the embedding vector dimensionality — confirm.
    pub dimensions: usize,
    /// Seconds since the Unix epoch recorded by `mark_full_index`;
    /// `None` until that method has been called (or after `clear`).
    pub last_full_index: Option<u64>,
    /// Schema version; set to `CURRENT_VERSION` by `new` and not otherwise
    /// read in this file.
    version: u32,
}
61
62impl FileMetaStore {
63 const CURRENT_VERSION: u32 = 1;
64 const FILENAME: &'static str = FILE_META_DB_NAME;
65
66 pub fn new(model_name: String, dimensions: usize) -> Self {
68 Self {
69 files: HashMap::new(),
70 model_name,
71 dimensions,
72 last_full_index: None,
73 version: Self::CURRENT_VERSION,
74 }
75 }
76
77 pub fn load_or_create(db_path: &Path, model_name: &str, dimensions: usize) -> Result<Self> {
79 let meta_path = db_path.join(Self::FILENAME);
80
81 if meta_path.exists() {
82 let content = fs::read_to_string(&meta_path)?;
83 let mut store: FileMetaStore = serde_json::from_str(&content)
84 .map_err(|e| anyhow!("Failed to parse file metadata: {}", e))?;
85
86 if store.model_name != model_name || store.dimensions != dimensions {
88 println!(
89 "⚠️ Model changed ({} -> {}), full re-index required",
90 store.model_name, model_name
91 );
92 store = Self::new(model_name.to_string(), dimensions);
93 }
94
95 store.migrate_paths();
98
99 Ok(store)
100 } else {
101 Ok(Self::new(model_name.to_string(), dimensions))
102 }
103 }
104
105 pub fn save(&self, db_path: &Path) -> Result<()> {
107 let meta_path = db_path.join(Self::FILENAME);
108 let content = serde_json::to_string_pretty(self)?;
109 fs::write(meta_path, content)?;
110 Ok(())
111 }
112
113 fn migrate_paths(&mut self) {
119 let old_files = std::mem::take(&mut self.files);
120 let capacity = old_files.len();
121 let mut new_files = HashMap::with_capacity(capacity);
122 let mut migrated = 0;
123
124 for (old_key, meta) in old_files {
125 let new_key = normalize_path_str(&old_key);
126 if new_key != old_key {
127 migrated += 1;
128 }
129 new_files.insert(new_key, meta);
130 }
131
132 self.files = new_files;
133
134 if migrated > 0 {
135 tracing::info!("🔄 Migrated {} file paths to normalized format", migrated);
136 }
137 }
138
139 pub fn compute_hash(path: &Path) -> Result<String> {
141 let content = fs::read(path)?;
142 let mut hasher = Sha256::new();
143 hasher.update(&content);
144 Ok(format!("{:x}", hasher.finalize()))
145 }
146
147 fn get_mtime(path: &Path) -> Result<u64> {
149 let metadata = fs::metadata(path)?;
150 let mtime = metadata.modified()?;
151 Ok(mtime.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
152 }
153
154 pub fn check_file(&self, path: &Path) -> Result<(bool, Vec<u32>)> {
157 let path_str = normalize_path(path);
158
159 let current_mtime = Self::get_mtime(path)?;
161 let current_size = fs::metadata(path)?.len();
162
163 if let Some(meta) = self.files.get(&path_str) {
164 if meta.mtime == current_mtime && meta.size == current_size {
166 return Ok((false, vec![]));
167 }
168
169 let current_hash = Self::compute_hash(path)?;
171 if meta.hash == current_hash {
172 return Ok((false, vec![]));
174 }
175
176 Ok((true, meta.chunk_ids.clone()))
178 } else {
179 Ok((true, vec![]))
181 }
182 }
183
184 pub fn update_file(&mut self, path: &Path, chunk_ids: Vec<u32>) -> Result<()> {
186 let path_str = normalize_path(path);
187 let hash = Self::compute_hash(path)?;
188 let mtime = Self::get_mtime(path)?;
189 let size = fs::metadata(path)?.len();
190
191 self.files.insert(
192 path_str,
193 FileMeta {
194 hash,
195 mtime,
196 size,
197 chunk_count: chunk_ids.len(),
198 chunk_ids,
199 },
200 );
201
202 Ok(())
203 }
204
205 pub fn remove_file(&mut self, path: &Path) -> Option<FileMeta> {
207 let path_str = normalize_path(path);
208 self.files.remove(&path_str)
209 }
210
211 #[allow(dead_code)] pub fn tracked_files(&self) -> impl Iterator<Item = &String> {
214 self.files.keys()
215 }
216
217 pub fn find_deleted_files(&self) -> Vec<(String, Vec<u32>)> {
219 self.files
220 .iter()
221 .filter(|(path, _)| !Path::new(path).exists())
222 .map(|(path, meta)| (path.clone(), meta.chunk_ids.clone()))
223 .collect()
224 }
225
226 #[allow(dead_code)] pub fn stats(&self) -> FileMetaStats {
229 let total_chunks: usize = self.files.values().map(|m| m.chunk_count).sum();
230 let total_size: u64 = self.files.values().map(|m| m.size).sum();
231
232 FileMetaStats {
233 total_files: self.files.len(),
234 total_chunks,
235 total_size_bytes: total_size,
236 }
237 }
238
239 #[allow(dead_code)] pub fn clear(&mut self) {
242 self.files.clear();
243 self.last_full_index = None;
244 }
245
246 pub fn mark_full_index(&mut self) {
248 self.last_full_index = Some(
249 SystemTime::now()
250 .duration_since(SystemTime::UNIX_EPOCH)
251 .unwrap()
252 .as_secs(),
253 );
254 }
255}
256
/// Totals computed over every record of a `FileMetaStore`.
#[derive(Debug)]
#[allow(dead_code)]
pub struct FileMetaStats {
    /// Number of tracked files.
    pub total_files: usize,
    /// Sum of the per-file chunk counts.
    pub total_chunks: usize,
    /// Sum of the per-file sizes, in bytes.
    pub total_size_bytes: u64,
}

impl FileMetaStats {
    /// Returns `total_size_bytes` expressed in units of 1024 * 1024 bytes.
    #[allow(dead_code)]
    pub fn total_size_mb(&self) -> f64 {
        const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
        let bytes = self.total_size_bytes as f64;
        bytes / BYTES_PER_MIB
    }
}
271
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a record whose `chunk_count` matches the number of ids given.
    fn sample_meta(hash: &str, mtime: u64, size: u64, chunk_ids: Vec<u32>) -> FileMeta {
        FileMeta {
            hash: hash.to_string(),
            mtime,
            size,
            chunk_count: chunk_ids.len(),
            chunk_ids,
        }
    }

    // ----- normalize_path / normalize_path_str -----

    #[test]
    fn test_normalize_path_strips_unc_prefix() {
        let input = Path::new(r"\\?\C:\WorkArea\AI\codesearch\src\main.rs");
        let expected = "C:/WorkArea/AI/codesearch/src/main.rs";
        assert_eq!(normalize_path(input), expected);
    }

    #[test]
    fn test_normalize_path_converts_backslashes() {
        let input = Path::new(r"C:\WorkArea\AI\codesearch\src\main.rs");
        let expected = "C:/WorkArea/AI/codesearch/src/main.rs";
        assert_eq!(normalize_path(input), expected);
    }

    #[test]
    fn test_normalize_path_forward_slashes_unchanged() {
        let normalized = normalize_path(Path::new("C:/WorkArea/AI/codesearch/src/main.rs"));
        // Neither a backslash nor the verbatim prefix may appear in the result.
        assert!(!normalized.contains('\\'));
        assert!(!normalized.starts_with(r"\\?\"));
    }

    #[test]
    fn test_normalize_path_str_strips_unc() {
        let normalized = normalize_path_str(r"\\?\C:\foo\bar.rs");
        assert_eq!(normalized, "C:/foo/bar.rs");
    }

    #[test]
    fn test_normalize_path_unix_style() {
        let unix = "/home/user/project/src/main.rs";
        assert_eq!(normalize_path(Path::new(unix)), "/home/user/project/src/main.rs");
    }

    #[test]
    fn test_normalize_path_mixed_separators() {
        let input = Path::new(r"C:\Users\project/src/lib.rs");
        assert_eq!(normalize_path(input), "C:/Users/project/src/lib.rs");
    }

    #[test]
    fn test_normalize_path_str_mixed_separators() {
        let normalized = normalize_path_str(r"C:\Users\project/src/lib.rs");
        assert_eq!(normalized, "C:/Users/project/src/lib.rs");
    }

    #[test]
    fn test_normalize_path_already_normalized() {
        let input = Path::new("C:/WorkArea/AI/codesearch/src/main.rs");
        let expected = "C:/WorkArea/AI/codesearch/src/main.rs";
        assert_eq!(normalize_path(input), expected);
    }

    #[test]
    fn test_normalize_path_deeply_nested() {
        let input = Path::new(r"\\?\C:\Very\Deep\Nested\Path\To\Some\File.rs");
        let expected = "C:/Very/Deep/Nested/Path/To/Some/File.rs";
        assert_eq!(normalize_path(input), expected);
    }

    #[test]
    fn test_normalize_path_consecutive_backslashes() {
        // Each backslash is replaced individually; doubled separators stay doubled.
        let input = Path::new(r"C:\\Double\\Backslashes\\file.rs");
        assert_eq!(normalize_path(input), "C://Double//Backslashes//file.rs");
    }

    // ----- FileMetaStore -----

    #[test]
    fn test_migrate_paths_normalizes_keys() {
        let backslash_key = r"C:\WorkArea\src\main.rs";
        let verbatim_key = r"\\?\C:\WorkArea\src\lib.rs";

        let mut store = FileMetaStore::new("test-model".to_string(), 384);
        store.files.insert(
            backslash_key.to_string(),
            sample_meta("abc123", 1000, 100, vec![1, 2]),
        );
        store.files.insert(
            verbatim_key.to_string(),
            sample_meta("def456", 2000, 200, vec![3, 4, 5]),
        );

        store.migrate_paths();

        // Normalized keys are present ...
        assert!(store.files.contains_key("C:/WorkArea/src/main.rs"));
        assert!(store.files.contains_key("C:/WorkArea/src/lib.rs"));
        // ... and the original spellings are gone.
        assert!(!store.files.contains_key(backslash_key));
        assert!(!store.files.contains_key(verbatim_key));
    }

    #[test]
    fn test_file_meta_store() {
        let workspace = tempdir().unwrap();
        let db_path = workspace.path();
        let tracked = workspace.path().join("test.txt");

        let mut store = FileMetaStore::new("test-model".to_string(), 384);
        fs::write(&tracked, "hello world").unwrap();

        // A file the store has never seen must be indexed and has no old chunks.
        let (needs_reindex, old_chunks) = store.check_file(&tracked).unwrap();
        assert!(needs_reindex);
        assert!(old_chunks.is_empty());

        // Once recorded, the unchanged file is reported as up to date.
        store.update_file(&tracked, vec![1, 2, 3]).unwrap();
        let (needs_reindex, _) = store.check_file(&tracked).unwrap();
        assert!(!needs_reindex);

        // Changing the content triggers a re-index and hands back the old ids.
        fs::write(&tracked, "hello world modified").unwrap();
        let (needs_reindex, old_chunks) = store.check_file(&tracked).unwrap();
        assert!(needs_reindex);
        assert_eq!(old_chunks, vec![1, 2, 3]);

        // The single record survives a save/load round trip.
        store.save(db_path).unwrap();
        let reloaded = FileMetaStore::load_or_create(db_path, "test-model", 384).unwrap();
        assert_eq!(reloaded.files.len(), 1);
    }
}