1use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::time::SystemTime;
10use thiserror::Error;
11
12use crate::tokenizer::TokenCounts;
13use crate::types::Symbol;
14
/// A single file's cached scan results plus the fingerprint
/// (mtime / size / content hash) used to decide whether it must be rescanned.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFile {
    /// Path string; also used as the key in `RepoCache::files`.
    pub path: String,
    /// Modification time in seconds since the Unix epoch.
    pub mtime: u64,
    /// File size in bytes.
    pub size: u64,
    /// Content hash from `hash_content`; 0 means "not computed", which
    /// disables the hash comparison in `RepoCache::needs_rescan_with_hash`.
    pub hash: u64,
    /// Per-tokenizer token counts for this file.
    pub tokens: TokenCounts,
    /// Symbols extracted from the file, in serializable form.
    pub symbols: Vec<CachedSymbol>,
    /// Whether symbol extraction was actually performed for this file.
    pub symbols_extracted: bool,
    /// Detected language name, if any (e.g. "python").
    pub language: Option<String>,
    /// Line count of the file at scan time.
    pub lines: usize,
}
37
/// Serializable subset of `Symbol` that is persisted in the cache.
///
/// Fields not stored here are restored to neutral defaults when converting
/// back via `From<&CachedSymbol> for Symbol`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedSymbol {
    /// Symbol name.
    pub name: String,
    /// Symbol kind, stored as its string name (round-tripped through
    /// `SymbolKind::from_str` on load).
    pub kind: String,
    /// First line of the symbol's extent.
    pub start_line: u32,
    /// Last line of the symbol's extent.
    pub end_line: u32,
    /// Signature text, when one was extracted.
    pub signature: Option<String>,
}
47
48impl From<&Symbol> for CachedSymbol {
49 fn from(s: &Symbol) -> Self {
50 Self {
51 name: s.name.clone(),
52 kind: s.kind.name().to_owned(),
53 start_line: s.start_line,
54 end_line: s.end_line,
55 signature: s.signature.clone(),
56 }
57 }
58}
59
60impl From<&CachedSymbol> for Symbol {
61 fn from(s: &CachedSymbol) -> Self {
62 use crate::types::{SymbolKind, Visibility};
63 Self {
64 name: s.name.clone(),
65 kind: SymbolKind::from_str(&s.kind).unwrap_or(SymbolKind::Variable),
66 start_line: s.start_line,
67 end_line: s.end_line,
68 signature: s.signature.clone(),
69 docstring: None,
70 visibility: Visibility::Public,
71 references: 0,
72 importance: 0.5,
73 parent: None,
74 calls: Vec::new(),
75 extends: None,
76 implements: Vec::new(),
77 }
78 }
79}
80
/// On-disk cache of a repository scan, serialized with bincode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoCache {
    /// Format version; compared against `RepoCache::VERSION` on load.
    pub version: u32,
    /// Root path of the repository this cache describes.
    pub root_path: String,
    /// Creation time, seconds since the Unix epoch.
    pub created_at: u64,
    /// Time of the last `update_file`, seconds since the Unix epoch.
    pub updated_at: u64,
    /// Cached files keyed by their path string.
    pub files: HashMap<String, CachedFile>,
    /// Aggregate token counts over all cached files
    /// (refreshed by `recalculate_totals`).
    pub total_tokens: TokenCounts,
    /// External dependency names; maintained by callers — nothing in this
    /// module writes to it.
    pub external_deps: Vec<String>,
}
99
100impl RepoCache {
101 pub const VERSION: u32 = 2;
103
104 pub fn new(root_path: &str) -> Self {
106 let now = SystemTime::now()
107 .duration_since(SystemTime::UNIX_EPOCH)
108 .map(|d| d.as_secs())
109 .unwrap_or(0);
110
111 Self {
112 version: Self::VERSION,
113 root_path: root_path.to_owned(),
114 created_at: now,
115 updated_at: now,
116 files: HashMap::new(),
117 total_tokens: TokenCounts::default(),
118 external_deps: Vec::new(),
119 }
120 }
121
122 pub fn load(cache_path: &Path) -> Result<Self, CacheError> {
124 let content = fs::read(cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
125
126 let cache: Self = bincode::deserialize(&content)
127 .map_err(|e| CacheError::DeserializeError(e.to_string()))?;
128
129 if cache.version != Self::VERSION {
131 return Err(CacheError::VersionMismatch {
132 expected: Self::VERSION,
133 found: cache.version,
134 });
135 }
136
137 Ok(cache)
138 }
139
140 pub fn save(&self, cache_path: &Path) -> Result<(), CacheError> {
142 if let Some(parent) = cache_path.parent() {
144 fs::create_dir_all(parent).map_err(|e| CacheError::IoError(e.to_string()))?;
145 }
146
147 let content =
148 bincode::serialize(self).map_err(|e| CacheError::SerializeError(e.to_string()))?;
149
150 fs::write(cache_path, content).map_err(|e| CacheError::IoError(e.to_string()))?;
151
152 Ok(())
153 }
154
155 pub fn default_cache_path(repo_path: &Path) -> PathBuf {
157 repo_path.join(".infiniloom/cache/repo.cache")
158 }
159
160 pub fn needs_rescan(&self, path: &str, current_mtime: u64, current_size: u64) -> bool {
162 match self.files.get(path) {
163 Some(cached) => cached.mtime != current_mtime || cached.size != current_size,
164 None => true,
165 }
166 }
167
168 pub fn needs_rescan_with_hash(
171 &self,
172 path: &str,
173 current_mtime: u64,
174 current_size: u64,
175 current_hash: u64,
176 ) -> bool {
177 match self.files.get(path) {
178 Some(cached) => {
179 cached.mtime != current_mtime
180 || cached.size != current_size
181 || (cached.hash != 0 && current_hash != 0 && cached.hash != current_hash)
182 },
183 None => true,
184 }
185 }
186
187 pub fn get(&self, path: &str) -> Option<&CachedFile> {
189 self.files.get(path)
190 }
191
192 pub fn update_file(&mut self, file: CachedFile) {
194 self.files.insert(file.path.clone(), file);
195 self.updated_at = SystemTime::now()
196 .duration_since(SystemTime::UNIX_EPOCH)
197 .map(|d| d.as_secs())
198 .unwrap_or(0);
199 }
200
201 pub fn remove_file(&mut self, path: &str) {
203 self.files.remove(path);
204 }
205
206 pub fn find_deleted_files(&self, current_files: &[&str]) -> Vec<String> {
208 let current_set: std::collections::HashSet<&str> = current_files.iter().copied().collect();
209 self.files
210 .keys()
211 .filter(|p| !current_set.contains(p.as_str()))
212 .cloned()
213 .collect()
214 }
215
216 pub fn recalculate_totals(&mut self) {
218 self.total_tokens = self.files.values().map(|f| f.tokens).sum();
219 }
220
221 pub fn stats(&self) -> CacheStats {
223 CacheStats {
224 file_count: self.files.len(),
225 total_tokens: self.total_tokens,
226 total_bytes: self.files.values().map(|f| f.size).sum(),
227 age_seconds: SystemTime::now()
228 .duration_since(SystemTime::UNIX_EPOCH)
229 .map(|d| d.as_secs())
230 .unwrap_or(0)
231 .saturating_sub(self.updated_at),
232 }
233 }
234}
235
/// Lightweight summary of cache contents, returned by `RepoCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of cached file entries.
    pub file_count: usize,
    /// Aggregate token counts (copied from `RepoCache::total_tokens`).
    pub total_tokens: TokenCounts,
    /// Sum of cached file sizes, in bytes.
    pub total_bytes: u64,
    /// Seconds elapsed since the cache's `updated_at` timestamp.
    pub age_seconds: u64,
}
244
/// Errors produced while loading or saving the repository cache.
///
/// Underlying I/O and codec errors are flattened to their string form.
#[derive(Debug, Error)]
pub enum CacheError {
    /// Reading or writing the cache file failed.
    #[error("I/O error: {0}")]
    IoError(String),
    /// bincode failed to serialize the cache.
    #[error("Serialization error: {0}")]
    SerializeError(String),
    /// bincode failed to deserialize the cache file.
    #[error("Deserialization error: {0}")]
    DeserializeError(String),
    /// The on-disk cache was written by a different format version.
    #[error("Cache version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: u32, found: u32 },
}
257
/// Drives incremental rescanning: wraps a `RepoCache`, tracks whether it
/// has unsaved changes, and remembers where to persist it.
pub struct IncrementalScanner {
    // In-memory cache being maintained.
    cache: RepoCache,
    // Location where `cache` is persisted by `save`/`force_save`.
    cache_path: PathBuf,
    // True when the in-memory cache differs from what was last saved.
    dirty: bool,
}
264
265impl IncrementalScanner {
266 pub fn new(repo_path: &Path) -> Self {
268 let cache_path = RepoCache::default_cache_path(repo_path);
269
270 let cache = RepoCache::load(&cache_path)
271 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
272
273 Self { cache, cache_path, dirty: false }
274 }
275
276 pub fn with_cache_path(repo_path: &Path, cache_path: PathBuf) -> Self {
278 let cache = RepoCache::load(&cache_path)
279 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
280
281 Self { cache, cache_path, dirty: false }
282 }
283
284 pub fn needs_rescan(&self, path: &Path) -> bool {
286 let metadata = match path.metadata() {
287 Ok(m) => m,
288 Err(_) => return true,
289 };
290
291 let mtime = metadata
292 .modified()
293 .ok()
294 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
295 .map(|d| d.as_secs())
296 .unwrap_or(0);
297
298 let relative_path = path.to_string_lossy();
299 self.cache
300 .needs_rescan(&relative_path, mtime, metadata.len())
301 }
302
303 pub fn needs_rescan_with_content(&self, path: &Path, content: &[u8]) -> bool {
306 let metadata = match path.metadata() {
307 Ok(m) => m,
308 Err(_) => return true,
309 };
310
311 let mtime = metadata
312 .modified()
313 .ok()
314 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
315 .map(|d| d.as_secs())
316 .unwrap_or(0);
317
318 let content_hash = hash_content(content);
319 let relative_path = path.to_string_lossy();
320 self.cache
321 .needs_rescan_with_hash(&relative_path, mtime, metadata.len(), content_hash)
322 }
323
324 pub fn get_cached(&self, path: &str) -> Option<&CachedFile> {
326 self.cache.files.get(path)
327 }
328
329 pub fn update(&mut self, file: CachedFile) {
331 self.cache.update_file(file);
332 self.dirty = true;
333 }
334
335 pub fn remove(&mut self, path: &str) {
337 self.cache.remove_file(path);
338 self.dirty = true;
339 }
340
341 pub fn save(&mut self) -> Result<(), CacheError> {
343 if self.dirty {
344 self.cache.recalculate_totals();
345 self.cache.save(&self.cache_path)?;
346 self.dirty = false;
347 }
348 Ok(())
349 }
350
351 pub fn force_save(&mut self) -> Result<(), CacheError> {
353 self.cache.recalculate_totals();
354 self.cache.save(&self.cache_path)?;
355 self.dirty = false;
356 Ok(())
357 }
358
359 pub fn stats(&self) -> CacheStats {
361 self.cache.stats()
362 }
363
364 pub fn clear(&mut self) {
366 self.cache = RepoCache::new(&self.cache.root_path);
367 self.dirty = true;
368 }
369
370 pub fn get_changed_files<'a>(
372 &self,
373 current_files: &'a [(PathBuf, u64, u64)],
374 ) -> Vec<&'a PathBuf> {
375 current_files
376 .iter()
377 .filter(|(path, mtime, size)| {
378 let relative = path.to_string_lossy();
379 self.cache.needs_rescan(&relative, *mtime, *size)
380 })
381 .map(|(path, _, _)| path)
382 .collect()
383 }
384}
385
/// A file-system change event, as surfaced by the optional watcher.
#[derive(Debug, Clone)]
pub enum FileChange {
    /// A new file appeared at the path.
    Created(PathBuf),
    /// An existing file was modified.
    Modified(PathBuf),
    /// A file was removed.
    Deleted(PathBuf),
    /// A file moved from one path to another.
    /// NOTE(review): the `watcher` module's `event_to_change` never emits
    /// this variant; it exists for callers that detect renames themselves.
    Renamed { from: PathBuf, to: PathBuf },
}
394
#[cfg(feature = "watch")]
pub mod watcher {
    //! Live file-watching built on the `notify` crate; compiled only when
    //! the `watch` cargo feature is enabled.
    use super::*;
    use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc::{channel, Receiver};

    /// Watches a directory tree recursively and converts raw `notify`
    /// events into [`FileChange`] values.
    pub struct FileWatcher {
        // Backend watcher; must stay alive for events to keep flowing.
        watcher: RecommendedWatcher,
        // Receives events pushed from the watcher's callback.
        receiver: Receiver<Result<Event, notify::Error>>,
        // Root being watched; needed again by `stop` to unwatch.
        root_path: PathBuf,
    }

    impl FileWatcher {
        /// Starts watching `path` recursively.
        ///
        /// # Errors
        /// Returns any error from constructing the backend watcher or
        /// registering the watch.
        pub fn new(path: &Path) -> Result<Self, notify::Error> {
            let (tx, rx) = channel();

            let watcher = RecommendedWatcher::new(
                move |res| {
                    // A send failure means the receiver was dropped; there
                    // is nothing useful to do with the event then.
                    let _ = tx.send(res);
                },
                Config::default(),
            )?;

            let mut fw = Self { watcher, receiver: rx, root_path: path.to_path_buf() };

            fw.watcher.watch(path, RecursiveMode::Recursive)?;

            Ok(fw)
        }

        /// Non-blocking poll: returns the next pending change, or `None`
        /// when no event is queued, the event kind is not mapped, or an
        /// error was received.
        pub fn try_next(&self) -> Option<FileChange> {
            match self.receiver.try_recv() {
                Ok(Ok(event)) => self.event_to_change(event),
                _ => None,
            }
        }

        /// Blocking wait for the next change. Returns `None` on watcher
        /// errors, unmapped event kinds, or a disconnected sender.
        pub fn next(&self) -> Option<FileChange> {
            match self.receiver.recv() {
                Ok(Ok(event)) => self.event_to_change(event),
                _ => None,
            }
        }

        /// Maps a raw notify event to a [`FileChange`]. Only the event's
        /// first path is used; events with no path or an unmapped kind
        /// yield `None`.
        fn event_to_change(&self, event: Event) -> Option<FileChange> {
            let path = event.paths.first()?.clone();

            match event.kind {
                EventKind::Create(_) => Some(FileChange::Created(path)),
                EventKind::Modify(_) => Some(FileChange::Modified(path)),
                EventKind::Remove(_) => Some(FileChange::Deleted(path)),
                _ => None,
            }
        }

        /// Stops watching the root path, consuming the watcher.
        ///
        /// # Errors
        /// Returns any error from the underlying `unwatch` call.
        pub fn stop(mut self) -> Result<(), notify::Error> {
            self.watcher.unwatch(&self.root_path)
        }
    }
}
462
/// Hashes raw file content with the standard library's default hasher.
///
/// NOTE: `DefaultHasher` output is not guaranteed stable across Rust
/// releases, so persisted hashes may change after a toolchain upgrade.
/// That is safe here because a mismatch merely triggers a rescan.
pub fn hash_content(content: &[u8]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
472
/// Returns a file's modification time as seconds since the Unix epoch,
/// or `None` if the file, its timestamp, or the epoch offset cannot be
/// obtained.
pub fn get_mtime(path: &Path) -> Option<u64> {
    let modified = path.metadata().ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
483
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_cache_create_save_load() {
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().join("test.cache");

        let tokens = TokenCounts {
            o200k: 45,
            cl100k: 48,
            claude: 50,
            gemini: 46,
            llama: 50,
            mistral: 50,
            deepseek: 50,
            qwen: 50,
            cohere: 48,
            grok: 50,
        };
        let entry = CachedFile {
            path: "test.py".to_string(),
            mtime: 12345,
            size: 100,
            hash: 0,
            tokens,
            symbols: vec![],
            symbols_extracted: false,
            language: Some("python".to_string()),
            lines: 10,
        };

        let mut cache = RepoCache::new("/test/repo");
        cache.files.insert("test.py".to_string(), entry);
        cache.save(&cache_path).unwrap();

        // A round-trip through disk preserves the single entry.
        let loaded = RepoCache::load(&cache_path).unwrap();
        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.py"));
    }

    #[test]
    fn test_needs_rescan() {
        // Unknown paths always need a rescan.
        let cache = RepoCache::new("/test");
        assert!(cache.needs_rescan("new_file.py", 0, 0));

        let mut cache = RepoCache::new("/test");
        cache.files.insert(
            "existing.py".to_string(),
            CachedFile {
                path: "existing.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: 0,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        // Matching fingerprint: no rescan. A changed mtime or size: rescan.
        assert!(!cache.needs_rescan("existing.py", 1000, 500));
        assert!(cache.needs_rescan("existing.py", 2000, 500));
        assert!(cache.needs_rescan("existing.py", 1000, 600));
    }

    #[test]
    fn test_incremental_scanner() {
        let temp = TempDir::new().unwrap();
        let mut scanner = IncrementalScanner::new(temp.path());

        // Nothing cached yet, so any path needs scanning.
        assert!(scanner.needs_rescan(&temp.path().join("test.py")));

        scanner.update(CachedFile {
            path: "test.py".to_string(),
            mtime: 1000,
            size: 100,
            hash: 0,
            tokens: TokenCounts::default(),
            symbols: vec![],
            symbols_extracted: false,
            language: Some("python".to_string()),
            lines: 5,
        });

        assert!(scanner.get_cached("test.py").is_some());
    }

    #[test]
    fn test_hash_content() {
        // Equal inputs hash equal; different inputs hash differently.
        assert_eq!(hash_content(b"hello world"), hash_content(b"hello world"));
        assert_ne!(hash_content(b"hello world"), hash_content(b"different"));
    }

    #[test]
    fn test_needs_rescan_with_hash() {
        let mut cache = RepoCache::new("/test");
        let original_hash = hash_content(b"original content");
        let modified_hash = hash_content(b"modified content");

        cache.files.insert(
            "file.py".to_string(),
            CachedFile {
                path: "file.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: original_hash,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        // Identical fingerprint and hash: no rescan needed.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, original_hash));
        // Same fingerprint, different content hash: rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 1000, 500, modified_hash));
        // A changed mtime alone triggers a rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 2000, 500, original_hash));
        // A zero hash means "unknown" and disables the hash comparison.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, 0));
    }
}