1use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::time::SystemTime;
10use thiserror::Error;
11
12use crate::bincode_safe::{deserialize_with_limit, serialize};
13use crate::tokenizer::TokenCounts;
14use crate::types::Symbol;
15
/// Per-file cache entry: everything needed to decide whether a file must be
/// rescanned, and to reuse its previous scan results when it is unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFile {
    /// Path string; also used as the key in `RepoCache::files`.
    pub path: String,
    /// Modification time in seconds since the Unix epoch (0 = unknown).
    pub mtime: u64,
    /// File size in bytes.
    pub size: u64,
    /// 64-bit content hash (see `hash_content`); 0 means "not computed",
    /// which disables the hash comparison in `needs_rescan_with_hash`.
    pub hash: u64,
    /// Token counts for this file across the supported tokenizers.
    pub tokens: TokenCounts,
    /// Symbols previously extracted from this file.
    pub symbols: Vec<CachedSymbol>,
    /// Whether symbol extraction was performed for this file.
    pub symbols_extracted: bool,
    /// Detected language name (e.g. "python"), if known.
    pub language: Option<String>,
    /// Number of lines in the file.
    pub lines: usize,
}
38
/// Serializable subset of a `Symbol` persisted in the cache; fields not
/// stored here are reconstructed with neutral defaults on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedSymbol {
    /// Symbol identifier as it appears in source.
    pub name: String,
    /// Symbol kind name (string form of `SymbolKind`).
    pub kind: String,
    /// First line of the symbol's definition (1-based, per extractor).
    pub start_line: u32,
    /// Last line of the symbol's definition.
    pub end_line: u32,
    /// Declaration/signature text, if one was captured.
    pub signature: Option<String>,
}
48
49impl From<&Symbol> for CachedSymbol {
50 fn from(s: &Symbol) -> Self {
51 Self {
52 name: s.name.clone(),
53 kind: s.kind.name().to_owned(),
54 start_line: s.start_line,
55 end_line: s.end_line,
56 signature: s.signature.clone(),
57 }
58 }
59}
60
61impl From<&CachedSymbol> for Symbol {
62 fn from(s: &CachedSymbol) -> Self {
63 use crate::types::{SymbolKind, Visibility};
64 Self {
65 name: s.name.clone(),
66 kind: SymbolKind::from_str(&s.kind).unwrap_or(SymbolKind::Variable),
67 start_line: s.start_line,
68 end_line: s.end_line,
69 signature: s.signature.clone(),
70 docstring: None,
71 visibility: Visibility::Public,
72 references: 0,
73 importance: 0.5,
74 parent: None,
75 calls: Vec::new(),
76 extends: None,
77 implements: Vec::new(),
78 }
79 }
80}
81
/// On-disk cache of a whole repository scan, persisted in binary form via
/// `load`/`save`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoCache {
    /// Format version; must equal `RepoCache::VERSION` for `load` to accept it.
    pub version: u32,
    /// Root path the cache was built for.
    pub root_path: String,
    /// Creation time, seconds since the Unix epoch (0 if clock unavailable).
    pub created_at: u64,
    /// Last mutation time, seconds since the Unix epoch.
    pub updated_at: u64,
    /// Cached files, keyed by their `CachedFile::path`.
    pub files: HashMap<String, CachedFile>,
    /// Aggregate token counts; refreshed by `recalculate_totals`.
    pub total_tokens: TokenCounts,
    /// External dependency names (populated elsewhere; this module only
    /// initializes it empty).
    pub external_deps: Vec<String>,
}
100
101impl RepoCache {
102 pub const VERSION: u32 = 4;
104
105 pub fn new(root_path: &str) -> Self {
107 let now = SystemTime::now()
108 .duration_since(SystemTime::UNIX_EPOCH)
109 .map(|d| d.as_secs())
110 .unwrap_or(0);
111
112 Self {
113 version: Self::VERSION,
114 root_path: root_path.to_owned(),
115 created_at: now,
116 updated_at: now,
117 files: HashMap::new(),
118 total_tokens: TokenCounts::default(),
119 external_deps: Vec::new(),
120 }
121 }
122
123 pub fn load(cache_path: &Path) -> Result<Self, CacheError> {
125 let content = fs::read(cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
126
127 let cache: Self = deserialize_with_limit(&content)
128 .map_err(|e| CacheError::DeserializeError(e.to_string()))?;
129
130 if cache.version != Self::VERSION {
132 return Err(CacheError::VersionMismatch {
133 expected: Self::VERSION,
134 found: cache.version,
135 });
136 }
137
138 Ok(cache)
139 }
140
141 pub fn save(&self, cache_path: &Path) -> Result<(), CacheError> {
143 if let Some(parent) = cache_path.parent() {
145 fs::create_dir_all(parent).map_err(|e| CacheError::IoError(e.to_string()))?;
146 }
147
148 let content = serialize(self).map_err(|e| CacheError::SerializeError(e.to_string()))?;
149
150 let tmp_path = cache_path.with_extension("tmp");
152 fs::write(&tmp_path, content).map_err(|e| CacheError::IoError(e.to_string()))?;
153 fs::rename(&tmp_path, cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
154
155 Ok(())
156 }
157
158 pub fn default_cache_path(repo_path: &Path) -> PathBuf {
160 repo_path.join(".infiniloom/cache/repo.cache")
161 }
162
163 pub fn needs_rescan(&self, path: &str, current_mtime: u64, current_size: u64) -> bool {
165 match self.files.get(path) {
166 Some(cached) => cached.mtime != current_mtime || cached.size != current_size,
167 None => true,
168 }
169 }
170
171 pub fn needs_rescan_with_hash(
174 &self,
175 path: &str,
176 current_mtime: u64,
177 current_size: u64,
178 current_hash: u64,
179 ) -> bool {
180 match self.files.get(path) {
181 Some(cached) => {
182 cached.mtime != current_mtime
183 || cached.size != current_size
184 || (cached.hash != 0 && current_hash != 0 && cached.hash != current_hash)
185 },
186 None => true,
187 }
188 }
189
190 pub fn get(&self, path: &str) -> Option<&CachedFile> {
192 self.files.get(path)
193 }
194
195 pub fn update_file(&mut self, file: CachedFile) {
197 self.files.insert(file.path.clone(), file);
198 self.updated_at = SystemTime::now()
199 .duration_since(SystemTime::UNIX_EPOCH)
200 .map(|d| d.as_secs())
201 .unwrap_or(0);
202 }
203
204 pub fn remove_file(&mut self, path: &str) {
206 self.files.remove(path);
207 }
208
209 pub fn find_deleted_files(&self, current_files: &[&str]) -> Vec<String> {
211 let current_set: std::collections::HashSet<&str> = current_files.iter().copied().collect();
212 self.files
213 .keys()
214 .filter(|p| !current_set.contains(p.as_str()))
215 .cloned()
216 .collect()
217 }
218
219 pub fn recalculate_totals(&mut self) {
221 self.total_tokens = self.files.values().map(|f| f.tokens).sum();
222 }
223
224 pub fn stats(&self) -> CacheStats {
226 CacheStats {
227 file_count: self.files.len(),
228 total_tokens: self.total_tokens,
229 total_bytes: self.files.values().map(|f| f.size).sum(),
230 age_seconds: SystemTime::now()
231 .duration_since(SystemTime::UNIX_EPOCH)
232 .map(|d| d.as_secs())
233 .unwrap_or(0)
234 .saturating_sub(self.updated_at),
235 }
236 }
237}
238
/// Point-in-time summary of a cache, produced by `RepoCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of cached files.
    pub file_count: usize,
    /// Aggregate token counts across all cached files.
    pub total_tokens: TokenCounts,
    /// Sum of cached file sizes in bytes.
    pub total_bytes: u64,
    /// Seconds elapsed since the cache was last updated.
    pub age_seconds: u64,
}
247
/// Errors from loading or saving a `RepoCache`.
#[derive(Debug, Error)]
pub enum CacheError {
    /// Filesystem read/write/rename failure (message from the underlying
    /// `std::io::Error`).
    #[error("I/O error: {0}")]
    IoError(String),
    /// The cache could not be encoded.
    #[error("Serialization error: {0}")]
    SerializeError(String),
    /// The cache bytes could not be decoded (typically a stale or corrupt
    /// cache file).
    #[error("Deserialization error: {0}. Try clearing the cache by deleting `.infiniloom/cache/` and re-running.")]
    DeserializeError(String),
    /// The cache file was written by a different format version.
    #[error("Cache version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: u32, found: u32 },
}
260
/// Pairs a `RepoCache` with its on-disk location and a dirty flag so the
/// cache is only rewritten after mutations.
pub struct IncrementalScanner {
    /// In-memory cache state.
    cache: RepoCache,
    /// Where `save`/`force_save` persist the cache.
    cache_path: PathBuf,
    /// Set by `update`/`remove`/`clear`; cleared by a successful save.
    dirty: bool,
}
267
268impl IncrementalScanner {
269 pub fn new(repo_path: &Path) -> Self {
271 let cache_path = RepoCache::default_cache_path(repo_path);
272
273 let cache = RepoCache::load(&cache_path)
274 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
275
276 Self { cache, cache_path, dirty: false }
277 }
278
279 pub fn with_cache_path(repo_path: &Path, cache_path: PathBuf) -> Self {
281 let cache = RepoCache::load(&cache_path)
282 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
283
284 Self { cache, cache_path, dirty: false }
285 }
286
287 pub fn needs_rescan(&self, path: &Path) -> bool {
289 let metadata = match path.metadata() {
290 Ok(m) => m,
291 Err(_) => return true,
292 };
293
294 let mtime = metadata
295 .modified()
296 .ok()
297 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
298 .map_or(0, |d| d.as_secs());
299
300 let relative_path = path.to_string_lossy();
301 self.cache
302 .needs_rescan(&relative_path, mtime, metadata.len())
303 }
304
305 pub fn needs_rescan_with_content(&self, path: &Path, content: &[u8]) -> bool {
308 let metadata = match path.metadata() {
309 Ok(m) => m,
310 Err(_) => return true,
311 };
312
313 let mtime = metadata
314 .modified()
315 .ok()
316 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
317 .map_or(0, |d| d.as_secs());
318
319 let content_hash = hash_content(content);
320 let relative_path = path.to_string_lossy();
321 self.cache
322 .needs_rescan_with_hash(&relative_path, mtime, metadata.len(), content_hash)
323 }
324
325 pub fn get_cached(&self, path: &str) -> Option<&CachedFile> {
327 self.cache.files.get(path)
328 }
329
330 pub fn update(&mut self, file: CachedFile) {
332 self.cache.update_file(file);
333 self.dirty = true;
334 }
335
336 pub fn remove(&mut self, path: &str) {
338 self.cache.remove_file(path);
339 self.dirty = true;
340 }
341
342 pub fn save(&mut self) -> Result<(), CacheError> {
344 if self.dirty {
345 self.cache.recalculate_totals();
346 self.cache.save(&self.cache_path)?;
347 self.dirty = false;
348 }
349 Ok(())
350 }
351
352 pub fn force_save(&mut self) -> Result<(), CacheError> {
354 self.cache.recalculate_totals();
355 self.cache.save(&self.cache_path)?;
356 self.dirty = false;
357 Ok(())
358 }
359
360 pub fn stats(&self) -> CacheStats {
362 self.cache.stats()
363 }
364
365 pub fn clear(&mut self) {
367 self.cache = RepoCache::new(&self.cache.root_path);
368 self.dirty = true;
369 }
370
371 pub fn get_changed_files<'a>(
373 &self,
374 current_files: &'a [(PathBuf, u64, u64)],
375 ) -> Vec<&'a PathBuf> {
376 current_files
377 .iter()
378 .filter(|(path, mtime, size)| {
379 let relative = path.to_string_lossy();
380 self.cache.needs_rescan(&relative, *mtime, *size)
381 })
382 .map(|(path, _, _)| path)
383 .collect()
384 }
385}
386
/// A filesystem change event, as surfaced by the optional `watcher` module.
#[derive(Debug, Clone)]
pub enum FileChange {
    /// A new file appeared.
    Created(PathBuf),
    /// An existing file's contents or metadata changed.
    Modified(PathBuf),
    /// A file was removed.
    Deleted(PathBuf),
    /// A file moved from one path to another.
    /// NOTE(review): `watcher::FileWatcher::event_to_change` never emits
    /// this variant — check whether any other producer exists before
    /// relying on it.
    Renamed { from: PathBuf, to: PathBuf },
}
395
#[cfg(feature = "watch")]
pub mod watcher {
    //! Filesystem watching support, enabled by the `watch` feature.

    use super::*;
    use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc::{channel, Receiver};

    /// Watches a directory tree recursively and converts raw `notify`
    /// events into [`FileChange`] values.
    pub struct FileWatcher {
        watcher: RecommendedWatcher,
        receiver: Receiver<Result<Event, notify::Error>>,
        root_path: PathBuf,
    }

    impl FileWatcher {
        /// Starts watching `path` recursively.
        pub fn new(path: &Path) -> Result<Self, notify::Error> {
            let (sender, receiver) = channel();

            let mut watcher = RecommendedWatcher::new(
                move |res| {
                    // A failed send just means the receiver was dropped;
                    // nothing useful to do with the error here.
                    let _ = sender.send(res);
                },
                Config::default(),
            )?;
            watcher.watch(path, RecursiveMode::Recursive)?;

            Ok(Self { watcher, receiver, root_path: path.to_path_buf() })
        }

        /// Non-blocking poll: the next queued change, or `None` when the
        /// queue is empty, the event errored, or it is not a kind we surface.
        pub fn try_next(&self) -> Option<FileChange> {
            if let Ok(Ok(event)) = self.receiver.try_recv() {
                self.event_to_change(event)
            } else {
                None
            }
        }

        /// Blocking wait for the next change; `None` when the channel is
        /// closed, the event errored, or it is not a kind we surface.
        pub fn next(&self) -> Option<FileChange> {
            if let Ok(Ok(event)) = self.receiver.recv() {
                self.event_to_change(event)
            } else {
                None
            }
        }

        /// Maps a raw `notify` event to a [`FileChange`], keeping only the
        /// first affected path and dropping event kinds we do not model.
        fn event_to_change(&self, event: Event) -> Option<FileChange> {
            let path = event.paths.first()?.clone();

            match event.kind {
                EventKind::Create(_) => Some(FileChange::Created(path)),
                EventKind::Modify(_) => Some(FileChange::Modified(path)),
                EventKind::Remove(_) => Some(FileChange::Deleted(path)),
                _ => None,
            }
        }

        /// Stops watching the root path and consumes the watcher.
        pub fn stop(mut self) -> Result<(), notify::Error> {
            self.watcher.unwatch(&self.root_path)
        }
    }
}
463
464pub fn hash_content(content: &[u8]) -> u64 {
470 let hash = blake3::hash(content);
471 let bytes = hash.as_bytes();
472 u64::from_le_bytes([
474 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
475 ])
476}
477
/// Returns the file's modification time as seconds since the Unix epoch,
/// or `None` when the metadata or mtime is unavailable or the timestamp
/// predates the epoch.
pub fn get_mtime(path: &Path) -> Option<u64> {
    let modified = path.metadata().ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
488
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // A populated cache survives a save/load round-trip.
    #[test]
    fn test_cache_create_save_load() {
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().join("test.cache");

        let mut cache = RepoCache::new("/test/repo");
        cache.files.insert(
            "test.py".to_string(),
            CachedFile {
                path: "test.py".to_string(),
                mtime: 12345,
                size: 100,
                hash: 0,
                tokens: TokenCounts {
                    o200k: 45,
                    cl100k: 48,
                    claude: 50,
                    gemini: 46,
                    llama: 50,
                    mistral: 50,
                    deepseek: 50,
                    qwen: 50,
                    cohere: 48,
                    grok: 50,
                },
                symbols: vec![],
                symbols_extracted: false,
                language: Some("python".to_string()),
                lines: 10,
            },
        );

        cache.save(&cache_path).unwrap();

        let loaded = RepoCache::load(&cache_path).unwrap();
        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.py"));
    }

    // Unknown paths always need a rescan; known paths only when mtime or
    // size diverges.
    #[test]
    fn test_needs_rescan() {
        let cache = RepoCache::new("/test");
        assert!(cache.needs_rescan("new_file.py", 0, 0));

        let mut cache = RepoCache::new("/test");
        cache.files.insert(
            "existing.py".to_string(),
            CachedFile {
                path: "existing.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: 0,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        assert!(!cache.needs_rescan("existing.py", 1000, 500));
        assert!(cache.needs_rescan("existing.py", 2000, 500)); // mtime changed
        assert!(cache.needs_rescan("existing.py", 1000, 600)); // size changed
    }

    // The scanner reports unknown files as needing a rescan and serves
    // cached entries after `update`.
    #[test]
    fn test_incremental_scanner() {
        let temp = TempDir::new().unwrap();

        let mut scanner = IncrementalScanner::new(temp.path());
        assert!(scanner.needs_rescan(&temp.path().join("test.py")));

        scanner.update(CachedFile {
            path: "test.py".to_string(),
            mtime: 1000,
            size: 100,
            hash: 0,
            tokens: TokenCounts::default(),
            symbols: vec![],
            symbols_extracted: false,
            language: Some("python".to_string()),
            lines: 5,
        });

        assert!(scanner.get_cached("test.py").is_some());
    }

    // Content hashing is deterministic and input-sensitive.
    #[test]
    fn test_hash_content() {
        let h1 = hash_content(b"hello world");
        let h2 = hash_content(b"hello world");
        let h3 = hash_content(b"different");

        assert_eq!(h1, h2);
        assert_ne!(h1, h3);
    }

    // Hash comparison only triggers when both sides have a non-zero hash.
    #[test]
    fn test_needs_rescan_with_hash() {
        let mut cache = RepoCache::new("/test");
        let original_hash = hash_content(b"original content");
        let modified_hash = hash_content(b"modified content");

        cache.files.insert(
            "file.py".to_string(),
            CachedFile {
                path: "file.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: original_hash,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        // Unchanged metadata and matching hash: no rescan.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, original_hash));
        // Same metadata but different content hash: rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 1000, 500, modified_hash));
        // Metadata change alone is enough.
        assert!(cache.needs_rescan_with_hash("file.py", 2000, 500, original_hash));
        // A zero current hash means "unknown" and disables the hash check.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, 0));
    }
}