1use bincode::Options;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::time::SystemTime;
11use thiserror::Error;
12
13use crate::bincode_safe::deserialize_with_limit;
14use crate::tokenizer::TokenCounts;
15use crate::types::Symbol;
16
/// Per-file scan results persisted in the repository cache.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFile {
    /// File path, also used as the key in [`RepoCache::files`].
    pub path: String,
    /// Modification time (seconds since the Unix epoch) when last scanned.
    pub mtime: u64,
    /// File size in bytes when last scanned.
    pub size: u64,
    /// Content hash; 0 means "not computed" and disables hash comparison
    /// (see `RepoCache::needs_rescan_with_hash`).
    pub hash: u64,
    /// Token counts for the supported tokenizers.
    pub tokens: TokenCounts,
    /// Symbols extracted from this file, if extraction ran.
    pub symbols: Vec<CachedSymbol>,
    /// Whether symbol extraction was performed (distinguishes "no symbols
    /// found" from "never extracted").
    pub symbols_extracted: bool,
    /// Detected language name, if known.
    pub language: Option<String>,
    /// Number of lines in the file.
    pub lines: usize,
}
39
/// Compact, serializable projection of a [`Symbol`] for cache storage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedSymbol {
    /// Symbol name.
    pub name: String,
    /// Symbol kind as a string (round-tripped via `SymbolKind`).
    pub kind: String,
    /// First line of the symbol definition.
    pub start_line: u32,
    /// Last line of the symbol definition.
    pub end_line: u32,
    /// Signature text, if one was captured.
    pub signature: Option<String>,
}
49
impl From<&Symbol> for CachedSymbol {
    /// Projects a full `Symbol` down to the fields worth persisting;
    /// docstring, visibility, references, and relationship fields are
    /// intentionally dropped (see the reverse conversion for the defaults
    /// they are restored with).
    fn from(s: &Symbol) -> Self {
        Self {
            name: s.name.clone(),
            kind: s.kind.name().to_owned(),
            start_line: s.start_line,
            end_line: s.end_line,
            signature: s.signature.clone(),
        }
    }
}
61
impl From<&CachedSymbol> for Symbol {
    /// Rebuilds a `Symbol` from its cached projection. Fields not stored in
    /// the cache come back as neutral defaults: no docstring, public
    /// visibility, 0 references, 0.5 importance, no parent/calls/extends/
    /// implements.
    fn from(s: &CachedSymbol) -> Self {
        use crate::types::{SymbolKind, Visibility};
        Self {
            name: s.name.clone(),
            // Unrecognized kind strings degrade to Variable rather than fail.
            kind: SymbolKind::from_str(&s.kind).unwrap_or(SymbolKind::Variable),
            start_line: s.start_line,
            end_line: s.end_line,
            signature: s.signature.clone(),
            docstring: None,
            visibility: Visibility::Public,
            references: 0,
            importance: 0.5,
            parent: None,
            calls: Vec::new(),
            extends: None,
            implements: Vec::new(),
        }
    }
}
82
/// Persistent cache of per-file scan results for one repository.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoCache {
    /// Format version; checked against [`RepoCache::VERSION`] on load.
    pub version: u32,
    /// Repository root path (as passed to `RepoCache::new`).
    pub root_path: String,
    /// Creation time, seconds since the Unix epoch.
    pub created_at: u64,
    /// Last update time, seconds since the Unix epoch.
    pub updated_at: u64,
    /// Cached entries keyed by their `CachedFile::path`.
    pub files: HashMap<String, CachedFile>,
    /// Sum of token counts over all cached files
    /// (refreshed by `recalculate_totals`).
    pub total_tokens: TokenCounts,
    /// External dependencies recorded for the repository
    /// (populated by callers; not read in this module).
    pub external_deps: Vec<String>,
}
101
102impl RepoCache {
103 pub const VERSION: u32 = 2;
105
106 pub fn new(root_path: &str) -> Self {
108 let now = SystemTime::now()
109 .duration_since(SystemTime::UNIX_EPOCH)
110 .map(|d| d.as_secs())
111 .unwrap_or(0);
112
113 Self {
114 version: Self::VERSION,
115 root_path: root_path.to_owned(),
116 created_at: now,
117 updated_at: now,
118 files: HashMap::new(),
119 total_tokens: TokenCounts::default(),
120 external_deps: Vec::new(),
121 }
122 }
123
124 pub fn load(cache_path: &Path) -> Result<Self, CacheError> {
126 let content = fs::read(cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
127
128 let cache: Self = deserialize_with_limit(&content)
129 .map_err(|e| CacheError::DeserializeError(e.to_string()))?;
130
131 if cache.version != Self::VERSION {
133 return Err(CacheError::VersionMismatch {
134 expected: Self::VERSION,
135 found: cache.version,
136 });
137 }
138
139 Ok(cache)
140 }
141
142 pub fn save(&self, cache_path: &Path) -> Result<(), CacheError> {
144 if let Some(parent) = cache_path.parent() {
146 fs::create_dir_all(parent).map_err(|e| CacheError::IoError(e.to_string()))?;
147 }
148
149 let content = bincode::options()
151 .serialize(self)
152 .map_err(|e| CacheError::SerializeError(e.to_string()))?;
153
154 fs::write(cache_path, content).map_err(|e| CacheError::IoError(e.to_string()))?;
155
156 Ok(())
157 }
158
159 pub fn default_cache_path(repo_path: &Path) -> PathBuf {
161 repo_path.join(".infiniloom/cache/repo.cache")
162 }
163
164 pub fn needs_rescan(&self, path: &str, current_mtime: u64, current_size: u64) -> bool {
166 match self.files.get(path) {
167 Some(cached) => cached.mtime != current_mtime || cached.size != current_size,
168 None => true,
169 }
170 }
171
172 pub fn needs_rescan_with_hash(
175 &self,
176 path: &str,
177 current_mtime: u64,
178 current_size: u64,
179 current_hash: u64,
180 ) -> bool {
181 match self.files.get(path) {
182 Some(cached) => {
183 cached.mtime != current_mtime
184 || cached.size != current_size
185 || (cached.hash != 0 && current_hash != 0 && cached.hash != current_hash)
186 },
187 None => true,
188 }
189 }
190
191 pub fn get(&self, path: &str) -> Option<&CachedFile> {
193 self.files.get(path)
194 }
195
196 pub fn update_file(&mut self, file: CachedFile) {
198 self.files.insert(file.path.clone(), file);
199 self.updated_at = SystemTime::now()
200 .duration_since(SystemTime::UNIX_EPOCH)
201 .map(|d| d.as_secs())
202 .unwrap_or(0);
203 }
204
205 pub fn remove_file(&mut self, path: &str) {
207 self.files.remove(path);
208 }
209
210 pub fn find_deleted_files(&self, current_files: &[&str]) -> Vec<String> {
212 let current_set: std::collections::HashSet<&str> = current_files.iter().copied().collect();
213 self.files
214 .keys()
215 .filter(|p| !current_set.contains(p.as_str()))
216 .cloned()
217 .collect()
218 }
219
220 pub fn recalculate_totals(&mut self) {
222 self.total_tokens = self.files.values().map(|f| f.tokens).sum();
223 }
224
225 pub fn stats(&self) -> CacheStats {
227 CacheStats {
228 file_count: self.files.len(),
229 total_tokens: self.total_tokens,
230 total_bytes: self.files.values().map(|f| f.size).sum(),
231 age_seconds: SystemTime::now()
232 .duration_since(SystemTime::UNIX_EPOCH)
233 .map(|d| d.as_secs())
234 .unwrap_or(0)
235 .saturating_sub(self.updated_at),
236 }
237 }
238}
239
/// Summary statistics derived from a [`RepoCache`].
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of files in the cache.
    pub file_count: usize,
    /// Aggregate token counts across all cached files.
    pub total_tokens: TokenCounts,
    /// Total size of all cached files, in bytes.
    pub total_bytes: u64,
    /// Seconds elapsed since the cache was last updated.
    pub age_seconds: u64,
}
248
/// Errors produced when loading or saving a [`RepoCache`].
#[derive(Debug, Error)]
pub enum CacheError {
    /// Filesystem read/write failure (message taken from `std::io::Error`).
    #[error("I/O error: {0}")]
    IoError(String),
    /// Failure encoding the cache with bincode.
    #[error("Serialization error: {0}")]
    SerializeError(String),
    /// Failure decoding cache bytes.
    #[error("Deserialization error: {0}")]
    DeserializeError(String),
    /// The on-disk cache was written with a different format version.
    #[error("Cache version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: u32, found: u32 },
}
261
/// Stateful wrapper around [`RepoCache`] that answers "does this file need
/// rescanning?" and persists cache updates back to disk.
pub struct IncrementalScanner {
    /// The in-memory cache being maintained.
    cache: RepoCache,
    /// Where the cache is persisted on save.
    cache_path: PathBuf,
    /// True when in-memory state has diverged from what is on disk.
    dirty: bool,
}
268
269impl IncrementalScanner {
270 pub fn new(repo_path: &Path) -> Self {
272 let cache_path = RepoCache::default_cache_path(repo_path);
273
274 let cache = RepoCache::load(&cache_path)
275 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
276
277 Self { cache, cache_path, dirty: false }
278 }
279
280 pub fn with_cache_path(repo_path: &Path, cache_path: PathBuf) -> Self {
282 let cache = RepoCache::load(&cache_path)
283 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
284
285 Self { cache, cache_path, dirty: false }
286 }
287
288 pub fn needs_rescan(&self, path: &Path) -> bool {
290 let metadata = match path.metadata() {
291 Ok(m) => m,
292 Err(_) => return true,
293 };
294
295 let mtime = metadata
296 .modified()
297 .ok()
298 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
299 .map(|d| d.as_secs())
300 .unwrap_or(0);
301
302 let relative_path = path.to_string_lossy();
303 self.cache
304 .needs_rescan(&relative_path, mtime, metadata.len())
305 }
306
307 pub fn needs_rescan_with_content(&self, path: &Path, content: &[u8]) -> bool {
310 let metadata = match path.metadata() {
311 Ok(m) => m,
312 Err(_) => return true,
313 };
314
315 let mtime = metadata
316 .modified()
317 .ok()
318 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
319 .map(|d| d.as_secs())
320 .unwrap_or(0);
321
322 let content_hash = hash_content(content);
323 let relative_path = path.to_string_lossy();
324 self.cache
325 .needs_rescan_with_hash(&relative_path, mtime, metadata.len(), content_hash)
326 }
327
328 pub fn get_cached(&self, path: &str) -> Option<&CachedFile> {
330 self.cache.files.get(path)
331 }
332
333 pub fn update(&mut self, file: CachedFile) {
335 self.cache.update_file(file);
336 self.dirty = true;
337 }
338
339 pub fn remove(&mut self, path: &str) {
341 self.cache.remove_file(path);
342 self.dirty = true;
343 }
344
345 pub fn save(&mut self) -> Result<(), CacheError> {
347 if self.dirty {
348 self.cache.recalculate_totals();
349 self.cache.save(&self.cache_path)?;
350 self.dirty = false;
351 }
352 Ok(())
353 }
354
355 pub fn force_save(&mut self) -> Result<(), CacheError> {
357 self.cache.recalculate_totals();
358 self.cache.save(&self.cache_path)?;
359 self.dirty = false;
360 Ok(())
361 }
362
363 pub fn stats(&self) -> CacheStats {
365 self.cache.stats()
366 }
367
368 pub fn clear(&mut self) {
370 self.cache = RepoCache::new(&self.cache.root_path);
371 self.dirty = true;
372 }
373
374 pub fn get_changed_files<'a>(
376 &self,
377 current_files: &'a [(PathBuf, u64, u64)],
378 ) -> Vec<&'a PathBuf> {
379 current_files
380 .iter()
381 .filter(|(path, mtime, size)| {
382 let relative = path.to_string_lossy();
383 self.cache.needs_rescan(&relative, *mtime, *size)
384 })
385 .map(|(path, _, _)| path)
386 .collect()
387 }
388}
389
/// A filesystem change observed by the (feature-gated) watcher.
#[derive(Debug, Clone)]
pub enum FileChange {
    /// A new file appeared.
    Created(PathBuf),
    /// An existing file was modified.
    Modified(PathBuf),
    /// A file was removed.
    Deleted(PathBuf),
    /// A file moved from one path to another.
    Renamed { from: PathBuf, to: PathBuf },
}
398
#[cfg(feature = "watch")]
pub mod watcher {
    use super::*;
    use notify::event::{ModifyKind, RenameMode};
    use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc::{channel, Receiver};

    /// Watches a directory tree and converts raw `notify` events into
    /// [`FileChange`] values.
    pub struct FileWatcher {
        watcher: RecommendedWatcher,
        receiver: Receiver<Result<Event, notify::Error>>,
        root_path: PathBuf,
    }

    impl FileWatcher {
        /// Starts watching `path` recursively.
        ///
        /// # Errors
        /// Propagates `notify` errors from watcher creation or watch
        /// registration.
        pub fn new(path: &Path) -> Result<Self, notify::Error> {
            let (tx, rx) = channel();

            // Events are forwarded onto the channel; if the receiver is
            // gone the send error is deliberately ignored.
            let watcher = RecommendedWatcher::new(
                move |res| {
                    let _ = tx.send(res);
                },
                Config::default(),
            )?;

            let mut fw = Self { watcher, receiver: rx, root_path: path.to_path_buf() };

            fw.watcher.watch(path, RecursiveMode::Recursive)?;

            Ok(fw)
        }

        /// Non-blocking poll for the next change; `None` when no event is
        /// pending or the event kind is not tracked.
        pub fn try_next(&self) -> Option<FileChange> {
            match self.receiver.try_recv() {
                Ok(Ok(event)) => self.event_to_change(event),
                _ => None,
            }
        }

        /// Blocking wait for the next change; `None` when the channel has
        /// closed or the event kind is not tracked.
        pub fn next(&self) -> Option<FileChange> {
            match self.receiver.recv() {
                Ok(Ok(event)) => self.event_to_change(event),
                _ => None,
            }
        }

        /// Maps a raw event to a [`FileChange`]; access/metadata-only and
        /// other untracked kinds yield `None`.
        fn event_to_change(&self, event: Event) -> Option<FileChange> {
            // A completed rename carries both endpoints: paths[0] is the
            // old path, paths[1] the new one. Previously this fell through
            // to Modified and FileChange::Renamed was never produced.
            if let EventKind::Modify(ModifyKind::Name(RenameMode::Both)) = event.kind {
                if event.paths.len() >= 2 {
                    return Some(FileChange::Renamed {
                        from: event.paths[0].clone(),
                        to: event.paths[1].clone(),
                    });
                }
            }

            let path = event.paths.first()?.clone();

            match event.kind {
                EventKind::Create(_) => Some(FileChange::Created(path)),
                EventKind::Modify(_) => Some(FileChange::Modified(path)),
                EventKind::Remove(_) => Some(FileChange::Deleted(path)),
                _ => None,
            }
        }

        /// Stops watching the root path, consuming the watcher.
        pub fn stop(mut self) -> Result<(), notify::Error> {
            self.watcher.unwatch(&self.root_path)
        }
    }
}
466
/// Hashes raw file bytes with the standard library's `DefaultHasher`.
///
/// `DefaultHasher`'s algorithm is unspecified and may change between Rust
/// releases, so these hashes are suitable for within-run change detection
/// but should not be treated as a stable content identity across builds.
pub fn hash_content(content: &[u8]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
476
/// Best-effort modification time of `path`, in seconds since the Unix epoch.
///
/// Returns `None` when the file is missing, its metadata is unreadable, the
/// platform does not report modification times, or the mtime predates the
/// epoch.
pub fn get_mtime(path: &Path) -> Option<u64> {
    let modified = fs::metadata(path).ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
487
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a minimal `CachedFile` with the given identity fields; every
    /// other field takes its neutral default.
    fn entry(path: &str, mtime: u64, size: u64, hash: u64) -> CachedFile {
        CachedFile {
            path: path.to_string(),
            mtime,
            size,
            hash,
            tokens: TokenCounts::default(),
            symbols: vec![],
            symbols_extracted: false,
            language: None,
            lines: 0,
        }
    }

    #[test]
    fn test_cache_create_save_load() {
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().join("test.cache");

        let mut cache = RepoCache::new("/test/repo");
        cache.files.insert(
            "test.py".to_string(),
            CachedFile {
                tokens: TokenCounts {
                    o200k: 45,
                    cl100k: 48,
                    claude: 50,
                    gemini: 46,
                    llama: 50,
                    mistral: 50,
                    deepseek: 50,
                    qwen: 50,
                    cohere: 48,
                    grok: 50,
                },
                language: Some("python".to_string()),
                lines: 10,
                ..entry("test.py", 12345, 100, 0)
            },
        );

        // Round-trip through disk and verify the entry survives.
        cache.save(&cache_path).unwrap();
        let loaded = RepoCache::load(&cache_path).unwrap();

        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.py"));
    }

    #[test]
    fn test_needs_rescan() {
        // Unknown files always need a scan.
        let cache = RepoCache::new("/test");
        assert!(cache.needs_rescan("new_file.py", 0, 0));

        let mut cache = RepoCache::new("/test");
        cache
            .files
            .insert("existing.py".to_string(), entry("existing.py", 1000, 500, 0));

        // Unchanged metadata: cached entry is still valid.
        assert!(!cache.needs_rescan("existing.py", 1000, 500));
        // Either an mtime or a size change triggers a rescan.
        assert!(cache.needs_rescan("existing.py", 2000, 500));
        assert!(cache.needs_rescan("existing.py", 1000, 600));
    }

    #[test]
    fn test_incremental_scanner() {
        let temp = TempDir::new().unwrap();

        // Fresh scanner: nothing cached, everything needs scanning.
        let mut scanner = IncrementalScanner::new(temp.path());
        assert!(scanner.needs_rescan(&temp.path().join("test.py")));

        scanner.update(CachedFile {
            language: Some("python".to_string()),
            lines: 5,
            ..entry("test.py", 1000, 100, 0)
        });

        assert!(scanner.get_cached("test.py").is_some());
    }

    #[test]
    fn test_hash_content() {
        let first = hash_content(b"hello world");
        let second = hash_content(b"hello world");
        let other = hash_content(b"different");

        assert_eq!(first, second, "hashing must be deterministic");
        assert_ne!(first, other, "distinct content must hash differently");
    }

    #[test]
    fn test_needs_rescan_with_hash() {
        let mut cache = RepoCache::new("/test");
        let original_hash = hash_content(b"original content");
        let modified_hash = hash_content(b"modified content");

        cache
            .files
            .insert("file.py".to_string(), entry("file.py", 1000, 500, original_hash));

        // Same metadata and hash: no rescan.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, original_hash));
        // A hash change alone forces a rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 1000, 500, modified_hash));
        // An mtime change forces a rescan even with a matching hash.
        assert!(cache.needs_rescan_with_hash("file.py", 2000, 500, original_hash));
        // A zero hash on either side disables the hash comparison.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, 0));
    }
}
624}