1use bincode::Options;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::time::SystemTime;
11use thiserror::Error;
12
13use crate::bincode_safe::deserialize_with_limit;
14use crate::tokenizer::TokenCounts;
15use crate::types::Symbol;
16
/// Per-file cache entry: change-detection metadata (mtime/size/hash) plus the
/// analysis results (token counts, symbols) computed on the last scan.
///
/// NOTE: field order matters — this struct is serialized with bincode, so
/// reordering fields changes the on-disk layout (bump `RepoCache::VERSION`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFile {
    /// Path string used as the cache key — same form callers pass to lookups
    /// (presumably repo-relative; confirm against scanner call sites).
    pub path: String,
    /// Last-modified time, seconds since the Unix epoch (0 when unavailable).
    pub mtime: u64,
    /// File size in bytes at scan time.
    pub size: u64,
    /// Content hash from `hash_content`; 0 means "not computed" and disables
    /// hash comparison in `needs_rescan_with_hash`.
    pub hash: u64,
    /// Token counts for this file across the supported tokenizers.
    pub tokens: TokenCounts,
    /// Symbols extracted from the file (empty if extraction did not run).
    pub symbols: Vec<CachedSymbol>,
    /// Whether symbol extraction ran — distinguishes "no symbols found"
    /// from "not extracted".
    pub symbols_extracted: bool,
    /// Detected language name, if any.
    pub language: Option<String>,
    /// Line count of the file.
    pub lines: usize,
}
39
/// Serializable subset of `crate::types::Symbol` persisted in the cache.
///
/// NOTE: field order matters for the bincode on-disk layout.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedSymbol {
    /// Symbol identifier.
    pub name: String,
    /// Symbol kind as its string name (round-trips via `SymbolKind::from_str`).
    pub kind: String,
    /// First line of the symbol (1-based — TODO confirm against extractor).
    pub start_line: u32,
    /// Last line of the symbol.
    pub end_line: u32,
    /// Declaration signature, when one was captured.
    pub signature: Option<String>,
}
49
50impl From<&Symbol> for CachedSymbol {
51 fn from(s: &Symbol) -> Self {
52 Self {
53 name: s.name.clone(),
54 kind: s.kind.name().to_owned(),
55 start_line: s.start_line,
56 end_line: s.end_line,
57 signature: s.signature.clone(),
58 }
59 }
60}
61
62impl From<&CachedSymbol> for Symbol {
63 fn from(s: &CachedSymbol) -> Self {
64 use crate::types::{SymbolKind, Visibility};
65 Self {
66 name: s.name.clone(),
67 kind: SymbolKind::from_str(&s.kind).unwrap_or(SymbolKind::Variable),
68 start_line: s.start_line,
69 end_line: s.end_line,
70 signature: s.signature.clone(),
71 docstring: None,
72 visibility: Visibility::Public,
73 references: 0,
74 importance: 0.5,
75 parent: None,
76 calls: Vec::new(),
77 extends: None,
78 implements: Vec::new(),
79 }
80 }
81}
82
/// On-disk repository scan cache, serialized with bincode.
///
/// NOTE: field order matters for the bincode on-disk layout; reordering
/// requires bumping `RepoCache::VERSION`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RepoCache {
    /// Cache format version; `load` rejects anything != `RepoCache::VERSION`.
    pub version: u32,
    /// Repository root this cache was built for.
    pub root_path: String,
    /// Creation time, seconds since the Unix epoch.
    pub created_at: u64,
    /// Last update time, seconds since the Unix epoch.
    pub updated_at: u64,
    /// Cached entries keyed by path (same string form as `CachedFile::path`).
    pub files: HashMap<String, CachedFile>,
    /// Aggregate token counts (maintained via `recalculate_totals`).
    pub total_tokens: TokenCounts,
    /// External dependency names — not read or written in this module;
    /// presumably populated by the scanner elsewhere.
    pub external_deps: Vec<String>,
}
101
102impl RepoCache {
103 pub const VERSION: u32 = 2;
105
106 pub fn new(root_path: &str) -> Self {
108 let now = SystemTime::now()
109 .duration_since(SystemTime::UNIX_EPOCH)
110 .map(|d| d.as_secs())
111 .unwrap_or(0);
112
113 Self {
114 version: Self::VERSION,
115 root_path: root_path.to_owned(),
116 created_at: now,
117 updated_at: now,
118 files: HashMap::new(),
119 total_tokens: TokenCounts::default(),
120 external_deps: Vec::new(),
121 }
122 }
123
124 pub fn load(cache_path: &Path) -> Result<Self, CacheError> {
126 let content = fs::read(cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
127
128 let cache: Self = deserialize_with_limit(&content)
129 .map_err(|e| CacheError::DeserializeError(e.to_string()))?;
130
131 if cache.version != Self::VERSION {
133 return Err(CacheError::VersionMismatch {
134 expected: Self::VERSION,
135 found: cache.version,
136 });
137 }
138
139 Ok(cache)
140 }
141
142 pub fn save(&self, cache_path: &Path) -> Result<(), CacheError> {
144 if let Some(parent) = cache_path.parent() {
146 fs::create_dir_all(parent).map_err(|e| CacheError::IoError(e.to_string()))?;
147 }
148
149 let content = bincode::options()
151 .serialize(self)
152 .map_err(|e| CacheError::SerializeError(e.to_string()))?;
153
154 fs::write(cache_path, content).map_err(|e| CacheError::IoError(e.to_string()))?;
155
156 Ok(())
157 }
158
159 pub fn default_cache_path(repo_path: &Path) -> PathBuf {
161 repo_path.join(".infiniloom/cache/repo.cache")
162 }
163
164 pub fn needs_rescan(&self, path: &str, current_mtime: u64, current_size: u64) -> bool {
166 match self.files.get(path) {
167 Some(cached) => cached.mtime != current_mtime || cached.size != current_size,
168 None => true,
169 }
170 }
171
172 pub fn needs_rescan_with_hash(
175 &self,
176 path: &str,
177 current_mtime: u64,
178 current_size: u64,
179 current_hash: u64,
180 ) -> bool {
181 match self.files.get(path) {
182 Some(cached) => {
183 cached.mtime != current_mtime
184 || cached.size != current_size
185 || (cached.hash != 0 && current_hash != 0 && cached.hash != current_hash)
186 },
187 None => true,
188 }
189 }
190
191 pub fn get(&self, path: &str) -> Option<&CachedFile> {
193 self.files.get(path)
194 }
195
196 pub fn update_file(&mut self, file: CachedFile) {
198 self.files.insert(file.path.clone(), file);
199 self.updated_at = SystemTime::now()
200 .duration_since(SystemTime::UNIX_EPOCH)
201 .map(|d| d.as_secs())
202 .unwrap_or(0);
203 }
204
205 pub fn remove_file(&mut self, path: &str) {
207 self.files.remove(path);
208 }
209
210 pub fn find_deleted_files(&self, current_files: &[&str]) -> Vec<String> {
212 let current_set: std::collections::HashSet<&str> = current_files.iter().copied().collect();
213 self.files
214 .keys()
215 .filter(|p| !current_set.contains(p.as_str()))
216 .cloned()
217 .collect()
218 }
219
220 pub fn recalculate_totals(&mut self) {
222 self.total_tokens = self.files.values().map(|f| f.tokens).sum();
223 }
224
225 pub fn stats(&self) -> CacheStats {
227 CacheStats {
228 file_count: self.files.len(),
229 total_tokens: self.total_tokens,
230 total_bytes: self.files.values().map(|f| f.size).sum(),
231 age_seconds: SystemTime::now()
232 .duration_since(SystemTime::UNIX_EPOCH)
233 .map(|d| d.as_secs())
234 .unwrap_or(0)
235 .saturating_sub(self.updated_at),
236 }
237 }
238}
239
/// Point-in-time summary of the cache, produced by `RepoCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of cached file entries.
    pub file_count: usize,
    /// Aggregate token counts as last recalculated.
    pub total_tokens: TokenCounts,
    /// Sum of cached file sizes, in bytes.
    pub total_bytes: u64,
    /// Seconds since the cache was last updated (saturating at 0).
    pub age_seconds: u64,
}
248
/// Errors produced by cache load/save. Underlying errors are flattened to
/// strings, so this type carries no source-error payloads.
#[derive(Debug, Error)]
pub enum CacheError {
    /// Filesystem read/write/create failure (message from `std::io::Error`).
    #[error("I/O error: {0}")]
    IoError(String),
    /// bincode encoding failure during save.
    #[error("Serialization error: {0}")]
    SerializeError(String),
    /// Decoding failure during load (via `deserialize_with_limit`, which
    /// presumably also rejects oversized payloads — confirm in bincode_safe).
    #[error("Deserialization error: {0}")]
    DeserializeError(String),
    /// The on-disk cache was written with a different format version.
    #[error("Cache version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: u32, found: u32 },
}
261
/// Wraps a `RepoCache` with change detection and dirty-tracking so callers
/// rescan only modified files and persist the cache only when it changed.
pub struct IncrementalScanner {
    /// Backing cache, loaded from (or destined for) `cache_path`.
    cache: RepoCache,
    /// Where `save`/`force_save` persist the cache.
    cache_path: PathBuf,
    /// True when in-memory state diverges from what is on disk.
    dirty: bool,
}
268
269impl IncrementalScanner {
270 pub fn new(repo_path: &Path) -> Self {
272 let cache_path = RepoCache::default_cache_path(repo_path);
273
274 let cache = RepoCache::load(&cache_path)
275 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
276
277 Self { cache, cache_path, dirty: false }
278 }
279
280 pub fn with_cache_path(repo_path: &Path, cache_path: PathBuf) -> Self {
282 let cache = RepoCache::load(&cache_path)
283 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
284
285 Self { cache, cache_path, dirty: false }
286 }
287
288 pub fn needs_rescan(&self, path: &Path) -> bool {
290 let metadata = match path.metadata() {
291 Ok(m) => m,
292 Err(_) => return true,
293 };
294
295 let mtime = metadata
296 .modified()
297 .ok()
298 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
299 .map_or(0, |d| d.as_secs());
300
301 let relative_path = path.to_string_lossy();
302 self.cache
303 .needs_rescan(&relative_path, mtime, metadata.len())
304 }
305
306 pub fn needs_rescan_with_content(&self, path: &Path, content: &[u8]) -> bool {
309 let metadata = match path.metadata() {
310 Ok(m) => m,
311 Err(_) => return true,
312 };
313
314 let mtime = metadata
315 .modified()
316 .ok()
317 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
318 .map_or(0, |d| d.as_secs());
319
320 let content_hash = hash_content(content);
321 let relative_path = path.to_string_lossy();
322 self.cache
323 .needs_rescan_with_hash(&relative_path, mtime, metadata.len(), content_hash)
324 }
325
326 pub fn get_cached(&self, path: &str) -> Option<&CachedFile> {
328 self.cache.files.get(path)
329 }
330
331 pub fn update(&mut self, file: CachedFile) {
333 self.cache.update_file(file);
334 self.dirty = true;
335 }
336
337 pub fn remove(&mut self, path: &str) {
339 self.cache.remove_file(path);
340 self.dirty = true;
341 }
342
343 pub fn save(&mut self) -> Result<(), CacheError> {
345 if self.dirty {
346 self.cache.recalculate_totals();
347 self.cache.save(&self.cache_path)?;
348 self.dirty = false;
349 }
350 Ok(())
351 }
352
353 pub fn force_save(&mut self) -> Result<(), CacheError> {
355 self.cache.recalculate_totals();
356 self.cache.save(&self.cache_path)?;
357 self.dirty = false;
358 Ok(())
359 }
360
361 pub fn stats(&self) -> CacheStats {
363 self.cache.stats()
364 }
365
366 pub fn clear(&mut self) {
368 self.cache = RepoCache::new(&self.cache.root_path);
369 self.dirty = true;
370 }
371
372 pub fn get_changed_files<'a>(
374 &self,
375 current_files: &'a [(PathBuf, u64, u64)],
376 ) -> Vec<&'a PathBuf> {
377 current_files
378 .iter()
379 .filter(|(path, mtime, size)| {
380 let relative = path.to_string_lossy();
381 self.cache.needs_rescan(&relative, *mtime, *size)
382 })
383 .map(|(path, _, _)| path)
384 .collect()
385 }
386}
387
/// A filesystem change event, as surfaced by the feature-gated watcher.
#[derive(Debug, Clone)]
pub enum FileChange {
    /// A new file or directory appeared.
    Created(PathBuf),
    /// Contents or metadata of an existing path changed.
    Modified(PathBuf),
    /// The path was removed.
    Deleted(PathBuf),
    /// The path moved. Not produced by the bundled watcher (which maps events
    /// only to Created/Modified/Deleted) — presumably emitted by other
    /// producers; confirm before relying on it.
    Renamed { from: PathBuf, to: PathBuf },
}
396
#[cfg(feature = "watch")]
pub mod watcher {
    //! Filesystem watching (behind the `watch` feature), built on `notify`.

    use super::*;
    use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc::{channel, Receiver};

    /// Recursively watches a directory tree and translates raw `notify`
    /// events into [`FileChange`] values.
    pub struct FileWatcher {
        watcher: RecommendedWatcher,
        receiver: Receiver<Result<Event, notify::Error>>,
        root_path: PathBuf,
    }

    impl FileWatcher {
        /// Starts watching `path` recursively with the default configuration.
        pub fn new(path: &Path) -> Result<Self, notify::Error> {
            let (sender, receiver) = channel();

            let mut watcher = RecommendedWatcher::new(
                move |result| {
                    // The receiver may already be gone during shutdown;
                    // dropping the event is fine.
                    let _ = sender.send(result);
                },
                Config::default(),
            )?;
            watcher.watch(path, RecursiveMode::Recursive)?;

            Ok(Self { watcher, receiver, root_path: path.to_path_buf() })
        }

        /// Non-blocking poll: `None` when no event is queued, on watcher
        /// error, or for event kinds we do not surface.
        pub fn try_next(&self) -> Option<FileChange> {
            let event = self.receiver.try_recv().ok()?.ok()?;
            self.event_to_change(event)
        }

        /// Blocking wait: `None` on channel shutdown, watcher error, or for
        /// event kinds we do not surface.
        pub fn next(&self) -> Option<FileChange> {
            let event = self.receiver.recv().ok()?.ok()?;
            self.event_to_change(event)
        }

        /// Maps a raw `notify` event (via its first path) onto the simplified
        /// [`FileChange`] enum; other event kinds are dropped.
        fn event_to_change(&self, event: Event) -> Option<FileChange> {
            let path = event.paths.first().cloned()?;

            let change = match event.kind {
                EventKind::Create(_) => FileChange::Created(path),
                EventKind::Modify(_) => FileChange::Modified(path),
                EventKind::Remove(_) => FileChange::Deleted(path),
                _ => return None,
            };
            Some(change)
        }

        /// Stops watching the root path.
        pub fn stop(mut self) -> Result<(), notify::Error> {
            self.watcher.unwatch(&self.root_path)
        }
    }
}
464
/// Hashes raw file content with the standard library's `DefaultHasher`.
///
/// Deterministic within a process and across runs (fixed-key SipHash), though
/// the std docs do not guarantee stability across Rust releases. Callers treat
/// a stored hash of 0 as "not computed" (see `needs_rescan_with_hash`).
pub fn hash_content(content: &[u8]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Hash the slice via the `Hash` impl (length-prefixed), matching how
    // `&[u8]` hashes anywhere else in std collections.
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
474
/// Modification time of `path` as seconds since the Unix epoch.
///
/// Returns `None` when the path's metadata is unreadable, the platform
/// provides no mtime, or the timestamp predates the epoch.
pub fn get_mtime(path: &Path) -> Option<u64> {
    let modified = path.metadata().ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
485
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a minimal `CachedFile` fixture with default tokens, no symbols,
    /// and no language.
    fn cached(path: &str, mtime: u64, size: u64, hash: u64) -> CachedFile {
        CachedFile {
            path: path.to_string(),
            mtime,
            size,
            hash,
            tokens: TokenCounts::default(),
            symbols: vec![],
            symbols_extracted: false,
            language: None,
            lines: 0,
        }
    }

    #[test]
    fn test_cache_create_save_load() {
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().join("test.cache");

        let mut cache = RepoCache::new("/test/repo");
        let entry = CachedFile {
            path: "test.py".to_string(),
            mtime: 12345,
            size: 100,
            hash: 0,
            tokens: TokenCounts {
                o200k: 45,
                cl100k: 48,
                claude: 50,
                gemini: 46,
                llama: 50,
                mistral: 50,
                deepseek: 50,
                qwen: 50,
                cohere: 48,
                grok: 50,
            },
            symbols: vec![],
            symbols_extracted: false,
            language: Some("python".to_string()),
            lines: 10,
        };
        cache.files.insert("test.py".to_string(), entry);

        cache.save(&cache_path).unwrap();

        let reloaded = RepoCache::load(&cache_path).unwrap();
        assert_eq!(reloaded.files.len(), 1);
        assert!(reloaded.files.contains_key("test.py"));
    }

    #[test]
    fn test_needs_rescan() {
        // Unknown files always need a scan.
        let cache = RepoCache::new("/test");
        assert!(cache.needs_rescan("new_file.py", 0, 0));

        let mut cache = RepoCache::new("/test");
        cache
            .files
            .insert("existing.py".to_string(), cached("existing.py", 1000, 500, 0));

        // Matching mtime+size: no rescan.
        assert!(!cache.needs_rescan("existing.py", 1000, 500));
        // Either mtime or size changing triggers a rescan.
        assert!(cache.needs_rescan("existing.py", 2000, 500));
        assert!(cache.needs_rescan("existing.py", 1000, 600));
    }

    #[test]
    fn test_incremental_scanner() {
        let temp = TempDir::new().unwrap();

        let mut scanner = IncrementalScanner::new(temp.path());
        assert!(scanner.needs_rescan(&temp.path().join("test.py")));

        let mut entry = cached("test.py", 1000, 100, 0);
        entry.language = Some("python".to_string());
        entry.lines = 5;
        scanner.update(entry);

        assert!(scanner.get_cached("test.py").is_some());
    }

    #[test]
    fn test_hash_content() {
        let first = hash_content(b"hello world");
        let second = hash_content(b"hello world");
        let other = hash_content(b"different");

        assert_eq!(first, second);
        assert_ne!(first, other);
    }

    #[test]
    fn test_needs_rescan_with_hash() {
        let mut cache = RepoCache::new("/test");
        let original_hash = hash_content(b"original content");
        let modified_hash = hash_content(b"modified content");

        cache
            .files
            .insert("file.py".to_string(), cached("file.py", 1000, 500, original_hash));

        // Unchanged on every axis: no rescan.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, original_hash));
        // Hash changed while mtime/size stayed put: rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 1000, 500, modified_hash));
        // mtime changed: rescan regardless of a matching hash.
        assert!(cache.needs_rescan_with_hash("file.py", 2000, 500, original_hash));
        // A current hash of 0 means "not computed" and never forces a rescan.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, 0));
    }
}