1use bincode::Options;
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fs;
9use std::path::{Path, PathBuf};
10use std::time::SystemTime;
11use thiserror::Error;
12
13use crate::bincode_safe::deserialize_with_limit;
14use crate::tokenizer::TokenCounts;
15use crate::types::Symbol;
16
/// Cached metadata and analysis results for a single scanned file.
///
/// Stored inside [`RepoCache::files`] keyed by `path`, and consulted by the
/// `needs_rescan*` methods to decide whether a file must be re-processed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFile {
    /// File path; also the key under which this entry is stored.
    pub path: String,
    /// Last-modified time, seconds since the Unix epoch.
    pub mtime: u64,
    /// File size in bytes.
    pub size: u64,
    /// Content hash; 0 is treated as "no hash available" by
    /// `RepoCache::needs_rescan_with_hash` (presumably produced by
    /// `hash_content` — first 8 bytes of a BLAKE3 digest).
    pub hash: u64,
    /// Token counts for this file across the supported tokenizers.
    pub tokens: TokenCounts,
    /// Symbols extracted from this file (empty if extraction did not run).
    pub symbols: Vec<CachedSymbol>,
    /// Whether symbol extraction has been performed for this file.
    pub symbols_extracted: bool,
    /// Detected language name, if known (e.g. "python").
    pub language: Option<String>,
    /// Number of lines in the file.
    pub lines: usize,
}
39
/// A code symbol persisted in the cache.
///
/// A slimmed-down, serialization-friendly projection of [`Symbol`]; see the
/// `From` conversions below for the exact field mapping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedSymbol {
    /// Symbol name.
    pub name: String,
    /// Symbol kind as a string (the `name()` of a `SymbolKind`).
    pub kind: String,
    /// Line where the symbol starts.
    pub start_line: u32,
    /// Line where the symbol ends.
    pub end_line: u32,
    /// Signature text, if one was captured.
    pub signature: Option<String>,
}
49
50impl From<&Symbol> for CachedSymbol {
51 fn from(s: &Symbol) -> Self {
52 Self {
53 name: s.name.clone(),
54 kind: s.kind.name().to_owned(),
55 start_line: s.start_line,
56 end_line: s.end_line,
57 signature: s.signature.clone(),
58 }
59 }
60}
61
62impl From<&CachedSymbol> for Symbol {
63 fn from(s: &CachedSymbol) -> Self {
64 use crate::types::{SymbolKind, Visibility};
65 Self {
66 name: s.name.clone(),
67 kind: SymbolKind::from_str(&s.kind).unwrap_or(SymbolKind::Variable),
68 start_line: s.start_line,
69 end_line: s.end_line,
70 signature: s.signature.clone(),
71 docstring: None,
72 visibility: Visibility::Public,
73 references: 0,
74 importance: 0.5,
75 parent: None,
76 calls: Vec::new(),
77 extends: None,
78 implements: Vec::new(),
79 }
80 }
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
85pub struct RepoCache {
86 pub version: u32,
88 pub root_path: String,
90 pub created_at: u64,
92 pub updated_at: u64,
94 pub files: HashMap<String, CachedFile>,
96 pub total_tokens: TokenCounts,
98 pub external_deps: Vec<String>,
100}
101
102impl RepoCache {
103 pub const VERSION: u32 = 2;
105
106 pub fn new(root_path: &str) -> Self {
108 let now = SystemTime::now()
109 .duration_since(SystemTime::UNIX_EPOCH)
110 .map(|d| d.as_secs())
111 .unwrap_or(0);
112
113 Self {
114 version: Self::VERSION,
115 root_path: root_path.to_owned(),
116 created_at: now,
117 updated_at: now,
118 files: HashMap::new(),
119 total_tokens: TokenCounts::default(),
120 external_deps: Vec::new(),
121 }
122 }
123
124 pub fn load(cache_path: &Path) -> Result<Self, CacheError> {
126 let content = fs::read(cache_path).map_err(|e| CacheError::IoError(e.to_string()))?;
127
128 let cache: Self = deserialize_with_limit(&content)
129 .map_err(|e| CacheError::DeserializeError(e.to_string()))?;
130
131 if cache.version != Self::VERSION {
133 return Err(CacheError::VersionMismatch {
134 expected: Self::VERSION,
135 found: cache.version,
136 });
137 }
138
139 Ok(cache)
140 }
141
142 pub fn save(&self, cache_path: &Path) -> Result<(), CacheError> {
144 if let Some(parent) = cache_path.parent() {
146 fs::create_dir_all(parent).map_err(|e| CacheError::IoError(e.to_string()))?;
147 }
148
149 let content = bincode::options()
151 .serialize(self)
152 .map_err(|e| CacheError::SerializeError(e.to_string()))?;
153
154 fs::write(cache_path, content).map_err(|e| CacheError::IoError(e.to_string()))?;
155
156 Ok(())
157 }
158
159 pub fn default_cache_path(repo_path: &Path) -> PathBuf {
161 repo_path.join(".infiniloom/cache/repo.cache")
162 }
163
164 pub fn needs_rescan(&self, path: &str, current_mtime: u64, current_size: u64) -> bool {
166 match self.files.get(path) {
167 Some(cached) => cached.mtime != current_mtime || cached.size != current_size,
168 None => true,
169 }
170 }
171
172 pub fn needs_rescan_with_hash(
175 &self,
176 path: &str,
177 current_mtime: u64,
178 current_size: u64,
179 current_hash: u64,
180 ) -> bool {
181 match self.files.get(path) {
182 Some(cached) => {
183 cached.mtime != current_mtime
184 || cached.size != current_size
185 || (cached.hash != 0 && current_hash != 0 && cached.hash != current_hash)
186 },
187 None => true,
188 }
189 }
190
191 pub fn get(&self, path: &str) -> Option<&CachedFile> {
193 self.files.get(path)
194 }
195
196 pub fn update_file(&mut self, file: CachedFile) {
198 self.files.insert(file.path.clone(), file);
199 self.updated_at = SystemTime::now()
200 .duration_since(SystemTime::UNIX_EPOCH)
201 .map(|d| d.as_secs())
202 .unwrap_or(0);
203 }
204
205 pub fn remove_file(&mut self, path: &str) {
207 self.files.remove(path);
208 }
209
210 pub fn find_deleted_files(&self, current_files: &[&str]) -> Vec<String> {
212 let current_set: std::collections::HashSet<&str> = current_files.iter().copied().collect();
213 self.files
214 .keys()
215 .filter(|p| !current_set.contains(p.as_str()))
216 .cloned()
217 .collect()
218 }
219
220 pub fn recalculate_totals(&mut self) {
222 self.total_tokens = self.files.values().map(|f| f.tokens).sum();
223 }
224
225 pub fn stats(&self) -> CacheStats {
227 CacheStats {
228 file_count: self.files.len(),
229 total_tokens: self.total_tokens,
230 total_bytes: self.files.values().map(|f| f.size).sum(),
231 age_seconds: SystemTime::now()
232 .duration_since(SystemTime::UNIX_EPOCH)
233 .map(|d| d.as_secs())
234 .unwrap_or(0)
235 .saturating_sub(self.updated_at),
236 }
237 }
238}
239
/// Point-in-time summary of a [`RepoCache`], produced by `RepoCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    /// Number of cached file entries.
    pub file_count: usize,
    /// Aggregated token counts across all cached files.
    pub total_tokens: TokenCounts,
    /// Sum of cached file sizes, in bytes.
    pub total_bytes: u64,
    /// Seconds elapsed since the cache was last updated.
    pub age_seconds: u64,
}
248
/// Errors produced when loading or saving a [`RepoCache`].
#[derive(Debug, Error)]
pub enum CacheError {
    /// Filesystem read/write failure; the message carries the OS error text.
    #[error("I/O error: {0}")]
    IoError(String),
    /// The cache could not be encoded with bincode.
    #[error("Serialization error: {0}")]
    SerializeError(String),
    /// The cache bytes could not be decoded (corruption or format drift).
    #[error("Deserialization error: {0}")]
    DeserializeError(String),
    /// The on-disk cache was written by a different format version.
    #[error("Cache version mismatch: expected {expected}, found {found}")]
    VersionMismatch { expected: u32, found: u32 },
}
261
/// Stateful wrapper around [`RepoCache`] that tracks whether in-memory
/// changes still need to be flushed to disk.
pub struct IncrementalScanner {
    // In-memory cache state.
    cache: RepoCache,
    // Where `save`/`force_save` persist the cache.
    cache_path: PathBuf,
    // True when `cache` holds modifications not yet written to disk.
    dirty: bool,
}
268
269impl IncrementalScanner {
270 pub fn new(repo_path: &Path) -> Self {
272 let cache_path = RepoCache::default_cache_path(repo_path);
273
274 let cache = RepoCache::load(&cache_path)
275 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
276
277 Self { cache, cache_path, dirty: false }
278 }
279
280 pub fn with_cache_path(repo_path: &Path, cache_path: PathBuf) -> Self {
282 let cache = RepoCache::load(&cache_path)
283 .unwrap_or_else(|_| RepoCache::new(&repo_path.to_string_lossy()));
284
285 Self { cache, cache_path, dirty: false }
286 }
287
288 pub fn needs_rescan(&self, path: &Path) -> bool {
290 let metadata = match path.metadata() {
291 Ok(m) => m,
292 Err(_) => return true,
293 };
294
295 let mtime = metadata
296 .modified()
297 .ok()
298 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
299 .map_or(0, |d| d.as_secs());
300
301 let relative_path = path.to_string_lossy();
302 self.cache
303 .needs_rescan(&relative_path, mtime, metadata.len())
304 }
305
306 pub fn needs_rescan_with_content(&self, path: &Path, content: &[u8]) -> bool {
309 let metadata = match path.metadata() {
310 Ok(m) => m,
311 Err(_) => return true,
312 };
313
314 let mtime = metadata
315 .modified()
316 .ok()
317 .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
318 .map_or(0, |d| d.as_secs());
319
320 let content_hash = hash_content(content);
321 let relative_path = path.to_string_lossy();
322 self.cache
323 .needs_rescan_with_hash(&relative_path, mtime, metadata.len(), content_hash)
324 }
325
326 pub fn get_cached(&self, path: &str) -> Option<&CachedFile> {
328 self.cache.files.get(path)
329 }
330
331 pub fn update(&mut self, file: CachedFile) {
333 self.cache.update_file(file);
334 self.dirty = true;
335 }
336
337 pub fn remove(&mut self, path: &str) {
339 self.cache.remove_file(path);
340 self.dirty = true;
341 }
342
343 pub fn save(&mut self) -> Result<(), CacheError> {
345 if self.dirty {
346 self.cache.recalculate_totals();
347 self.cache.save(&self.cache_path)?;
348 self.dirty = false;
349 }
350 Ok(())
351 }
352
353 pub fn force_save(&mut self) -> Result<(), CacheError> {
355 self.cache.recalculate_totals();
356 self.cache.save(&self.cache_path)?;
357 self.dirty = false;
358 Ok(())
359 }
360
361 pub fn stats(&self) -> CacheStats {
363 self.cache.stats()
364 }
365
366 pub fn clear(&mut self) {
368 self.cache = RepoCache::new(&self.cache.root_path);
369 self.dirty = true;
370 }
371
372 pub fn get_changed_files<'a>(
374 &self,
375 current_files: &'a [(PathBuf, u64, u64)],
376 ) -> Vec<&'a PathBuf> {
377 current_files
378 .iter()
379 .filter(|(path, mtime, size)| {
380 let relative = path.to_string_lossy();
381 self.cache.needs_rescan(&relative, *mtime, *size)
382 })
383 .map(|(path, _, _)| path)
384 .collect()
385 }
386}
387
/// A filesystem change event, as surfaced by the optional file watcher.
#[derive(Debug, Clone)]
pub enum FileChange {
    /// A new file appeared.
    Created(PathBuf),
    /// An existing file's contents or metadata changed.
    Modified(PathBuf),
    /// A file was removed.
    Deleted(PathBuf),
    /// A file was moved. NOTE(review): never produced by
    /// `watcher::FileWatcher::event_to_change` as written — confirm
    /// whether rename detection is planned or this variant is dead.
    Renamed { from: PathBuf, to: PathBuf },
}
396
#[cfg(feature = "watch")]
pub mod watcher {
    //! Optional filesystem watching, gated behind the `watch` feature.

    use super::*;
    use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
    use std::sync::mpsc::{channel, Receiver};

    /// Watches a directory tree recursively and translates raw `notify`
    /// events into [`FileChange`] values.
    pub struct FileWatcher {
        watcher: RecommendedWatcher,
        receiver: Receiver<Result<Event, notify::Error>>,
        root_path: PathBuf,
    }

    impl FileWatcher {
        /// Start watching `path` (recursively) for changes.
        pub fn new(path: &Path) -> Result<Self, notify::Error> {
            let (sender, receiver) = channel();

            let mut watcher = RecommendedWatcher::new(
                move |res| {
                    // The receiver may already be gone during shutdown;
                    // a failed send is harmless, so ignore it.
                    let _ = sender.send(res);
                },
                Config::default(),
            )?;

            watcher.watch(path, RecursiveMode::Recursive)?;

            Ok(Self { watcher, receiver, root_path: path.to_path_buf() })
        }

        /// Non-blocking poll for the next change. Returns `None` when no
        /// event is pending, the event was an error, or it has no mapping.
        pub fn try_next(&self) -> Option<FileChange> {
            let event = self.receiver.try_recv().ok()?.ok()?;
            self.event_to_change(event)
        }

        /// Blocking wait for the next change. Returns `None` when the
        /// channel closed, the event was an error, or it has no mapping.
        pub fn next(&self) -> Option<FileChange> {
            let event = self.receiver.recv().ok()?.ok()?;
            self.event_to_change(event)
        }

        /// Map a raw notify event onto a [`FileChange`] using the event's
        /// first path; events without a path or with an unhandled kind
        /// yield `None`.
        fn event_to_change(&self, event: Event) -> Option<FileChange> {
            let path = event.paths.first().cloned()?;

            let change = match event.kind {
                EventKind::Create(_) => FileChange::Created(path),
                EventKind::Modify(_) => FileChange::Modified(path),
                EventKind::Remove(_) => FileChange::Deleted(path),
                _ => return None,
            };
            Some(change)
        }

        /// Stop watching the root path.
        pub fn stop(mut self) -> Result<(), notify::Error> {
            self.watcher.unwatch(&self.root_path)
        }
    }
}
464
465pub fn hash_content(content: &[u8]) -> u64 {
471 let hash = blake3::hash(content);
472 let bytes = hash.as_bytes();
473 u64::from_le_bytes([
475 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
476 ])
477}
478
/// Modification time of `path` in seconds since the Unix epoch.
///
/// Returns `None` when the file's metadata or mtime cannot be obtained
/// (e.g. the file does not exist, or the mtime predates the epoch).
pub fn get_mtime(path: &Path) -> Option<u64> {
    let modified = fs::metadata(path).ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs())
}
489
#[cfg(test)]
#[allow(clippy::str_to_string)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Round-trip: a cache holding one file entry survives save + load intact.
    #[test]
    fn test_cache_create_save_load() {
        let temp = TempDir::new().unwrap();
        let cache_path = temp.path().join("test.cache");

        let mut cache = RepoCache::new("/test/repo");
        cache.files.insert(
            "test.py".to_string(),
            CachedFile {
                path: "test.py".to_string(),
                mtime: 12345,
                size: 100,
                hash: 0,
                tokens: TokenCounts {
                    o200k: 45,
                    cl100k: 48,
                    claude: 50,
                    gemini: 46,
                    llama: 50,
                    mistral: 50,
                    deepseek: 50,
                    qwen: 50,
                    cohere: 48,
                    grok: 50,
                },
                symbols: vec![],
                symbols_extracted: false,
                language: Some("python".to_string()),
                lines: 10,
            },
        );

        cache.save(&cache_path).unwrap();

        let loaded = RepoCache::load(&cache_path).unwrap();
        assert_eq!(loaded.files.len(), 1);
        assert!(loaded.files.contains_key("test.py"));
    }

    // mtime/size comparison drives the rescan decision; unknown paths
    // always need a scan.
    #[test]
    fn test_needs_rescan() {
        let cache = RepoCache::new("/test");
        assert!(cache.needs_rescan("new_file.py", 0, 0));

        let mut cache = RepoCache::new("/test");
        cache.files.insert(
            "existing.py".to_string(),
            CachedFile {
                path: "existing.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: 0,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        // Matching mtime+size: no rescan; either one changed: rescan.
        assert!(!cache.needs_rescan("existing.py", 1000, 500));
        assert!(cache.needs_rescan("existing.py", 2000, 500)); assert!(cache.needs_rescan("existing.py", 1000, 600)); }

    // A fresh scanner reports unknown files as needing a scan, and updated
    // entries become visible via get_cached.
    #[test]
    fn test_incremental_scanner() {
        let temp = TempDir::new().unwrap();

        let mut scanner = IncrementalScanner::new(temp.path());
        assert!(scanner.needs_rescan(&temp.path().join("test.py")));

        scanner.update(CachedFile {
            path: "test.py".to_string(),
            mtime: 1000,
            size: 100,
            hash: 0,
            tokens: TokenCounts::default(),
            symbols: vec![],
            symbols_extracted: false,
            language: Some("python".to_string()),
            lines: 5,
        });

        assert!(scanner.get_cached("test.py").is_some());
    }

    // hash_content is deterministic and distinguishes different inputs.
    #[test]
    fn test_hash_content() {
        let h1 = hash_content(b"hello world");
        let h2 = hash_content(b"hello world");
        let h3 = hash_content(b"different");

        assert_eq!(h1, h2);
        assert_ne!(h1, h3);
    }

    // Hash-aware rescan: equal hash (with equal mtime/size) means no rescan;
    // a differing hash or mtime forces one; hash 0 disables the hash check.
    #[test]
    fn test_needs_rescan_with_hash() {
        let mut cache = RepoCache::new("/test");
        let original_hash = hash_content(b"original content");
        let modified_hash = hash_content(b"modified content");

        cache.files.insert(
            "file.py".to_string(),
            CachedFile {
                path: "file.py".to_string(),
                mtime: 1000,
                size: 500,
                hash: original_hash,
                tokens: TokenCounts::default(),
                symbols: vec![],
                symbols_extracted: false,
                language: None,
                lines: 0,
            },
        );

        // Identical metadata and hash: no rescan.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, original_hash));

        // Same metadata but different content hash: rescan.
        assert!(cache.needs_rescan_with_hash("file.py", 1000, 500, modified_hash));

        // Different mtime forces a rescan even with a matching hash.
        assert!(cache.needs_rescan_with_hash("file.py", 2000, 500, original_hash));

        // current_hash == 0 means "unknown": the hash comparison is skipped.
        assert!(!cache.needs_rescan_with_hash("file.py", 1000, 500, 0));
    }
}