1use anyhow::{Context, Result};
45use serde::{Deserialize, Serialize};
46use std::collections::HashMap;
47use std::fs;
48use std::path::{Path, PathBuf};
49use std::time::SystemTime;
50use xxhash_rust::xxh64::xxh64;
51
52use crate::config::buffers::parse_buffer_size;
53
/// Seven-byte tag stored in every [`HashIndexEnvelope`]; `HashIndex::load`
/// rejects files whose tag differs.
const HASH_INDEX_MAGIC: [u8; 7] = *b"SQRYHSH";
/// Envelope format version written by `HashIndex::save`; `HashIndex::load`
/// rejects any other value.
const HASH_INDEX_ENVELOPE_VERSION: u16 = 1;
56
/// On-disk wrapper around a serialized [`HashIndex`].
///
/// `HashIndex::save` serializes the index into `payload`, wraps it in this
/// envelope and serializes the envelope with postcard; `HashIndex::load`
/// validates `magic` and `version` before decoding `payload`.
///
/// NOTE(review): postcard is presumably positional, so reordering or removing
/// fields would change the on-disk format — confirm before editing.
#[derive(Serialize, Deserialize)]
struct HashIndexEnvelope {
    /// Format tag; expected to equal `HASH_INDEX_MAGIC`.
    magic: [u8; 7],
    /// Envelope version; expected to equal `HASH_INDEX_ENVELOPE_VERSION`.
    version: u16,
    /// Crate version (`CARGO_PKG_VERSION`) recorded when the file was saved.
    /// Not checked by `load`.
    sqry_version: String,
    /// Postcard-serialized `HashIndex`.
    payload: Vec<u8>,
}
64
/// Content hash and metadata snapshot for a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileHash {
    /// Path the snapshot was taken for.
    pub path: PathBuf,
    /// xxHash64 digest of the file content (computed with `0` as the second
    /// argument to the hasher).
    pub hash: u64,
    /// Size in bytes (from file metadata in `compute`, from the byte slice
    /// length in `from_bytes`).
    pub size: u64,
    /// Modification time reported by the filesystem when the snapshot was taken.
    pub mtime: SystemTime,
    /// Number of symbols attributed to this file; the constructors in this
    /// file initialise it to `0`.
    pub symbols_count: usize,
    /// Optional in-memory copy of the file content. Marked `#[serde(skip)]`,
    /// so it is never written to or read from the persisted index.
    #[serde(skip)]
    pub content: Option<String>,
}
95
96impl FileHash {
97 pub fn compute(path: &Path) -> Result<Self> {
105 use std::io::Read;
106
107 let metadata = fs::metadata(path)
109 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
110
111 let size = metadata.len();
112 let mtime = metadata
113 .modified()
114 .with_context(|| format!("Failed to get modification time for {}", path.display()))?;
115
116 let mut file = fs::File::open(path)
118 .with_context(|| format!("Failed to open file {}", path.display()))?;
119
120 let mut buffer = vec![0u8; parse_buffer_size()];
122 let mut hasher = xxhash_rust::xxh64::Xxh64::new(0); loop {
125 let bytes_read = file
126 .read(&mut buffer)
127 .with_context(|| format!("Failed to read file {}", path.display()))?;
128
129 if bytes_read == 0 {
130 break;
131 }
132
133 hasher.update(&buffer[..bytes_read]);
134 }
135
136 let hash = hasher.digest();
137
138 Ok(Self {
139 path: path.to_path_buf(),
140 hash,
141 size,
142 mtime,
143 symbols_count: 0, content: None, })
146 }
147
148 pub fn from_bytes(path: &Path, content: &[u8]) -> Result<Self> {
156 let metadata = fs::metadata(path)
157 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
158
159 let hash = xxh64(content, 0); Ok(Self {
162 path: path.to_path_buf(),
163 hash,
164 size: content.len() as u64,
165 mtime: metadata.modified().with_context(|| {
166 format!("Failed to get modification time for {}", path.display())
167 })?,
168 symbols_count: 0,
169 content: None, })
171 }
172
173 pub fn metadata_changed(&self, path: &Path) -> Result<bool> {
181 let metadata = fs::metadata(path)
182 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
183
184 let current_size = metadata.len();
185 let current_mtime = metadata
186 .modified()
187 .with_context(|| format!("Failed to get modification time for {}", path.display()))?;
188
189 Ok(current_size != self.size || current_mtime != self.mtime)
190 }
191}
192
/// Collection of [`FileHash`] entries keyed by path, with running totals.
///
/// The whole struct is what `save` serializes as the envelope payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashIndex {
    /// Tracked entries, keyed by file path.
    hashes: HashMap<PathBuf, FileHash>,
    /// Number of tracked files; maintained by `update`, `remove` and `clear`,
    /// and returned by `len`.
    pub file_count: usize,
    /// Sum of `symbols_count` over tracked entries; maintained by `update`,
    /// `remove` and `clear`.
    pub total_symbols: usize,
    /// Largest content length (in bytes) that `cache_content` will store;
    /// `None` means no limit is applied.
    #[serde(default)]
    content_cache_max_bytes: Option<usize>,
}
209
210impl HashIndex {
211 #[must_use]
213 pub fn new() -> Self {
214 Self::with_content_cache_limit(None)
215 }
216
217 #[must_use]
219 pub fn with_content_cache_limit(limit: Option<usize>) -> Self {
220 Self {
221 hashes: HashMap::new(),
222 file_count: 0,
223 total_symbols: 0,
224 content_cache_max_bytes: limit,
225 }
226 }
227
228 pub fn set_content_cache_limit(&mut self, limit: Option<usize>) {
230 self.content_cache_max_bytes = limit;
231 }
232
233 pub fn has_changed(&self, path: &Path) -> Result<bool> {
247 let Some(stored_hash) = self.hashes.get(path) else {
249 return Ok(true);
251 };
252
253 if !path.exists() {
255 return Ok(true);
257 }
258
259 if !stored_hash.metadata_changed(path)? {
261 return Ok(false);
263 }
264
265 let current_hash = FileHash::compute(path)?;
270
271 Ok(current_hash.hash != stored_hash.hash)
272 }
273
274 pub fn update(&mut self, path: PathBuf, mut file_hash: FileHash) {
278 if let Some(old_hash) = self.hashes.remove(&path) {
280 self.total_symbols = self.total_symbols.saturating_sub(old_hash.symbols_count);
281 self.file_count = self.file_count.saturating_sub(1);
282 }
283
284 self.total_symbols += file_hash.symbols_count;
286 self.file_count += 1;
287
288 file_hash.path.clone_from(&path);
290
291 self.hashes.insert(path, file_hash);
292 }
293
294 pub fn remove(&mut self, path: &Path) -> Option<FileHash> {
298 if let Some(removed) = self.hashes.remove(path) {
299 self.total_symbols = self.total_symbols.saturating_sub(removed.symbols_count);
300 self.file_count = self.file_count.saturating_sub(1);
301 Some(removed)
302 } else {
303 None
304 }
305 }
306
307 #[must_use]
309 pub fn get(&self, path: &Path) -> Option<&FileHash> {
310 self.hashes.get(path)
311 }
312
313 pub fn iter(&self) -> impl Iterator<Item = (&PathBuf, &FileHash)> {
315 self.hashes.iter()
316 }
317
318 #[must_use]
320 pub fn len(&self) -> usize {
321 self.file_count
322 }
323
324 #[must_use]
326 pub fn is_empty(&self) -> bool {
327 self.file_count == 0
328 }
329
330 pub fn clear(&mut self) {
332 self.hashes.clear();
333 self.file_count = 0;
334 self.total_symbols = 0;
335 }
336
337 pub fn get_cached_content(&self, path: &Path) -> Result<String> {
348 if let Some(file_hash) = self.hashes.get(path)
350 && let Some(ref content) = file_hash.content
351 {
352 return Ok(content.clone());
353 }
354
355 anyhow::bail!("Content not cached for {}", path.display())
357 }
358
359 pub fn cache_content(&mut self, path: &Path, content: String) {
373 if let Some(limit) = self.content_cache_max_bytes
374 && content.len() > limit
375 {
376 log::trace!(
377 "Skipping content cache for {} (size: {} bytes > {} limit)",
378 path.display(),
379 content.len(),
380 limit
381 );
382 return;
383 }
384
385 if let Some(file_hash) = self.hashes.get_mut(path) {
386 let size = content.len();
387 file_hash.content = Some(content);
388 log::trace!("Cached content for {} ({size} bytes)", path.display());
389 }
390 }
391
392 pub fn save(&self, cache_dir: &Path) -> Result<()> {
401 fs::create_dir_all(cache_dir)
403 .with_context(|| format!("Failed to create cache directory {}", cache_dir.display()))?;
404
405 let hash_file = cache_dir.join("file_hashes.bin");
406
407 let payload =
409 postcard::to_allocvec(self).context("Failed to serialize hash index payload")?;
410
411 let envelope = HashIndexEnvelope {
412 magic: HASH_INDEX_MAGIC,
413 version: HASH_INDEX_ENVELOPE_VERSION,
414 sqry_version: env!("CARGO_PKG_VERSION").to_string(),
415 payload,
416 };
417
418 let bytes =
419 postcard::to_allocvec(&envelope).context("Failed to serialize hash index envelope")?;
420
421 let tmp_hash_index_file_path = hash_file.with_extension("bin.tmp");
423 fs::write(&tmp_hash_index_file_path, bytes).with_context(|| {
424 format!(
425 "Failed to write temp hash index to {}",
426 tmp_hash_index_file_path.display()
427 )
428 })?;
429
430 if hash_file.exists() {
432 let _ = fs::remove_file(&hash_file);
433 }
434 fs::rename(&tmp_hash_index_file_path, &hash_file).with_context(|| {
435 format!(
436 "Failed to atomically replace hash index at {} with temp {}",
437 hash_file.display(),
438 tmp_hash_index_file_path.display()
439 )
440 })?;
441
442 log::debug!(
443 "Saved hash index: {} files, {} symbols to {}",
444 self.file_count,
445 self.total_symbols,
446 hash_file.display()
447 );
448
449 Ok(())
450 }
451
452 pub fn load(cache_dir: &Path) -> Result<Self> {
461 let hash_file = cache_dir.join("file_hashes.bin");
462
463 if !hash_file.exists() {
465 log::debug!(
466 "No hash index found at {}, starting fresh",
467 hash_file.display()
468 );
469 return Ok(Self::new());
470 }
471
472 const MAX_HASH_INDEX_BYTES: u64 = 256 * 1024 * 1024; let metadata = fs::metadata(&hash_file)
475 .with_context(|| format!("Failed to stat hash index: {}", hash_file.display()))?;
476 if metadata.len() > MAX_HASH_INDEX_BYTES {
477 anyhow::bail!(
478 "Hash index file is too large ({} bytes, max {}): {}",
479 metadata.len(),
480 MAX_HASH_INDEX_BYTES,
481 hash_file.display()
482 );
483 }
484 let bytes = fs::read(&hash_file)
485 .with_context(|| format!("Failed to read hash index from {}", hash_file.display()))?;
486
487 let env: HashIndexEnvelope =
489 postcard::from_bytes(&bytes).context("Failed to deserialize hash index envelope")?;
490
491 if env.magic != HASH_INDEX_MAGIC {
492 anyhow::bail!("Invalid hash index magic: expected {HASH_INDEX_MAGIC:?}");
493 }
494 if env.version != HASH_INDEX_ENVELOPE_VERSION {
495 anyhow::bail!(
496 "Unsupported hash index version: {} (expected {})",
497 env.version,
498 HASH_INDEX_ENVELOPE_VERSION
499 );
500 }
501
502 let index: Self = postcard::from_bytes(&env.payload)
503 .context("Failed to deserialize hash index payload")?;
504
505 log::debug!(
506 "Loaded hash index: {} files, {} symbols from {}",
507 index.file_count,
508 index.total_symbols,
509 hash_file.display()
510 );
511 Ok(index)
512 }
513}
514
515impl Default for HashIndex {
516 fn default() -> Self {
517 Self::new()
518 }
519}
520
521#[cfg(test)]
522mod tests {
523 use super::*;
524 use std::io::Write;
525 use tempfile::{NamedTempFile, TempDir};
526
527 #[test]
528 fn test_file_hash_compute() {
529 let mut temp_file = NamedTempFile::new().unwrap();
530 temp_file.write_all(b"test content").unwrap();
531 temp_file.flush().unwrap();
532
533 let hash = FileHash::compute(temp_file.path()).unwrap();
534
535 assert_eq!(hash.size, 12); assert!(hash.hash != 0); assert_eq!(hash.symbols_count, 0); }
539
540 #[test]
541 fn test_file_hash_from_bytes() {
542 let mut temp_file = NamedTempFile::new().unwrap();
543 temp_file.write_all(b"test").unwrap();
544 temp_file.flush().unwrap();
545
546 let content = b"test";
547 let hash = FileHash::from_bytes(temp_file.path(), content).unwrap();
548
549 assert_eq!(hash.size, 4);
550 assert_eq!(hash.hash, xxh64(content, 0));
551 }
552
553 #[test]
554 fn test_file_hash_deterministic() {
555 let mut temp_file = NamedTempFile::new().unwrap();
556 let content = b"deterministic test content";
557 temp_file.write_all(content).unwrap();
558 temp_file.flush().unwrap();
559
560 let hash1 = FileHash::compute(temp_file.path()).unwrap();
561 let hash2 = FileHash::compute(temp_file.path()).unwrap();
562
563 assert_eq!(hash1.hash, hash2.hash);
564 assert_eq!(hash1.size, hash2.size);
565 }
566
567 #[test]
568 fn test_file_hash_different_content() {
569 let mut temp1 = NamedTempFile::new().unwrap();
570 temp1.write_all(b"content A").unwrap();
571 temp1.flush().unwrap();
572
573 let mut temp2 = NamedTempFile::new().unwrap();
574 temp2.write_all(b"content B").unwrap();
575 temp2.flush().unwrap();
576
577 let hash1 = FileHash::compute(temp1.path()).unwrap();
578 let hash2 = FileHash::compute(temp2.path()).unwrap();
579
580 assert_ne!(hash1.hash, hash2.hash);
581 }
582
583 #[test]
584 fn test_hash_index_new_file() {
585 let index = HashIndex::new();
586 let path = Path::new("nonexistent.rs");
587
588 assert!(index.has_changed(path).unwrap());
590 }
591
592 #[test]
593 fn test_hash_index_unchanged_file() {
594 let mut temp_file = NamedTempFile::new().unwrap();
595 temp_file.write_all(b"unchanged content").unwrap();
596 temp_file.flush().unwrap();
597
598 let mut index = HashIndex::new();
599 let hash = FileHash::compute(temp_file.path()).unwrap();
600 index.update(temp_file.path().to_path_buf(), hash);
601
602 assert!(!index.has_changed(temp_file.path()).unwrap());
604 }
605
606 #[test]
607 fn test_hash_index_changed_content() {
608 let mut temp_file = NamedTempFile::new().unwrap();
609 temp_file.write_all(b"original content").unwrap();
610 temp_file.flush().unwrap();
611
612 let mut index = HashIndex::new();
613 let hash = FileHash::compute(temp_file.path()).unwrap();
614 index.update(temp_file.path().to_path_buf(), hash);
615
616 temp_file.write_all(b" modified").unwrap();
618 temp_file.flush().unwrap();
619
620 assert!(index.has_changed(temp_file.path()).unwrap());
622 }
623
624 #[test]
625 fn test_hash_index_update_and_remove() {
626 let mut index = HashIndex::new();
627 let path = PathBuf::from("test.rs");
628
629 let mut hash = FileHash {
630 path: path.clone(),
631 hash: 12345,
632 size: 100,
633 mtime: SystemTime::now(),
634 symbols_count: 5,
635 content: None,
636 };
637
638 index.update(path.clone(), hash.clone());
640 assert_eq!(index.len(), 1);
641 assert_eq!(index.total_symbols, 5);
642
643 hash.symbols_count = 10;
645 index.update(path.clone(), hash.clone());
646 assert_eq!(index.len(), 1); assert_eq!(index.total_symbols, 10); let removed = index.remove(&path);
651 assert!(removed.is_some());
652 assert_eq!(index.len(), 0);
653 assert_eq!(index.total_symbols, 0);
654 }
655
656 #[test]
657 fn test_hash_index_save_and_load() {
658 let tmp_index_dir = TempDir::new().unwrap();
659 let cache_dir = tmp_index_dir.path();
660
661 let mut index = HashIndex::new();
663 let path = PathBuf::from("test.rs");
664 let hash = FileHash {
665 path: path.clone(),
666 hash: 67890,
667 size: 200,
668 mtime: SystemTime::now(),
669 symbols_count: 15,
670 content: None,
671 };
672 index.update(path, hash);
673
674 index.save(cache_dir).unwrap();
676
677 let loaded = HashIndex::load(cache_dir).unwrap();
679
680 assert_eq!(loaded.len(), 1);
681 assert_eq!(loaded.total_symbols, 15);
682 assert_eq!(loaded.get(Path::new("test.rs")).unwrap().hash, 67890);
683 }
684
685 #[test]
686 fn test_hash_index_mtime_change_no_content_change() {
687 use filetime::{FileTime, set_file_mtime};
688 use std::time::Duration;
689
690 let mut temp_file = NamedTempFile::new().unwrap();
691 temp_file.write_all(b"same content").unwrap();
692 temp_file.flush().unwrap();
693
694 let mut index = HashIndex::new();
695 let hash = FileHash::compute(temp_file.path()).unwrap();
696 index.update(temp_file.path().to_path_buf(), hash);
697
698 let meta = fs::metadata(temp_file.path()).unwrap();
700 let orig_mtime = meta.modified().unwrap();
701 let new_mtime = FileTime::from_system_time(orig_mtime + Duration::from_secs(60));
702 set_file_mtime(temp_file.path(), new_mtime).unwrap();
703
704 assert!(!index.has_changed(temp_file.path()).unwrap());
706 }
707
708 #[test]
709 fn test_hash_index_load_nonexistent() {
710 let tmp_index_dir = TempDir::new().unwrap();
711 let cache_dir = tmp_index_dir.path().join("nonexistent");
712
713 let index = HashIndex::load(&cache_dir).unwrap();
715
716 assert_eq!(index.len(), 0);
717 assert!(index.is_empty());
718 }
719
720 #[test]
721 fn test_hash_index_clear() {
722 let mut index = HashIndex::new();
723
724 for i in 0_u64..5 {
726 let path = PathBuf::from(format!("file{i}.rs"));
727 let hash = FileHash {
728 path: path.clone(),
729 hash: i,
730 size: 100,
731 mtime: SystemTime::now(),
732 symbols_count: 3,
733 content: None,
734 };
735 index.update(path, hash);
736 }
737
738 assert_eq!(index.len(), 5);
739 assert_eq!(index.total_symbols, 15);
740
741 index.clear();
743
744 assert_eq!(index.len(), 0);
745 assert_eq!(index.total_symbols, 0);
746 assert!(index.is_empty());
747 }
748
749 #[test]
750 fn test_xxhash64_performance_characteristic() {
751 let data = vec![0u8; 1_000_000];
754
755 let start = std::time::Instant::now();
756 let _hash = xxh64(&data, 0);
757 let elapsed = start.elapsed();
758
759 assert!(
762 elapsed.as_millis() < 100,
763 "XXHash64 took {elapsed:?} to hash 1MB (expected <100ms)"
764 );
765 }
766
767 #[test]
768 fn test_cache_small_file() {
769 let mut temp_file = NamedTempFile::new().unwrap();
771 let content = "Small file content for caching test";
772 temp_file.write_all(content.as_bytes()).unwrap();
773 temp_file.flush().unwrap();
774
775 let mut index = HashIndex::new();
776 let hash = FileHash::compute(temp_file.path()).unwrap();
777 index.update(temp_file.path().to_path_buf(), hash);
778
779 index.cache_content(temp_file.path(), content.to_string());
781
782 let cached = index.get_cached_content(temp_file.path()).unwrap();
784 assert_eq!(cached, content);
785
786 let file_hash = index.get(temp_file.path()).unwrap();
788 assert!(file_hash.content.is_some());
789 assert_eq!(file_hash.content.as_ref().unwrap(), content);
790 }
791
792 #[test]
793 fn test_skip_large_file_when_limit_configured() {
794 let mut temp_file = NamedTempFile::new().unwrap();
796 let large_content = "x".repeat(101_000); temp_file.write_all(large_content.as_bytes()).unwrap();
799 temp_file.flush().unwrap();
800
801 let mut index = HashIndex::with_content_cache_limit(Some(100_000));
802 let hash = FileHash::compute(temp_file.path()).unwrap();
803 index.update(temp_file.path().to_path_buf(), hash);
804
805 index.cache_content(temp_file.path(), large_content.clone());
807
808 let file_hash = index.get(temp_file.path()).unwrap();
810 assert!(file_hash.content.is_none());
811
812 assert!(index.get_cached_content(temp_file.path()).is_err());
814 }
815
816 #[test]
817 fn test_large_file_cached_without_limit() {
818 let mut temp_file = NamedTempFile::new().unwrap();
820 let large_content = "x".repeat(101_000); temp_file.write_all(large_content.as_bytes()).unwrap();
822 temp_file.flush().unwrap();
823
824 let mut index = HashIndex::new();
825 let hash = FileHash::compute(temp_file.path()).unwrap();
826 index.update(temp_file.path().to_path_buf(), hash);
827
828 index.cache_content(temp_file.path(), large_content.clone());
829
830 let cached = index.get_cached_content(temp_file.path()).unwrap();
831 assert_eq!(cached.len(), large_content.len());
832 }
833
834 #[test]
835 fn test_get_cached_content_error_when_not_cached() {
836 let mut temp_file = NamedTempFile::new().unwrap();
838 let content = "Test content";
839 temp_file.write_all(content.as_bytes()).unwrap();
840 temp_file.flush().unwrap();
841
842 let mut index = HashIndex::new();
843 let hash = FileHash::compute(temp_file.path()).unwrap();
844 index.update(temp_file.path().to_path_buf(), hash);
845
846 assert!(index.get_cached_content(temp_file.path()).is_err());
850 }
851}