1use anyhow::{Context, Result};
45use serde::{Deserialize, Serialize};
46use std::collections::HashMap;
47use std::fs;
48use std::path::{Path, PathBuf};
49use std::time::SystemTime;
50use xxhash_rust::xxh64::xxh64;
51
52use crate::config::buffers::parse_buffer_size;
53
/// Revision number of the on-disk envelope layout.
///
/// `HashIndex::load` refuses any file whose stored version differs from this value.
const HASH_INDEX_ENVELOPE_VERSION: u16 = 1;

/// Seven-byte signature stored in every envelope so that `HashIndex::load` can reject
/// files that were not written by `HashIndex::save`.
const HASH_INDEX_MAGIC: [u8; 7] = *b"SQRYHSH";
56
57#[derive(Serialize, Deserialize)]
58struct HashIndexEnvelope {
59 magic: [u8; 7],
60 version: u16,
61 sqry_version: String,
62 payload: Vec<u8>,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct FileHash {
78 pub path: PathBuf,
80 pub hash: u64,
82 pub size: u64,
84 pub mtime: SystemTime,
86 pub symbols_count: usize,
88 #[serde(skip)]
93 pub content: Option<String>,
94}
95
96impl FileHash {
97 pub fn compute(path: &Path) -> Result<Self> {
105 use std::io::Read;
106
107 let metadata = fs::metadata(path)
109 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
110
111 let size = metadata.len();
112 let mtime = metadata
113 .modified()
114 .with_context(|| format!("Failed to get modification time for {}", path.display()))?;
115
116 let mut file = fs::File::open(path)
118 .with_context(|| format!("Failed to open file {}", path.display()))?;
119
120 let mut buffer = vec![0u8; parse_buffer_size()];
122 let mut hasher = xxhash_rust::xxh64::Xxh64::new(0); loop {
125 let bytes_read = file
126 .read(&mut buffer)
127 .with_context(|| format!("Failed to read file {}", path.display()))?;
128
129 if bytes_read == 0 {
130 break;
131 }
132
133 hasher.update(&buffer[..bytes_read]);
134 }
135
136 let hash = hasher.digest();
137
138 Ok(Self {
139 path: path.to_path_buf(),
140 hash,
141 size,
142 mtime,
143 symbols_count: 0, content: None, })
146 }
147
148 pub fn from_bytes(path: &Path, content: &[u8]) -> Result<Self> {
156 let metadata = fs::metadata(path)
157 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
158
159 let hash = xxh64(content, 0); Ok(Self {
162 path: path.to_path_buf(),
163 hash,
164 size: content.len() as u64,
165 mtime: metadata.modified().with_context(|| {
166 format!("Failed to get modification time for {}", path.display())
167 })?,
168 symbols_count: 0,
169 content: None, })
171 }
172
173 pub fn metadata_changed(&self, path: &Path) -> Result<bool> {
181 let metadata = fs::metadata(path)
182 .with_context(|| format!("Failed to read metadata for {}", path.display()))?;
183
184 let current_size = metadata.len();
185 let current_mtime = metadata
186 .modified()
187 .with_context(|| format!("Failed to get modification time for {}", path.display()))?;
188
189 Ok(current_size != self.size || current_mtime != self.mtime)
190 }
191}
192
193#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct HashIndex {
199 hashes: HashMap<PathBuf, FileHash>,
201 pub file_count: usize,
203 pub total_symbols: usize,
205 #[serde(default)]
207 content_cache_max_bytes: Option<usize>,
208}
209
210impl HashIndex {
211 #[must_use]
213 pub fn new() -> Self {
214 Self::with_content_cache_limit(None)
215 }
216
217 #[must_use]
219 pub fn with_content_cache_limit(limit: Option<usize>) -> Self {
220 Self {
221 hashes: HashMap::new(),
222 file_count: 0,
223 total_symbols: 0,
224 content_cache_max_bytes: limit,
225 }
226 }
227
228 pub fn set_content_cache_limit(&mut self, limit: Option<usize>) {
230 self.content_cache_max_bytes = limit;
231 }
232
233 pub fn has_changed(&self, path: &Path) -> Result<bool> {
247 let Some(stored_hash) = self.hashes.get(path) else {
249 return Ok(true);
251 };
252
253 if !path.exists() {
255 return Ok(true);
257 }
258
259 if !stored_hash.metadata_changed(path)? {
261 return Ok(false);
263 }
264
265 let current_hash = FileHash::compute(path)?;
270
271 Ok(current_hash.hash != stored_hash.hash)
272 }
273
274 pub fn update(&mut self, path: PathBuf, mut file_hash: FileHash) {
278 if let Some(old_hash) = self.hashes.remove(&path) {
280 self.total_symbols = self.total_symbols.saturating_sub(old_hash.symbols_count);
281 self.file_count = self.file_count.saturating_sub(1);
282 }
283
284 self.total_symbols += file_hash.symbols_count;
286 self.file_count += 1;
287
288 file_hash.path.clone_from(&path);
290
291 self.hashes.insert(path, file_hash);
292 }
293
294 pub fn remove(&mut self, path: &Path) -> Option<FileHash> {
298 if let Some(removed) = self.hashes.remove(path) {
299 self.total_symbols = self.total_symbols.saturating_sub(removed.symbols_count);
300 self.file_count = self.file_count.saturating_sub(1);
301 Some(removed)
302 } else {
303 None
304 }
305 }
306
307 #[must_use]
309 pub fn get(&self, path: &Path) -> Option<&FileHash> {
310 self.hashes.get(path)
311 }
312
313 pub fn iter(&self) -> impl Iterator<Item = (&PathBuf, &FileHash)> {
315 self.hashes.iter()
316 }
317
318 #[must_use]
320 pub fn len(&self) -> usize {
321 self.file_count
322 }
323
324 #[must_use]
326 pub fn is_empty(&self) -> bool {
327 self.file_count == 0
328 }
329
330 pub fn clear(&mut self) {
332 self.hashes.clear();
333 self.file_count = 0;
334 self.total_symbols = 0;
335 }
336
337 pub fn get_cached_content(&self, path: &Path) -> Result<String> {
348 if let Some(file_hash) = self.hashes.get(path)
350 && let Some(ref content) = file_hash.content
351 {
352 return Ok(content.clone());
353 }
354
355 anyhow::bail!("Content not cached for {}", path.display())
357 }
358
359 pub fn cache_content(&mut self, path: &Path, content: String) {
373 if let Some(limit) = self.content_cache_max_bytes
374 && content.len() > limit
375 {
376 log::trace!(
377 "Skipping content cache for {} (size: {} bytes > {} limit)",
378 path.display(),
379 content.len(),
380 limit
381 );
382 return;
383 }
384
385 if let Some(file_hash) = self.hashes.get_mut(path) {
386 let size = content.len();
387 file_hash.content = Some(content);
388 log::trace!("Cached content for {} ({size} bytes)", path.display());
389 }
390 }
391
392 pub fn save(&self, cache_dir: &Path) -> Result<()> {
401 fs::create_dir_all(cache_dir)
403 .with_context(|| format!("Failed to create cache directory {}", cache_dir.display()))?;
404
405 let hash_file = cache_dir.join("file_hashes.bin");
406
407 let payload =
409 postcard::to_allocvec(self).context("Failed to serialize hash index payload")?;
410
411 let envelope = HashIndexEnvelope {
412 magic: HASH_INDEX_MAGIC,
413 version: HASH_INDEX_ENVELOPE_VERSION,
414 sqry_version: env!("CARGO_PKG_VERSION").to_string(),
415 payload,
416 };
417
418 let bytes =
419 postcard::to_allocvec(&envelope).context("Failed to serialize hash index envelope")?;
420
421 let tmp_hash_index_file_path = hash_file.with_extension("bin.tmp");
423 fs::write(&tmp_hash_index_file_path, bytes).with_context(|| {
424 format!(
425 "Failed to write temp hash index to {}",
426 tmp_hash_index_file_path.display()
427 )
428 })?;
429
430 if hash_file.exists() {
432 let _ = fs::remove_file(&hash_file);
433 }
434 fs::rename(&tmp_hash_index_file_path, &hash_file).with_context(|| {
435 format!(
436 "Failed to atomically replace hash index at {} with temp {}",
437 hash_file.display(),
438 tmp_hash_index_file_path.display()
439 )
440 })?;
441
442 log::debug!(
443 "Saved hash index: {} files, {} symbols to {}",
444 self.file_count,
445 self.total_symbols,
446 hash_file.display()
447 );
448
449 Ok(())
450 }
451
452 pub fn load(cache_dir: &Path) -> Result<Self> {
461 let hash_file = cache_dir.join("file_hashes.bin");
462
463 if !hash_file.exists() {
465 log::debug!(
466 "No hash index found at {}, starting fresh",
467 hash_file.display()
468 );
469 return Ok(Self::new());
470 }
471
472 let bytes = fs::read(&hash_file)
474 .with_context(|| format!("Failed to read hash index from {}", hash_file.display()))?;
475
476 let env: HashIndexEnvelope =
478 postcard::from_bytes(&bytes).context("Failed to deserialize hash index envelope")?;
479
480 if env.magic != HASH_INDEX_MAGIC {
481 anyhow::bail!("Invalid hash index magic: expected {HASH_INDEX_MAGIC:?}");
482 }
483 if env.version != HASH_INDEX_ENVELOPE_VERSION {
484 anyhow::bail!(
485 "Unsupported hash index version: {} (expected {})",
486 env.version,
487 HASH_INDEX_ENVELOPE_VERSION
488 );
489 }
490
491 let index: Self = postcard::from_bytes(&env.payload)
492 .context("Failed to deserialize hash index payload")?;
493
494 log::debug!(
495 "Loaded hash index: {} files, {} symbols from {}",
496 index.file_count,
497 index.total_symbols,
498 hash_file.display()
499 );
500 Ok(index)
501 }
502}
503
504impl Default for HashIndex {
505 fn default() -> Self {
506 Self::new()
507 }
508}
509
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::{NamedTempFile, TempDir};

    /// Creates a temporary file that holds exactly `bytes`, written and flushed.
    fn temp_file_with(bytes: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(bytes).unwrap();
        file.flush().unwrap();
        file
    }

    /// Hashes `file` from disk and records the result in `index` under the file's path.
    fn track(index: &mut HashIndex, file: &NamedTempFile) {
        let computed = FileHash::compute(file.path()).unwrap();
        index.update(file.path().to_path_buf(), computed);
    }

    /// Builds an in-memory fingerprint for a path that does not need to exist on disk.
    fn synthetic_hash(path: &Path, hash: u64, size: u64, symbols_count: usize) -> FileHash {
        FileHash {
            path: path.to_path_buf(),
            hash,
            size,
            mtime: SystemTime::now(),
            symbols_count,
            content: None,
        }
    }

    #[test]
    fn test_file_hash_compute() {
        let file = temp_file_with(b"test content");

        let computed = FileHash::compute(file.path()).unwrap();

        // "test content" is 12 bytes long.
        assert_eq!(computed.size, 12);
        assert!(computed.hash != 0);
        // Freshly computed fingerprints carry no symbols yet.
        assert_eq!(computed.symbols_count, 0);
    }

    #[test]
    fn test_file_hash_from_bytes() {
        let content = b"test";
        let file = temp_file_with(content);

        let computed = FileHash::from_bytes(file.path(), content).unwrap();

        assert_eq!(computed.size, 4);
        assert_eq!(computed.hash, xxh64(content, 0));
    }

    #[test]
    fn test_file_hash_deterministic() {
        let file = temp_file_with(b"deterministic test content");

        let first = FileHash::compute(file.path()).unwrap();
        let second = FileHash::compute(file.path()).unwrap();

        assert_eq!(first.hash, second.hash);
        assert_eq!(first.size, second.size);
    }

    #[test]
    fn test_file_hash_different_content() {
        let file_a = temp_file_with(b"content A");
        let file_b = temp_file_with(b"content B");

        let hash_a = FileHash::compute(file_a.path()).unwrap();
        let hash_b = FileHash::compute(file_b.path()).unwrap();

        assert_ne!(hash_a.hash, hash_b.hash);
    }

    #[test]
    fn test_hash_index_new_file() {
        let index = HashIndex::new();

        // An untracked path is always reported as changed.
        assert!(index.has_changed(Path::new("nonexistent.rs")).unwrap());
    }

    #[test]
    fn test_hash_index_unchanged_file() {
        let file = temp_file_with(b"unchanged content");
        let mut index = HashIndex::new();
        track(&mut index, &file);

        assert!(!index.has_changed(file.path()).unwrap());
    }

    #[test]
    fn test_hash_index_changed_content() {
        let mut file = temp_file_with(b"original content");
        let mut index = HashIndex::new();
        track(&mut index, &file);

        // Append to the file after it has been indexed.
        file.write_all(b" modified").unwrap();
        file.flush().unwrap();

        assert!(index.has_changed(file.path()).unwrap());
    }

    #[test]
    fn test_hash_index_update_and_remove() {
        let path = PathBuf::from("test.rs");
        let initial = synthetic_hash(&path, 12345, 100, 5);
        let mut index = HashIndex::new();

        index.update(path.clone(), initial.clone());
        assert_eq!(index.len(), 1);
        assert_eq!(index.total_symbols, 5);

        // Re-registering the same path replaces the entry instead of adding a second one.
        let revised = FileHash {
            symbols_count: 10,
            ..initial
        };
        index.update(path.clone(), revised);
        assert_eq!(index.len(), 1);
        assert_eq!(index.total_symbols, 10);

        let removed = index.remove(&path);
        assert!(removed.is_some());
        assert_eq!(index.len(), 0);
        assert_eq!(index.total_symbols, 0);
    }

    #[test]
    fn test_hash_index_save_and_load() {
        let cache_root = TempDir::new().unwrap();
        let cache_dir = cache_root.path();

        let tracked = PathBuf::from("test.rs");
        let mut index = HashIndex::new();
        index.update(tracked.clone(), synthetic_hash(&tracked, 67890, 200, 15));

        index.save(cache_dir).unwrap();
        let loaded = HashIndex::load(cache_dir).unwrap();

        assert_eq!(loaded.len(), 1);
        assert_eq!(loaded.total_symbols, 15);
        assert_eq!(loaded.get(Path::new("test.rs")).unwrap().hash, 67890);
    }

    #[test]
    fn test_hash_index_mtime_change_no_content_change() {
        use filetime::{FileTime, set_file_mtime};
        use std::time::Duration;

        let file = temp_file_with(b"same content");
        let mut index = HashIndex::new();
        track(&mut index, &file);

        // Push the modification time forward by a minute without touching the content.
        let original_mtime = fs::metadata(file.path()).unwrap().modified().unwrap();
        let bumped = FileTime::from_system_time(original_mtime + Duration::from_secs(60));
        set_file_mtime(file.path(), bumped).unwrap();

        // Metadata differs, but the digest comparison must report "unchanged".
        assert!(!index.has_changed(file.path()).unwrap());
    }

    #[test]
    fn test_hash_index_load_nonexistent() {
        let cache_root = TempDir::new().unwrap();
        let missing_dir = cache_root.path().join("nonexistent");

        let index = HashIndex::load(&missing_dir).unwrap();

        assert_eq!(index.len(), 0);
        assert!(index.is_empty());
    }

    #[test]
    fn test_hash_index_clear() {
        let mut index = HashIndex::new();

        for i in 0_u64..5 {
            let path = PathBuf::from(format!("file{i}.rs"));
            let entry = synthetic_hash(&path, i, 100, 3);
            index.update(path, entry);
        }

        // Five files with three symbols each.
        assert_eq!(index.len(), 5);
        assert_eq!(index.total_symbols, 15);

        index.clear();

        assert_eq!(index.len(), 0);
        assert_eq!(index.total_symbols, 0);
        assert!(index.is_empty());
    }

    // NOTE(review): this asserts on wall-clock time and may be flaky on heavily loaded
    // machines or unoptimised builds — confirm the threshold suits the CI environment.
    #[test]
    fn test_xxhash64_performance_characteristic() {
        let data = vec![0u8; 1_000_000];

        let start = std::time::Instant::now();
        let _hash = xxh64(&data, 0);
        let elapsed = start.elapsed();

        assert!(
            elapsed.as_millis() < 20,
            "XXHash64 took {elapsed:?} to hash 1MB (expected <20ms)"
        );
    }

    #[test]
    fn test_cache_small_file() {
        let content = "Small file content for caching test";
        let file = temp_file_with(content.as_bytes());
        let mut index = HashIndex::new();
        track(&mut index, &file);

        index.cache_content(file.path(), content.to_string());

        // The content is retrievable through the accessor ...
        let cached = index.get_cached_content(file.path()).unwrap();
        assert_eq!(cached, content);

        // ... and is stored on the entry itself.
        let entry = index.get(file.path()).unwrap();
        assert!(entry.content.is_some());
        assert_eq!(entry.content.as_ref().unwrap(), content);
    }

    #[test]
    fn test_skip_large_file_when_limit_configured() {
        // 101_000 bytes is just above the 100_000-byte limit configured below.
        let large_content = "x".repeat(101_000);
        let file = temp_file_with(large_content.as_bytes());
        let mut index = HashIndex::with_content_cache_limit(Some(100_000));
        track(&mut index, &file);

        index.cache_content(file.path(), large_content.clone());

        let entry = index.get(file.path()).unwrap();
        assert!(entry.content.is_none());

        assert!(index.get_cached_content(file.path()).is_err());
    }

    #[test]
    fn test_large_file_cached_without_limit() {
        let large_content = "x".repeat(101_000);
        let file = temp_file_with(large_content.as_bytes());
        let mut index = HashIndex::new();
        track(&mut index, &file);

        index.cache_content(file.path(), large_content.clone());

        let cached = index.get_cached_content(file.path()).unwrap();
        assert_eq!(cached.len(), large_content.len());
    }

    #[test]
    fn test_get_cached_content_error_when_not_cached() {
        let file = temp_file_with("Test content".as_bytes());
        let mut index = HashIndex::new();
        track(&mut index, &file);

        // The file is tracked, but nothing was ever cached for it.
        assert!(index.get_cached_content(file.path()).is_err());
    }
}