1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Cursor, Read, Seek, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::sync::{
7 atomic::{AtomicBool, AtomicUsize, Ordering},
8 Arc, Mutex,
9};
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use globset::{Glob, GlobSet, GlobSetBuilder};
13use ignore::WalkBuilder;
14use rayon::prelude::*;
15use regex::bytes::Regex;
16use regex_syntax::hir::{Hir, HirKind};
17
18use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
19use crate::fs_lock;
20use crate::pattern_compile::{self, CompileOpts, CompileResult, CompiledPattern, LiteralSearch};
21
22const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
23const CACHE_MAGIC: u32 = 0x3144_4958; const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
25const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
26const INDEX_VERSION: u32 = 3;
27const PREVIEW_BYTES: usize = 8 * 1024;
28const EOF_SENTINEL: u8 = 0;
29const MAX_ENTRIES: usize = 10_000_000;
30const MIN_FILE_ENTRY_BYTES: usize = 57;
31const LOOKUP_ENTRY_BYTES: usize = 16;
32const POSTING_BYTES: usize = 6;
33static CACHE_LOCK_ACQUIRE_MUTEX: Mutex<()> = Mutex::new(());
34
35pub struct CacheLock {
36 _guard: fs_lock::LockGuard,
37}
38
39impl CacheLock {
40 pub fn acquire(cache_dir: &Path) -> std::io::Result<Self> {
41 fs::create_dir_all(cache_dir)?;
42 let path = cache_dir.join("cache.lock");
43 let _acquire_guard = CACHE_LOCK_ACQUIRE_MUTEX
44 .lock()
45 .map_err(|_| std::io::Error::other("search cache lock acquisition mutex poisoned"))?;
46 fs_lock::try_acquire(&path, Duration::from_secs(2))
47 .map(|guard| Self { _guard: guard })
48 .map_err(|error| match error {
49 fs_lock::AcquireError::Timeout => {
50 std::io::Error::other("timed out acquiring search cache lock")
51 }
52 fs_lock::AcquireError::Io(error) => error,
53 })
54 }
55}
56
57#[derive(Clone, Debug)]
58pub struct SearchIndex {
59 pub postings: HashMap<u32, Vec<Posting>>,
60 pub files: Vec<FileEntry>,
61 pub path_to_id: HashMap<PathBuf, u32>,
62 pub ready: bool,
63 project_root: PathBuf,
64 git_head: Option<String>,
65 max_file_size: u64,
66 pub file_trigrams: HashMap<u32, Vec<u32>>,
67 unindexed_files: HashSet<u32>,
68}
69
70#[derive(Clone, Debug, Default)]
71pub struct LexicalRankResult {
72 pub files: Vec<(PathBuf, f32)>,
73 pub engine_capped: bool,
74}
75
76impl SearchIndex {
77 pub fn file_count(&self) -> usize {
79 self.files.len()
80 }
81
82 pub fn trigram_count(&self) -> usize {
84 self.postings.len()
85 }
86
87 pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
89 query_trigrams_from_tokens(tokens)
90 }
91
92 pub fn lexical_rank(
94 &self,
95 query_trigrams: &[u32],
96 candidate_filter: Option<&dyn Fn(&Path) -> bool>,
97 max_files: usize,
98 ) -> Vec<(PathBuf, f32)> {
99 self.lexical_rank_with_stats(query_trigrams, candidate_filter, max_files)
100 .files
101 }
102
103 pub fn lexical_rank_with_stats(
106 &self,
107 query_trigrams: &[u32],
108 candidate_filter: Option<&dyn Fn(&Path) -> bool>,
109 max_files: usize,
110 ) -> LexicalRankResult {
111 if query_trigrams.is_empty() || max_files == 0 {
112 return LexicalRankResult::default();
113 }
114
115 let mut non_zero: Vec<(u32, usize)> = query_trigrams
116 .iter()
117 .filter_map(|trigram| {
118 let posting_count = self.postings.get(trigram).map_or(0, Vec::len);
119 (posting_count > 0).then_some((*trigram, posting_count))
120 })
121 .collect();
122 if non_zero.is_empty() {
123 return LexicalRankResult::default();
124 }
125
126 non_zero.sort_unstable_by_key(|(_, posting_count)| *posting_count);
127 let selected_count = non_zero.len().min(3);
128 let candidate_cap = if selected_count == 3 { 200 } else { 500 };
129
130 let mut candidate_ids = BTreeSet::new();
131 for (trigram, _) in non_zero.iter().take(selected_count) {
132 if let Some(postings) = self.postings.get(trigram) {
133 for posting in postings {
134 if self.is_active_file(posting.file_id) {
135 candidate_ids.insert(posting.file_id);
136 }
137 }
138 }
139 }
140 let pre_filter_candidate_count = candidate_ids.len();
141 let engine_capped = pre_filter_candidate_count > candidate_cap;
142 let filtered_candidates = candidate_ids
143 .into_iter()
144 .filter_map(|file_id| {
145 self.files
146 .get(file_id as usize)
147 .map(|entry| (file_id, entry))
148 })
149 .filter(|(_, entry)| {
150 if let Some(filter) = candidate_filter {
151 filter(&entry.path)
152 } else {
153 true
154 }
155 })
156 .collect::<Vec<_>>();
157
158 let mut ranked = Vec::new();
159 for (file_id, entry) in filtered_candidates.into_iter().take(candidate_cap) {
160 let score = lexical_score(self, query_trigrams, file_id);
161 if score > 0.0 {
162 ranked.push((entry.path.clone(), score));
163 }
164 }
165
166 ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
167 ranked.truncate(max_files);
168 LexicalRankResult {
169 files: ranked,
170 engine_capped,
171 }
172 }
173}
174
175#[derive(Clone, Debug, PartialEq, Eq)]
176pub struct Posting {
177 pub file_id: u32,
178 pub next_mask: u8,
179 pub loc_mask: u8,
180}
181
182#[derive(Clone, Debug)]
183pub struct FileEntry {
184 pub path: PathBuf,
185 pub size: u64,
186 pub modified: SystemTime,
187 pub content_hash: blake3::Hash,
188}
189
190#[derive(Clone, Debug, PartialEq, Eq)]
191pub struct GrepMatch {
192 pub file: PathBuf,
193 pub line: u32,
194 pub column: u32,
195 pub line_text: String,
196 pub match_text: String,
197}
198
199#[derive(Clone, Debug)]
200pub struct GrepResult {
201 pub matches: Vec<GrepMatch>,
202 pub total_matches: usize,
203 pub files_searched: usize,
204 pub files_with_matches: usize,
205 pub index_status: IndexStatus,
206 pub truncated: bool,
207 pub fully_degraded: bool,
208 pub engine_capped: bool,
209}
210
211#[derive(Clone, Copy, Debug, PartialEq, Eq)]
212pub enum IndexStatus {
213 Ready,
214 Building,
215 Fallback,
216 Disabled,
217}
218
219impl IndexStatus {
220 pub fn as_str(&self) -> &'static str {
221 match self {
222 IndexStatus::Ready => "Ready",
223 IndexStatus::Building => "Building",
224 IndexStatus::Fallback => "Fallback",
225 IndexStatus::Disabled => "Disabled",
226 }
227 }
228}
229
230#[derive(Clone, Debug, Default)]
231pub struct RegexQuery {
232 pub and_trigrams: Vec<u32>,
233 pub or_groups: Vec<Vec<u32>>,
234 pub(crate) and_filters: HashMap<u32, PostingFilter>,
235 pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
236}
237
238#[derive(Clone, Copy, Debug, Default)]
239pub(crate) struct PostingFilter {
240 next_mask: u8,
241 loc_mask: u8,
242}
243
244#[derive(Clone, Debug, Default)]
245struct QueryBuild {
246 and_runs: Vec<Vec<u8>>,
247 or_groups: Vec<Vec<Vec<u8>>>,
248}
249
250#[derive(Clone, Debug, Default)]
251pub(crate) struct PathFilters {
252 includes: Option<GlobSet>,
253 excludes: Option<GlobSet>,
254}
255
256#[derive(Clone, Debug)]
257pub(crate) struct SearchScope {
258 pub root: PathBuf,
259 pub use_index: bool,
260}
261
262#[derive(Clone, Debug)]
263struct SharedGrepMatch {
264 file: Arc<PathBuf>,
265 line: u32,
266 column: u32,
267 line_text: String,
268 match_text: String,
269}
270
271#[derive(Clone, Debug)]
272enum SearchMatcher {
273 Literal(LiteralSearch),
274 Regex(Regex),
275}
276
277impl SearchIndex {
278 pub fn new() -> Self {
279 SearchIndex {
280 postings: HashMap::new(),
281 files: Vec::new(),
282 path_to_id: HashMap::new(),
283 ready: false,
284 project_root: PathBuf::new(),
285 git_head: None,
286 max_file_size: DEFAULT_MAX_FILE_SIZE,
287 file_trigrams: HashMap::new(),
288 unindexed_files: HashSet::new(),
289 }
290 }
291
292 pub fn build(root: &Path) -> Self {
293 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
294 }
295
296 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
297 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
298 let mut index = SearchIndex {
299 project_root: project_root.clone(),
300 max_file_size,
301 ..SearchIndex::new()
302 };
303
304 let filters = PathFilters::default();
305 for path in walk_project_files(&project_root, &filters) {
306 index.update_file(&path);
307 }
308
309 index.git_head = current_git_head(&project_root);
310 index.ready = true;
311 index
312 }
313
314 pub fn index_file(&mut self, path: &Path, content: &[u8]) {
315 self.remove_file(path);
316
317 let file_id = match self.allocate_file_id(path, content.len() as u64) {
318 Some(file_id) => file_id,
319 None => return,
320 };
321 if let Some(file) = self.files.get_mut(file_id as usize) {
322 file.content_hash = cache_freshness::hash_bytes(content);
323 }
324
325 let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
326 for (trigram, next_char, position) in extract_trigrams(content) {
327 let entry = trigram_map.entry(trigram).or_default();
328 entry.next_mask |= mask_for_next_char(next_char);
329 entry.loc_mask |= mask_for_position(position);
330 }
331
332 let mut file_trigrams = Vec::with_capacity(trigram_map.len());
333 for (trigram, filter) in trigram_map {
334 let postings = self.postings.entry(trigram).or_default();
335 postings.push(Posting {
336 file_id,
337 next_mask: filter.next_mask,
338 loc_mask: filter.loc_mask,
339 });
340 if postings.len() > 1
344 && postings[postings.len() - 2].file_id > postings[postings.len() - 1].file_id
345 {
346 postings.sort_unstable_by_key(|p| p.file_id);
347 }
348 file_trigrams.push(trigram);
349 }
350
351 self.file_trigrams.insert(file_id, file_trigrams);
352 self.unindexed_files.remove(&file_id);
353 }
354
355 pub fn remove_file(&mut self, path: &Path) {
356 let canonical_path = canonicalize_existing_or_deleted_path(path);
357 let file_id = if let Some(file_id) = self.path_to_id.remove(path) {
358 file_id
359 } else if canonical_path.as_path() != path {
360 let Some(file_id) = self.path_to_id.remove(&canonical_path) else {
361 return;
362 };
363 file_id
364 } else {
365 return;
366 };
367
368 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
369 for trigram in trigrams {
370 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
371 postings.retain(|posting| posting.file_id != file_id);
372 postings.is_empty()
373 } else {
374 false
375 };
376
377 if should_remove {
378 self.postings.remove(&trigram);
379 }
380 }
381 }
382
383 self.unindexed_files.remove(&file_id);
384 if let Some(file) = self.files.get_mut(file_id as usize) {
385 file.path = PathBuf::new();
386 file.size = 0;
387 file.modified = UNIX_EPOCH;
388 file.content_hash = cache_freshness::zero_hash();
389 }
390 }
391
392 pub fn update_file(&mut self, path: &Path) {
393 self.remove_file(path);
394
395 let metadata = match fs::metadata(path) {
396 Ok(metadata) if metadata.is_file() => metadata,
397 _ => return,
398 };
399
400 if is_binary_path(path, metadata.len()) {
401 self.track_unindexed_file(path, &metadata);
402 return;
403 }
404
405 if metadata.len() > self.max_file_size {
406 self.track_unindexed_file(path, &metadata);
407 return;
408 }
409
410 let content = match fs::read(path) {
411 Ok(content) => content,
412 Err(_) => return,
413 };
414
415 if is_binary_bytes(&content) {
416 self.track_unindexed_file(path, &metadata);
417 return;
418 }
419
420 self.index_file(path, &content);
421 }
422
423 pub fn grep(
424 &self,
425 pattern: &str,
426 case_sensitive: bool,
427 include: &[String],
428 exclude: &[String],
429 search_root: &Path,
430 max_results: usize,
431 ) -> GrepResult {
432 match pattern_compile::compile(
433 pattern,
434 CompileOpts {
435 case_insensitive: !case_sensitive,
436 ..CompileOpts::default()
437 },
438 ) {
439 CompileResult::Ok(compiled) => {
440 self.search_grep(&compiled, include, exclude, search_root, max_results)
441 }
442 CompileResult::InvalidPattern { .. } | CompileResult::UnsupportedSyntax { .. } => {
443 self.empty_grep_result()
444 }
445 }
446 }
447
448 pub fn search_grep(
449 &self,
450 pattern: &CompiledPattern,
451 include: &[String],
452 exclude: &[String],
453 search_root: &Path,
454 max_results: usize,
455 ) -> GrepResult {
456 let matcher = match pattern {
457 CompiledPattern::Literal(literal) => SearchMatcher::Literal(literal.clone()),
458 CompiledPattern::Regex { compiled, .. } => SearchMatcher::Regex(compiled.clone()),
459 };
460
461 let filters = match build_path_filters(include, exclude) {
462 Ok(filters) => filters,
463 Err(_) => PathFilters::default(),
464 };
465 let search_root = canonicalize_or_normalize(search_root);
466
467 let raw_pattern = pattern.raw_pattern_for_trigrams();
468 let query = if pattern.case_insensitive() && !raw_pattern.is_ascii() {
469 RegexQuery::default()
470 } else {
471 decompose_regex(&raw_pattern)
472 };
473 let fully_degraded = query.and_trigrams.is_empty() && query.or_groups.is_empty();
474 let candidate_ids = self.candidates(&query);
475
476 let candidate_files: Vec<&FileEntry> = candidate_ids
477 .into_iter()
478 .filter_map(|file_id| self.files.get(file_id as usize))
479 .filter(|file| !file.path.as_os_str().is_empty())
480 .filter(|file| is_within_search_root(&search_root, &file.path))
481 .filter(|file| filters.matches(&self.project_root, &file.path))
482 .collect();
483
484 let total_matches = AtomicUsize::new(0);
485 let files_searched = AtomicUsize::new(0);
486 let files_with_matches = AtomicUsize::new(0);
487 let truncated = AtomicBool::new(false);
488 let engine_capped = AtomicBool::new(false);
489 let stop_after = max_results.saturating_mul(2);
490
491 let mut matches = if candidate_files.len() > 10 {
492 candidate_files
493 .par_iter()
494 .map(|file| {
495 search_candidate_file(
496 file,
497 &matcher,
498 max_results,
499 stop_after,
500 &total_matches,
501 &files_searched,
502 &files_with_matches,
503 &truncated,
504 &engine_capped,
505 )
506 })
507 .reduce(Vec::new, |mut left, mut right| {
508 left.append(&mut right);
509 left
510 })
511 } else {
512 let mut matches = Vec::new();
513 for file in candidate_files {
514 matches.extend(search_candidate_file(
515 file,
516 &matcher,
517 max_results,
518 stop_after,
519 &total_matches,
520 &files_searched,
521 &files_with_matches,
522 &truncated,
523 &engine_capped,
524 ));
525
526 if should_stop_search(&truncated, &total_matches, stop_after) {
527 engine_capped.store(true, Ordering::Relaxed);
528 break;
529 }
530 }
531 matches
532 };
533
534 sort_shared_grep_matches_by_cached_mtime_desc(&mut matches, |path| {
535 self.path_to_id
536 .get(path)
537 .and_then(|file_id| self.files.get(*file_id as usize))
538 .map(|file| file.modified)
539 });
540
541 let matches = matches
542 .into_iter()
543 .map(|matched| GrepMatch {
544 file: matched.file.as_ref().clone(),
545 line: matched.line,
546 column: matched.column,
547 line_text: matched.line_text,
548 match_text: matched.match_text,
549 })
550 .collect();
551
552 GrepResult {
553 total_matches: total_matches.load(Ordering::Relaxed),
554 matches,
555 files_searched: files_searched.load(Ordering::Relaxed),
556 files_with_matches: files_with_matches.load(Ordering::Relaxed),
557 index_status: if self.ready {
558 IndexStatus::Ready
559 } else {
560 IndexStatus::Building
561 },
562 truncated: truncated.load(Ordering::Relaxed),
563 fully_degraded,
564 engine_capped: engine_capped.load(Ordering::Relaxed),
565 }
566 }
567
568 fn empty_grep_result(&self) -> GrepResult {
569 GrepResult {
570 matches: Vec::new(),
571 total_matches: 0,
572 files_searched: 0,
573 files_with_matches: 0,
574 index_status: if self.ready {
575 IndexStatus::Ready
576 } else {
577 IndexStatus::Building
578 },
579 truncated: false,
580 fully_degraded: false,
581 engine_capped: false,
582 }
583 }
584
585 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
586 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
587 Ok(filters) => filters,
588 Err(_) => return Vec::new(),
589 };
590 let search_root = canonicalize_or_normalize(search_root);
591 let mut entries = self
592 .files
593 .iter()
594 .filter(|file| !file.path.as_os_str().is_empty())
595 .filter(|file| is_within_search_root(&search_root, &file.path))
596 .filter(|file| filters.matches(&self.project_root, &file.path))
597 .map(|file| (file.path.clone(), file.modified))
598 .collect::<Vec<_>>();
599
600 entries.sort_by(|(left_path, left_mtime), (right_path, right_mtime)| {
601 right_mtime
602 .cmp(left_mtime)
603 .then_with(|| left_path.cmp(right_path))
604 });
605
606 entries.into_iter().map(|(path, _)| path).collect()
607 }
608
609 pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
610 if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
611 return self.active_file_ids();
612 }
613
614 let mut and_trigrams = query.and_trigrams.clone();
615 and_trigrams.sort_unstable_by_key(|trigram| self.postings.get(trigram).map_or(0, Vec::len));
616
617 let mut current: Option<Vec<u32>> = None;
618
619 for trigram in and_trigrams {
620 let filter = query.and_filters.get(&trigram).copied();
621 let matches = self.postings_for_trigram(trigram, filter);
622 current = Some(match current.take() {
623 Some(existing) => intersect_sorted_ids(&existing, &matches),
624 None => matches,
625 });
626
627 if current.as_ref().is_some_and(|ids| ids.is_empty()) {
628 break;
629 }
630 }
631
632 let mut current = current.unwrap_or_else(|| self.active_file_ids());
633
634 for (index, group) in query.or_groups.iter().enumerate() {
635 let mut group_matches = Vec::new();
636 let filters = query.or_filters.get(index);
637
638 for trigram in group {
639 let filter = filters.and_then(|filters| filters.get(trigram).copied());
640 let matches = self.postings_for_trigram(*trigram, filter);
641 if group_matches.is_empty() {
642 group_matches = matches;
643 } else {
644 group_matches = union_sorted_ids(&group_matches, &matches);
645 }
646 }
647
648 current = intersect_sorted_ids(¤t, &group_matches);
649 if current.is_empty() {
650 break;
651 }
652 }
653
654 let mut unindexed = self
655 .unindexed_files
656 .iter()
657 .copied()
658 .filter(|file_id| self.is_active_file(*file_id))
659 .collect::<Vec<_>>();
660 if !unindexed.is_empty() {
661 unindexed.sort_unstable();
662 current = union_sorted_ids(¤t, &unindexed);
663 }
664
665 current
666 }
667
668 pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
669 if fs::create_dir_all(cache_dir).is_err() {
670 return;
671 }
672
673 let cache_path = cache_dir.join("cache.bin");
674 let tmp_cache = cache_dir.join(format!(
675 "cache.bin.tmp.{}.{}",
676 std::process::id(),
677 SystemTime::now()
678 .duration_since(UNIX_EPOCH)
679 .unwrap_or(Duration::ZERO)
680 .as_nanos()
681 ));
682
683 let active_ids = self.active_file_ids();
684 let mut id_map = HashMap::new();
685 for (new_id, old_id) in active_ids.iter().enumerate() {
686 let Ok(new_id_u32) = u32::try_from(new_id) else {
687 return;
688 };
689 id_map.insert(*old_id, new_id_u32);
690 }
691
692 let write_result = (|| -> std::io::Result<()> {
693 let mut postings_writer = BufWriter::new(Cursor::new(Vec::new()));
694
695 postings_writer.write_all(INDEX_MAGIC)?;
696 write_u32(&mut postings_writer, INDEX_VERSION)?;
697
698 let head = git_head.unwrap_or_default();
699 let root = self.project_root.to_string_lossy();
700 let head_len = u32::try_from(head.len())
701 .map_err(|_| std::io::Error::other("git head too large to cache"))?;
702 let root_len = u32::try_from(root.len())
703 .map_err(|_| std::io::Error::other("project root too large to cache"))?;
704 let file_count = u32::try_from(active_ids.len())
705 .map_err(|_| std::io::Error::other("too many files to cache"))?;
706
707 write_u32(&mut postings_writer, head_len)?;
708 write_u32(&mut postings_writer, root_len)?;
709 write_u64(&mut postings_writer, self.max_file_size)?;
710 write_u32(&mut postings_writer, file_count)?;
711 postings_writer.write_all(head.as_bytes())?;
712 postings_writer.write_all(root.as_bytes())?;
713
714 for old_id in &active_ids {
715 let Some(file) = self.files.get(*old_id as usize) else {
716 return Err(std::io::Error::other("missing file entry for cache write"));
717 };
718 let path =
719 cache_relative_path(&self.project_root, &file.path).ok_or_else(|| {
720 std::io::Error::other(format!(
721 "refusing to cache path outside project root: {}",
722 file.path.display()
723 ))
724 })?;
725 let path = path.to_string_lossy();
726 let path_len = u32::try_from(path.len())
727 .map_err(|_| std::io::Error::other("cached path too large"))?;
728 let modified = file
729 .modified
730 .duration_since(UNIX_EPOCH)
731 .unwrap_or(Duration::ZERO);
732 let unindexed = if self.unindexed_files.contains(old_id) {
733 1u8
734 } else {
735 0u8
736 };
737
738 postings_writer.write_all(&[unindexed])?;
739 write_u32(&mut postings_writer, path_len)?;
740 write_u64(&mut postings_writer, file.size)?;
741 write_u64(&mut postings_writer, modified.as_secs())?;
742 write_u32(&mut postings_writer, modified.subsec_nanos())?;
743 postings_writer.write_all(file.content_hash.as_bytes())?;
744 postings_writer.write_all(path.as_bytes())?;
745 }
746
747 let mut lookup_entries = Vec::new();
748 let mut postings_blob = Vec::new();
749 let mut sorted_postings: Vec<_> = self.postings.iter().collect();
750 sorted_postings.sort_by_key(|(trigram, _)| **trigram);
751
752 for (trigram, postings) in sorted_postings {
753 let offset = u64::try_from(postings_blob.len())
754 .map_err(|_| std::io::Error::other("postings blob too large"))?;
755 let mut count = 0u32;
756
757 for posting in postings {
758 let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
759 continue;
760 };
761
762 postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
763 postings_blob.push(posting.next_mask);
764 postings_blob.push(posting.loc_mask);
765 count = count.saturating_add(1);
766 }
767
768 if count > 0 {
769 lookup_entries.push((*trigram, offset, count));
770 }
771 }
772
773 write_u64(
774 &mut postings_writer,
775 u64::try_from(postings_blob.len())
776 .map_err(|_| std::io::Error::other("postings blob too large"))?,
777 )?;
778 postings_writer.write_all(&postings_blob)?;
779 postings_writer.flush()?;
780 let mut postings_blob_file = postings_writer
781 .into_inner()
782 .map_err(|error| std::io::Error::other(error.to_string()))?
783 .into_inner();
784 let checksum = crc32fast::hash(&postings_blob_file);
785 postings_blob_file.extend_from_slice(&checksum.to_le_bytes());
786
787 let mut lookup_writer = BufWriter::new(Cursor::new(Vec::new()));
788 let entry_count = u32::try_from(lookup_entries.len())
789 .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;
790
791 lookup_writer.write_all(LOOKUP_MAGIC)?;
792 write_u32(&mut lookup_writer, INDEX_VERSION)?;
793 write_u32(&mut lookup_writer, entry_count)?;
794
795 for (trigram, offset, count) in lookup_entries {
796 write_u32(&mut lookup_writer, trigram)?;
797 write_u64(&mut lookup_writer, offset)?;
798 write_u32(&mut lookup_writer, count)?;
799 }
800
801 lookup_writer.flush()?;
802 let mut lookup_blob_file = lookup_writer
803 .into_inner()
804 .map_err(|error| std::io::Error::other(error.to_string()))?
805 .into_inner();
806 let checksum = crc32fast::hash(&lookup_blob_file);
807 lookup_blob_file.extend_from_slice(&checksum.to_le_bytes());
808
809 let mut cache_writer = BufWriter::new(File::create(&tmp_cache)?);
810 write_u32(&mut cache_writer, CACHE_MAGIC)?;
811 write_u32(&mut cache_writer, INDEX_VERSION)?;
812 write_u64(
813 &mut cache_writer,
814 u64::try_from(postings_blob_file.len())
815 .map_err(|_| std::io::Error::other("postings section too large"))?,
816 )?;
817 cache_writer.write_all(&postings_blob_file)?;
818 cache_writer.write_all(&lookup_blob_file)?;
819 cache_writer.flush()?;
820 cache_writer.get_ref().sync_all()?;
821 drop(cache_writer);
822 fs::rename(&tmp_cache, &cache_path)?;
823
824 Ok(())
825 })();
826
827 if write_result.is_err() {
828 let _ = fs::remove_file(&tmp_cache);
829 }
830 }
831
832 pub fn read_from_disk(cache_dir: &Path, current_canonical_root: &Path) -> Option<Self> {
833 debug_assert!(current_canonical_root.is_absolute());
834 let cache_path = cache_dir.join("cache.bin");
835 let cache_bytes = fs::read(&cache_path).ok()?;
836 if cache_bytes.len() < 16 {
837 return None;
838 }
839 let mut header = Cursor::new(&cache_bytes);
840 if read_u32(&mut header).ok()? != CACHE_MAGIC {
841 return None;
842 }
843 if read_u32(&mut header).ok()? != INDEX_VERSION {
844 return None;
845 }
846 let postings_len_total = usize::try_from(read_u64(&mut header).ok()?).ok()?;
847 let start = usize::try_from(header.position()).ok()?;
848 let postings_end = start.checked_add(postings_len_total)?;
849 if postings_end > cache_bytes.len() {
850 return None;
851 }
852 let postings_bytes = &cache_bytes[start..postings_end];
853 let lookup_bytes = &cache_bytes[postings_end..];
854 let lookup_len_total = lookup_bytes.len();
855 let mut postings_reader = BufReader::new(Cursor::new(postings_bytes));
856 let mut lookup_reader = BufReader::new(Cursor::new(lookup_bytes));
857 if postings_len_total < 4 || lookup_len_total < 4 {
858 return None;
859 }
860 verify_crc32_bytes_slice(postings_bytes).ok()?;
861 verify_crc32_bytes_slice(lookup_bytes).ok()?;
862
863 let mut magic = [0u8; 8];
864 postings_reader.read_exact(&mut magic).ok()?;
865 if &magic != INDEX_MAGIC {
866 return None;
867 }
868 if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
869 return None;
870 }
871
872 let head_len = read_u32(&mut postings_reader).ok()? as usize;
873 let root_len = read_u32(&mut postings_reader).ok()? as usize;
874 let max_file_size = read_u64(&mut postings_reader).ok()?;
875 let file_count = read_u32(&mut postings_reader).ok()? as usize;
876 if file_count > MAX_ENTRIES {
877 return None;
878 }
879 let postings_body_len = postings_len_total.checked_sub(4)?;
880 let lookup_body_len = lookup_len_total.checked_sub(4)?;
881
882 let remaining_postings = remaining_bytes(&mut postings_reader, postings_body_len)?;
883 let minimum_file_bytes = file_count.checked_mul(MIN_FILE_ENTRY_BYTES)?;
884 if minimum_file_bytes > remaining_postings {
885 return None;
886 }
887
888 if head_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
889 return None;
890 }
891 let mut head_bytes = vec![0u8; head_len];
892 postings_reader.read_exact(&mut head_bytes).ok()?;
893 let git_head = String::from_utf8(head_bytes)
894 .ok()
895 .filter(|head| !head.is_empty());
896
897 if root_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
898 return None;
899 }
900 let mut root_bytes = vec![0u8; root_len];
901 postings_reader.read_exact(&mut root_bytes).ok()?;
902 let _stored_project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);
903 let project_root = current_canonical_root.to_path_buf();
904
905 let mut files = Vec::with_capacity(file_count);
906 let mut path_to_id = HashMap::new();
907 let mut unindexed_files = HashSet::new();
908
909 for file_id in 0..file_count {
910 let mut unindexed = [0u8; 1];
911 postings_reader.read_exact(&mut unindexed).ok()?;
912 let path_len = read_u32(&mut postings_reader).ok()? as usize;
913 let size = read_u64(&mut postings_reader).ok()?;
914 let secs = read_u64(&mut postings_reader).ok()?;
915 let nanos = read_u32(&mut postings_reader).ok()?;
916 let mut hash_bytes = [0u8; 32];
917 postings_reader.read_exact(&mut hash_bytes).ok()?;
918 let content_hash = blake3::Hash::from_bytes(hash_bytes);
919 if nanos >= 1_000_000_000 {
920 return None;
921 }
922 if path_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
923 return None;
924 }
925 let mut path_bytes = vec![0u8; path_len];
926 postings_reader.read_exact(&mut path_bytes).ok()?;
927 let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
928 let full_path = cached_path_under_root(&project_root, &relative_path)?;
929 let file_id_u32 = u32::try_from(file_id).ok()?;
930
931 files.push(FileEntry {
932 path: full_path.clone(),
933 size,
934 modified: UNIX_EPOCH + Duration::new(secs, nanos),
935 content_hash,
936 });
937 path_to_id.insert(full_path, file_id_u32);
938 if unindexed[0] == 1 {
939 unindexed_files.insert(file_id_u32);
940 }
941 }
942
943 let postings_len = read_u64(&mut postings_reader).ok()? as usize;
944 let max_postings_bytes = MAX_ENTRIES.checked_mul(POSTING_BYTES)?;
945 if postings_len > max_postings_bytes {
946 return None;
947 }
948 if postings_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
949 return None;
950 }
951 let mut postings_blob = vec![0u8; postings_len];
952 postings_reader.read_exact(&mut postings_blob).ok()?;
953
954 let mut lookup_magic = [0u8; 8];
955 lookup_reader.read_exact(&mut lookup_magic).ok()?;
956 if &lookup_magic != LOOKUP_MAGIC {
957 return None;
958 }
959 if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
960 return None;
961 }
962 let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
963 if entry_count > MAX_ENTRIES {
964 return None;
965 }
966 let remaining_lookup = remaining_bytes(&mut lookup_reader, lookup_body_len)?;
967 let minimum_lookup_bytes = entry_count.checked_mul(LOOKUP_ENTRY_BYTES)?;
968 if minimum_lookup_bytes > remaining_lookup {
969 return None;
970 }
971
972 let mut postings = HashMap::new();
973 let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
974
975 for _ in 0..entry_count {
976 let trigram = read_u32(&mut lookup_reader).ok()?;
977 let offset = read_u64(&mut lookup_reader).ok()? as usize;
978 let count = read_u32(&mut lookup_reader).ok()? as usize;
979 if count > MAX_ENTRIES {
980 return None;
981 }
982 let bytes_len = count.checked_mul(POSTING_BYTES)?;
983 let end = offset.checked_add(bytes_len)?;
984 if end > postings_blob.len() {
985 return None;
986 }
987
988 let mut trigram_postings = Vec::with_capacity(count);
989 for chunk in postings_blob[offset..end].chunks_exact(6) {
990 let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
991 let posting = Posting {
992 file_id,
993 next_mask: chunk[4],
994 loc_mask: chunk[5],
995 };
996 trigram_postings.push(posting.clone());
997 file_trigrams.entry(file_id).or_default().push(trigram);
998 }
999 postings.insert(trigram, trigram_postings);
1000 }
1001
1002 Some(SearchIndex {
1003 postings,
1004 files,
1005 path_to_id,
1006 ready: false,
1007 project_root,
1008 git_head,
1009 max_file_size,
1010 file_trigrams,
1011 unindexed_files,
1012 })
1013 }
1014
1015 pub(crate) fn stored_git_head(&self) -> Option<&str> {
1016 self.git_head.as_deref()
1017 }
1018
1019 pub(crate) fn set_ready(&mut self, ready: bool) {
1020 self.ready = ready;
1021 }
1022
1023 pub(crate) fn verify_against_disk(&mut self, current_head: Option<String>) {
1024 self.git_head = current_head;
1025 verify_file_mtimes(self);
1026 self.ready = true;
1027 }
1028
1029 #[cfg(debug_assertions)]
1030 #[doc(hidden)]
1031 pub fn verify_against_disk_for_debug(&mut self, current_head: Option<String>) {
1032 self.verify_against_disk(current_head);
1033 }
1034
1035 pub(crate) fn rebuild_or_refresh(
1036 root: &Path,
1037 max_file_size: u64,
1038 current_head: Option<String>,
1039 baseline: Option<SearchIndex>,
1040 ) -> Self {
1041 if let Some(mut baseline) = baseline {
1042 baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
1043 baseline.max_file_size = max_file_size;
1044
1045 if baseline.git_head == current_head || current_head.is_none() {
1046 baseline.git_head = current_head;
1053 verify_file_mtimes(&mut baseline);
1054 baseline.ready = true;
1055 return baseline;
1056 }
1057
1058 if let (Some(previous), Some(current)) =
1059 (baseline.git_head.clone(), current_head.clone())
1060 {
1061 let project_root = baseline.project_root.clone();
1062 if apply_git_diff_updates(&mut baseline, &project_root, &previous, ¤t) {
1063 baseline.git_head = Some(current);
1064 verify_file_mtimes(&mut baseline);
1065 baseline.ready = true;
1066 return baseline;
1067 }
1068 }
1069 }
1070
1071 SearchIndex::build_with_limit(root, max_file_size)
1072 }
1073
1074 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
1075 let file_id = u32::try_from(self.files.len()).ok()?;
1076 let metadata = fs::metadata(path).ok();
1077 let size = metadata
1078 .as_ref()
1079 .map_or(size_hint, |metadata| metadata.len());
1080 let modified = metadata
1081 .and_then(|metadata| metadata.modified().ok())
1082 .unwrap_or(UNIX_EPOCH);
1083
1084 self.files.push(FileEntry {
1085 path: path.to_path_buf(),
1086 size,
1087 modified,
1088 content_hash: cache_freshness::zero_hash(),
1089 });
1090 self.path_to_id.insert(path.to_path_buf(), file_id);
1091 Some(file_id)
1092 }
1093
1094 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
1095 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
1096 return;
1097 };
1098 self.unindexed_files.insert(file_id);
1099 self.file_trigrams.insert(file_id, Vec::new());
1100 }
1101
1102 fn active_file_ids(&self) -> Vec<u32> {
1103 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
1104 ids.sort_unstable();
1105 ids
1106 }
1107
1108 fn is_active_file(&self, file_id: u32) -> bool {
1109 self.files
1110 .get(file_id as usize)
1111 .map(|file| !file.path.as_os_str().is_empty())
1112 .unwrap_or(false)
1113 }
1114
1115 fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> Vec<u32> {
1116 let Some(postings) = self.postings.get(&trigram) else {
1117 return Vec::new();
1118 };
1119
1120 let mut matches = Vec::with_capacity(postings.len());
1121
1122 for posting in postings {
1123 if let Some(filter) = filter {
1124 if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
1127 continue;
1128 }
1129 }
1134 if self.is_active_file(posting.file_id) {
1135 matches.push(posting.file_id);
1136 }
1137 }
1138
1139 matches
1140 }
1141}
1142
1143fn search_candidate_file(
1144 file: &FileEntry,
1145 matcher: &SearchMatcher,
1146 max_results: usize,
1147 stop_after: usize,
1148 total_matches: &AtomicUsize,
1149 files_searched: &AtomicUsize,
1150 files_with_matches: &AtomicUsize,
1151 truncated: &AtomicBool,
1152 engine_capped: &AtomicBool,
1153) -> Vec<SharedGrepMatch> {
1154 if should_stop_search(truncated, total_matches, stop_after) {
1155 engine_capped.store(true, Ordering::Relaxed);
1156 return Vec::new();
1157 }
1158
1159 let content = match read_indexed_file_bytes(&file.path) {
1160 Some(content) => content,
1161 None => return Vec::new(),
1162 };
1163 if is_binary_bytes(&content) {
1170 return Vec::new();
1171 }
1172 files_searched.fetch_add(1, Ordering::Relaxed);
1173
1174 let shared_path = Arc::new(file.path.clone());
1175 let mut matches = Vec::new();
1176 let mut line_starts = None;
1177 let mut seen_lines = HashSet::new();
1178 let mut matched_this_file = false;
1179
1180 match matcher {
1181 SearchMatcher::Literal(literal) if !literal.case_insensitive_ascii => {
1182 let needle = &literal.needle;
1183 let finder = memchr::memmem::Finder::new(needle);
1184 let mut start = 0;
1185
1186 while let Some(position) = finder.find(&content[start..]) {
1187 if should_stop_search(truncated, total_matches, stop_after) {
1188 engine_capped.store(true, Ordering::Relaxed);
1189 break;
1190 }
1191
1192 let offset = start + position;
1193 start = offset + 1;
1194
1195 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1196 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1197 if !seen_lines.insert(line) {
1198 continue;
1199 }
1200
1201 matched_this_file = true;
1202 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1203 if match_number > max_results {
1204 truncated.store(true, Ordering::Relaxed);
1205 break;
1206 }
1207
1208 let end = offset + needle.len();
1209 matches.push(SharedGrepMatch {
1210 file: shared_path.clone(),
1211 line,
1212 column,
1213 line_text,
1214 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1215 });
1216 }
1217 }
1218 SearchMatcher::Literal(literal) => {
1219 let needle = &literal.needle;
1220 let search_content = content.to_ascii_lowercase();
1221 let finder = memchr::memmem::Finder::new(needle);
1222 let mut start = 0;
1223
1224 while let Some(position) = finder.find(&search_content[start..]) {
1225 if should_stop_search(truncated, total_matches, stop_after) {
1226 engine_capped.store(true, Ordering::Relaxed);
1227 break;
1228 }
1229
1230 let offset = start + position;
1231 start = offset + 1;
1232
1233 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1234 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1235 if !seen_lines.insert(line) {
1236 continue;
1237 }
1238
1239 matched_this_file = true;
1240 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1241 if match_number > max_results {
1242 truncated.store(true, Ordering::Relaxed);
1243 break;
1244 }
1245
1246 let end = offset + needle.len();
1247 matches.push(SharedGrepMatch {
1248 file: shared_path.clone(),
1249 line,
1250 column,
1251 line_text,
1252 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1253 });
1254 }
1255 }
1256 SearchMatcher::Regex(regex) => {
1257 for matched in regex.find_iter(&content) {
1258 if should_stop_search(truncated, total_matches, stop_after) {
1259 engine_capped.store(true, Ordering::Relaxed);
1260 break;
1261 }
1262
1263 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1264 let (line, column, line_text) =
1265 line_details_bytes(&content, line_starts, matched.start());
1266 if !seen_lines.insert(line) {
1267 continue;
1268 }
1269
1270 matched_this_file = true;
1271 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1272 if match_number > max_results {
1273 truncated.store(true, Ordering::Relaxed);
1274 break;
1275 }
1276
1277 matches.push(SharedGrepMatch {
1278 file: shared_path.clone(),
1279 line,
1280 column,
1281 line_text,
1282 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
1283 });
1284 }
1285 }
1286 }
1287
1288 if matched_this_file {
1289 files_with_matches.fetch_add(1, Ordering::Relaxed);
1290 }
1291
1292 matches
1293}
1294
1295fn should_stop_search(
1296 truncated: &AtomicBool,
1297 total_matches: &AtomicUsize,
1298 stop_after: usize,
1299) -> bool {
1300 truncated.load(Ordering::Relaxed) && total_matches.load(Ordering::Relaxed) >= stop_after
1301}
1302
1303fn intersect_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1304 let mut merged = Vec::with_capacity(left.len().min(right.len()));
1305 let mut left_index = 0;
1306 let mut right_index = 0;
1307
1308 while left_index < left.len() && right_index < right.len() {
1309 match left[left_index].cmp(&right[right_index]) {
1310 std::cmp::Ordering::Less => left_index += 1,
1311 std::cmp::Ordering::Greater => right_index += 1,
1312 std::cmp::Ordering::Equal => {
1313 merged.push(left[left_index]);
1314 left_index += 1;
1315 right_index += 1;
1316 }
1317 }
1318 }
1319
1320 merged
1321}
1322
1323fn union_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1324 let mut merged = Vec::with_capacity(left.len() + right.len());
1325 let mut left_index = 0;
1326 let mut right_index = 0;
1327
1328 while left_index < left.len() && right_index < right.len() {
1329 match left[left_index].cmp(&right[right_index]) {
1330 std::cmp::Ordering::Less => {
1331 merged.push(left[left_index]);
1332 left_index += 1;
1333 }
1334 std::cmp::Ordering::Greater => {
1335 merged.push(right[right_index]);
1336 right_index += 1;
1337 }
1338 std::cmp::Ordering::Equal => {
1339 merged.push(left[left_index]);
1340 left_index += 1;
1341 right_index += 1;
1342 }
1343 }
1344 }
1345
1346 merged.extend_from_slice(&left[left_index..]);
1347 merged.extend_from_slice(&right[right_index..]);
1348 merged
1349}
1350
1351pub fn decompose_regex(pattern: &str) -> RegexQuery {
1352 let hir = match regex_syntax::parse(pattern) {
1353 Ok(hir) => hir,
1354 Err(_) => return RegexQuery::default(),
1355 };
1356
1357 let build = build_query(&hir);
1358 build.into_query()
1359}
1360
1361pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
1362 ((a as u32) << 16) | ((b as u32) << 8) | c as u32
1363}
1364
1365pub fn normalize_char(c: u8) -> u8 {
1366 c.to_ascii_lowercase()
1367}
1368
1369pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
1370 if content.len() < 3 {
1371 return Vec::new();
1372 }
1373
1374 let mut trigrams = Vec::with_capacity(content.len().saturating_sub(2));
1375 for start in 0..=content.len() - 3 {
1376 let trigram = pack_trigram(
1377 normalize_char(content[start]),
1378 normalize_char(content[start + 1]),
1379 normalize_char(content[start + 2]),
1380 );
1381 let next_char = content.get(start + 3).copied().unwrap_or(EOF_SENTINEL);
1382 trigrams.push((trigram, next_char, start));
1383 }
1384 trigrams
1385}
1386
1387pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
1388 let mut seen = HashSet::new();
1389 let mut out = Vec::new();
1390 for token in tokens {
1391 for (trigram, _, _) in extract_trigrams(token.as_bytes()) {
1392 if seen.insert(trigram) {
1393 out.push(trigram);
1394 }
1395 }
1396 }
1397 out
1398}
1399
1400pub fn lexical_score(index: &SearchIndex, query_trigrams: &[u32], file_id: u32) -> f32 {
1401 if query_trigrams.is_empty() {
1402 return 0.0;
1403 }
1404
1405 let mut hits = 0u32;
1406 for &trigram in query_trigrams {
1407 if let Some(postings) = index.postings.get(&trigram) {
1408 if postings
1409 .binary_search_by(|posting| posting.file_id.cmp(&file_id))
1410 .is_ok()
1411 {
1412 hits += 1;
1413 }
1414 }
1415 }
1416
1417 if hits == 0 {
1418 return 0.0;
1419 }
1420
1421 let file_trigram_count = index
1422 .file_trigrams
1423 .get(&file_id)
1424 .map_or(1, |trigrams| trigrams.len().max(1)) as f32;
1425 (hits as f32) / (1.0 + file_trigram_count.ln())
1426}
1427
1428pub fn resolve_cache_dir(project_root: &Path, storage_dir: Option<&Path>) -> PathBuf {
1429 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
1431 return PathBuf::from(override_dir)
1432 .join("index")
1433 .join(project_cache_key(project_root));
1434 }
1435 if let Some(dir) = storage_dir {
1437 return dir.join("index").join(project_cache_key(project_root));
1438 }
1439 let home = std::env::var_os("HOME")
1444 .or_else(|| std::env::var_os("USERPROFILE"))
1445 .map(PathBuf::from)
1446 .unwrap_or_else(std::env::temp_dir);
1447 home.join(".cache")
1448 .join("aft")
1449 .join("index")
1450 .join(project_cache_key(project_root))
1451}
1452
1453pub(crate) fn build_path_filters(
1454 include: &[String],
1455 exclude: &[String],
1456) -> Result<PathFilters, String> {
1457 Ok(PathFilters {
1458 includes: build_globset(include)?,
1459 excludes: build_globset(exclude)?,
1460 })
1461}
1462
1463pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
1464 walk_project_files_from(root, root, filters)
1465}
1466
1467pub(crate) fn walk_project_files_from(
1468 filter_root: &Path,
1469 search_root: &Path,
1470 filters: &PathFilters,
1471) -> Vec<PathBuf> {
1472 let mut builder = WalkBuilder::new(search_root);
1473 builder
1474 .hidden(false)
1475 .git_ignore(true)
1476 .git_global(true)
1477 .git_exclude(true)
1478 .filter_entry(|entry| {
1479 let name = entry.file_name().to_string_lossy();
1480 if entry.file_type().map_or(false, |ft| ft.is_dir()) {
1481 return !matches!(
1482 name.as_ref(),
1483 "node_modules"
1484 | "target"
1485 | "venv"
1486 | ".venv"
1487 | ".git"
1488 | "__pycache__"
1489 | ".tox"
1490 | "dist"
1491 | "build"
1492 );
1493 }
1494 true
1495 });
1496
1497 let mut files = Vec::new();
1498 for entry in builder.build().filter_map(|entry| entry.ok()) {
1499 if !entry
1500 .file_type()
1501 .map_or(false, |file_type| file_type.is_file())
1502 {
1503 continue;
1504 }
1505 let path = entry.into_path();
1506 if filters.matches(filter_root, &path) {
1507 files.push(path);
1508 }
1509 }
1510
1511 sort_paths_by_mtime_desc(&mut files);
1512 files
1513}
1514
1515pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
1516 let bytes = fs::read(path).ok()?;
1517 if is_binary_bytes(&bytes) {
1518 return None;
1519 }
1520 String::from_utf8(bytes).ok()
1521}
1522
1523fn read_indexed_file_bytes(path: &Path) -> Option<Vec<u8>> {
1524 fs::read(path).ok()
1525}
1526
1527pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
1528 path.strip_prefix(root)
1529 .map(PathBuf::from)
1530 .unwrap_or_else(|_| path.to_path_buf())
1531}
1532
1533pub(crate) fn cache_relative_path(root: &Path, path: &Path) -> Option<PathBuf> {
1534 let normalized_root = normalize_path(root);
1535 let normalized_path = normalize_path(path);
1536 let relative = normalized_path.strip_prefix(&normalized_root).ok()?;
1537 validate_cached_relative_path(relative)
1538}
1539
1540pub(crate) fn cached_path_under_root(root: &Path, relative_path: &Path) -> Option<PathBuf> {
1541 let relative = validate_cached_relative_path(relative_path)?;
1542 let normalized_root = normalize_path(root);
1543 let full_path = normalize_path(&normalized_root.join(relative));
1544
1545 match fs::canonicalize(&full_path) {
1546 Ok(canonical_path) => {
1547 if canonical_path.starts_with(&normalized_root) {
1548 return Some(full_path);
1549 }
1550
1551 let canonical_root = fs::canonicalize(&normalized_root).ok()?;
1552 canonical_path
1553 .starts_with(&canonical_root)
1554 .then_some(full_path)
1555 }
1556 Err(_) => full_path.starts_with(&normalized_root).then_some(full_path),
1557 }
1558}
1559
1560pub(crate) fn validate_cached_relative_path(path: &Path) -> Option<PathBuf> {
1561 if path.is_absolute() {
1562 return None;
1563 }
1564
1565 let mut normalized = PathBuf::new();
1566 for component in path.components() {
1567 match component {
1568 Component::Normal(part) => normalized.push(part),
1569 Component::CurDir => {}
1570 Component::ParentDir | Component::RootDir | Component::Prefix(_) => return None,
1571 }
1572 }
1573 (!normalized.as_os_str().is_empty()).then_some(normalized)
1574}
1575
1576pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
1589 use std::collections::HashMap;
1590 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(paths.len());
1591 for path in paths.iter() {
1592 mtimes
1593 .entry(path.clone())
1594 .or_insert_with(|| path_modified_time(path));
1595 }
1596 paths.sort_by(|left, right| {
1597 let left_mtime = mtimes.get(left).and_then(|v| *v);
1598 let right_mtime = mtimes.get(right).and_then(|v| *v);
1599 right_mtime.cmp(&left_mtime).then_with(|| left.cmp(right))
1600 });
1601}
1602
1603pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
1606 use std::collections::HashMap;
1607 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::new();
1608 for m in matches.iter() {
1609 mtimes.entry(m.file.clone()).or_insert_with(|| {
1610 let resolved = resolve_match_path(project_root, &m.file);
1611 path_modified_time(&resolved)
1612 });
1613 }
1614 matches.sort_by(|left, right| {
1615 let left_mtime = mtimes.get(&left.file).and_then(|v| *v);
1616 let right_mtime = mtimes.get(&right.file).and_then(|v| *v);
1617 right_mtime
1618 .cmp(&left_mtime)
1619 .then_with(|| left.file.cmp(&right.file))
1620 .then_with(|| left.line.cmp(&right.line))
1621 .then_with(|| left.column.cmp(&right.column))
1622 });
1623}
1624
1625fn sort_shared_grep_matches_by_cached_mtime_desc<F>(
1630 matches: &mut [SharedGrepMatch],
1631 modified_for_path: F,
1632) where
1633 F: Fn(&Path) -> Option<SystemTime>,
1634{
1635 use std::collections::HashMap;
1636 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(matches.len());
1637 for m in matches.iter() {
1638 let path = m.file.as_path().to_path_buf();
1639 mtimes
1640 .entry(path.clone())
1641 .or_insert_with(|| modified_for_path(&path));
1642 }
1643 matches.sort_by(|left, right| {
1644 let left_mtime = mtimes.get(left.file.as_path()).and_then(|v| *v);
1645 let right_mtime = mtimes.get(right.file.as_path()).and_then(|v| *v);
1646 right_mtime
1647 .cmp(&left_mtime)
1648 .then_with(|| left.file.as_path().cmp(right.file.as_path()))
1649 .then_with(|| left.line.cmp(&right.line))
1650 .then_with(|| left.column.cmp(&right.column))
1651 });
1652}
1653
1654pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
1655 let resolved_project_root = canonicalize_or_normalize(project_root);
1656 let root = match path {
1657 Some(path) => {
1658 let path = PathBuf::from(path);
1659 if path.is_absolute() {
1660 canonicalize_or_normalize(&path)
1661 } else {
1662 normalize_path(&resolved_project_root.join(path))
1663 }
1664 }
1665 None => resolved_project_root.clone(),
1666 };
1667
1668 let use_index = is_within_search_root(&resolved_project_root, &root);
1669 SearchScope { root, use_index }
1670}
1671
1672pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
1673 content_inspector::inspect(content).is_binary()
1674}
1675
1676pub(crate) fn current_git_head(root: &Path) -> Option<String> {
1677 run_git(root, &["rev-parse", "HEAD"])
1678}
1679
1680pub fn project_cache_key(project_root: &Path) -> String {
1681 use sha2::{Digest, Sha256};
1682
1683 let mut hasher = Sha256::new();
1684
1685 if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
1686 hasher.update(root_commit.as_bytes());
1689 } else {
1690 let canonical_root = canonicalize_or_normalize(project_root);
1692 hasher.update(canonical_root.to_string_lossy().as_bytes());
1693 }
1694
1695 let digest = format!("{:x}", hasher.finalize());
1696 digest[..16].to_string()
1697}
1698
1699impl PathFilters {
1700 fn matches(&self, root: &Path, path: &Path) -> bool {
1701 let relative = to_glob_path(&relative_to_root(root, path));
1702 if self
1703 .includes
1704 .as_ref()
1705 .is_some_and(|includes| !includes.is_match(&relative))
1706 {
1707 return false;
1708 }
1709 if self
1710 .excludes
1711 .as_ref()
1712 .is_some_and(|excludes| excludes.is_match(&relative))
1713 {
1714 return false;
1715 }
1716 true
1717 }
1718}
1719
1720fn canonicalize_or_normalize(path: &Path) -> PathBuf {
1721 fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
1722}
1723
1724fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
1725 if path.is_absolute() {
1726 path.to_path_buf()
1727 } else {
1728 project_root.join(path)
1729 }
1730}
1731
1732fn path_modified_time(path: &Path) -> Option<SystemTime> {
1733 fs::metadata(path)
1734 .and_then(|metadata| metadata.modified())
1735 .ok()
1736}
1737
1738fn normalize_path(path: &Path) -> PathBuf {
1739 let mut result = PathBuf::new();
1740 for component in path.components() {
1741 match component {
1742 Component::ParentDir => {
1743 if !result.pop() {
1744 result.push(component);
1745 }
1746 }
1747 Component::CurDir => {}
1748 _ => result.push(component),
1749 }
1750 }
1751 result
1752}
1753
1754fn canonicalize_existing_or_deleted_path(path: &Path) -> PathBuf {
1755 if let Ok(canonical) = fs::canonicalize(path) {
1756 return canonical;
1757 }
1758
1759 let Some(parent) = path.parent() else {
1760 return path.to_path_buf();
1761 };
1762 let Some(file_name) = path.file_name() else {
1763 return path.to_path_buf();
1764 };
1765
1766 fs::canonicalize(parent)
1767 .map(|canonical_parent| canonical_parent.join(file_name))
1768 .unwrap_or_else(|_| path.to_path_buf())
1769}
1770
1771fn verify_file_mtimes(index: &mut SearchIndex) {
1774 let mut stale_paths = Vec::new();
1775
1776 for entry in &mut index.files {
1777 if entry.path.as_os_str().is_empty() {
1778 continue; }
1780 let cached = FileFreshness {
1781 mtime: entry.modified,
1782 size: entry.size,
1783 content_hash: entry.content_hash,
1784 };
1785 match cache_freshness::verify_file_strict(&entry.path, &cached) {
1786 FreshnessVerdict::HotFresh => {}
1787 FreshnessVerdict::ContentFresh {
1788 new_mtime,
1789 new_size,
1790 } => {
1791 entry.modified = new_mtime;
1792 entry.size = new_size;
1793 }
1794 FreshnessVerdict::Stale | FreshnessVerdict::Deleted => {
1795 stale_paths.push(entry.path.clone())
1796 }
1797 }
1798 }
1799
1800 for path in &stale_paths {
1802 index.update_file(path);
1803 }
1804
1805 let filters = PathFilters::default();
1807 for path in walk_project_files(&index.project_root, &filters) {
1808 if !index.path_to_id.contains_key(&path) {
1809 index.update_file(&path);
1810 }
1811 }
1812
1813 if !stale_paths.is_empty() {
1814 crate::slog_info!(
1815 "search index: refreshed {} stale file(s) from disk cache",
1816 stale_paths.len()
1817 );
1818 }
1819}
1820
1821fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
1822 normalize_path(path).starts_with(normalize_path(search_root))
1823}
1824
1825impl QueryBuild {
1826 fn into_query(self) -> RegexQuery {
1827 let mut query = RegexQuery::default();
1828
1829 for run in self.and_runs {
1830 add_run_to_and_query(&mut query, &run);
1831 }
1832
1833 for group in self.or_groups {
1834 let mut trigrams = BTreeSet::new();
1835 let mut filters = HashMap::new();
1836 for run in group {
1837 for (trigram, filter) in trigram_filters(&run) {
1838 trigrams.insert(trigram);
1839 merge_filter(filters.entry(trigram).or_default(), filter);
1840 }
1841 }
1842 if !trigrams.is_empty() {
1843 query.or_groups.push(trigrams.into_iter().collect());
1844 query.or_filters.push(filters);
1845 }
1846 }
1847
1848 query
1849 }
1850}
1851
1852fn build_query(hir: &Hir) -> QueryBuild {
1853 match hir.kind() {
1854 HirKind::Literal(literal) => {
1855 if literal.0.len() >= 3 {
1856 QueryBuild {
1857 and_runs: vec![literal.0.to_vec()],
1858 or_groups: Vec::new(),
1859 }
1860 } else {
1861 QueryBuild::default()
1862 }
1863 }
1864 HirKind::Capture(capture) => build_query(&capture.sub),
1865 HirKind::Concat(parts) => {
1866 let mut build = QueryBuild::default();
1867 for part in parts {
1868 let part_build = build_query(part);
1869 build.and_runs.extend(part_build.and_runs);
1870 build.or_groups.extend(part_build.or_groups);
1871 }
1872 build
1873 }
1874 HirKind::Alternation(parts) => {
1875 let mut group = Vec::new();
1876 for part in parts {
1877 let Some(mut choices) = guaranteed_run_choices(part) else {
1878 return QueryBuild::default();
1879 };
1880 group.append(&mut choices);
1881 }
1882 if group.is_empty() {
1883 QueryBuild::default()
1884 } else {
1885 QueryBuild {
1886 and_runs: Vec::new(),
1887 or_groups: vec![group],
1888 }
1889 }
1890 }
1891 HirKind::Repetition(repetition) => {
1892 if repetition.min == 0 {
1893 QueryBuild::default()
1894 } else {
1895 build_query(&repetition.sub)
1896 }
1897 }
1898 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
1899 }
1900}
1901
1902fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
1903 match hir.kind() {
1904 HirKind::Literal(literal) => {
1905 if literal.0.len() >= 3 {
1906 Some(vec![literal.0.to_vec()])
1907 } else {
1908 None
1909 }
1910 }
1911 HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
1912 HirKind::Concat(parts) => {
1913 let mut runs = Vec::new();
1914 for part in parts {
1915 if let Some(mut part_runs) = guaranteed_run_choices(part) {
1916 runs.append(&mut part_runs);
1917 }
1918 }
1919 if runs.is_empty() {
1920 None
1921 } else {
1922 Some(runs)
1923 }
1924 }
1925 HirKind::Alternation(parts) => {
1926 let mut runs = Vec::new();
1927 for part in parts {
1928 let Some(mut part_runs) = guaranteed_run_choices(part) else {
1929 return None;
1930 };
1931 runs.append(&mut part_runs);
1932 }
1933 if runs.is_empty() {
1934 None
1935 } else {
1936 Some(runs)
1937 }
1938 }
1939 HirKind::Repetition(repetition) => {
1940 if repetition.min == 0 {
1941 None
1942 } else {
1943 guaranteed_run_choices(&repetition.sub)
1944 }
1945 }
1946 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
1947 }
1948}
1949
1950fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1951 for (trigram, filter) in trigram_filters(run) {
1952 if !query.and_trigrams.contains(&trigram) {
1953 query.and_trigrams.push(trigram);
1954 }
1955 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1956 }
1957}
1958
1959fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1960 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1961 for (trigram, next_char, position) in extract_trigrams(run) {
1962 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1963 if next_char != EOF_SENTINEL {
1964 entry.next_mask |= mask_for_next_char(next_char);
1965 }
1966 entry.loc_mask |= mask_for_position(position);
1967 }
1968 filters.into_iter().collect()
1969}
1970
1971fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
1972 target.next_mask |= filter.next_mask;
1973 target.loc_mask |= filter.loc_mask;
1974}
1975
1976fn mask_for_next_char(next_char: u8) -> u8 {
1977 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1978 1u8 << bit
1979}
1980
1981fn mask_for_position(position: usize) -> u8 {
1982 1u8 << (position % 8)
1983}
1984
1985fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1986 if patterns.is_empty() {
1987 return Ok(None);
1988 }
1989
1990 let mut builder = GlobSetBuilder::new();
1991 for pattern in patterns {
1992 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1993 builder.add(glob);
1994 }
1995 builder.build().map(Some).map_err(|error| error.to_string())
1996}
1997
1998fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
1999 let mut buffer = [0u8; 4];
2000 reader.read_exact(&mut buffer)?;
2001 Ok(u32::from_le_bytes(buffer))
2002}
2003
2004fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
2005 let mut buffer = [0u8; 8];
2006 reader.read_exact(&mut buffer)?;
2007 Ok(u64::from_le_bytes(buffer))
2008}
2009
2010fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
2011 writer.write_all(&value.to_le_bytes())
2012}
2013
2014fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
2015 writer.write_all(&value.to_le_bytes())
2016}
2017
2018fn verify_crc32_bytes_slice(bytes: &[u8]) -> std::io::Result<()> {
2019 let Some((body, stored)) = bytes.split_last_chunk::<4>() else {
2020 return Err(std::io::Error::other("search index checksum missing"));
2021 };
2022 let expected = u32::from_le_bytes(*stored);
2023 let actual = crc32fast::hash(body);
2024 if actual != expected {
2025 return Err(std::io::Error::other("search index checksum mismatch"));
2026 }
2027 Ok(())
2028}
2029
2030fn remaining_bytes<R: Seek>(reader: &mut R, total_len: usize) -> Option<usize> {
2031 let pos = usize::try_from(reader.stream_position().ok()?).ok()?;
2032 total_len.checked_sub(pos)
2033}
2034
2035fn run_git(root: &Path, args: &[&str]) -> Option<String> {
2036 let output = Command::new("git")
2037 .arg("-C")
2038 .arg(root)
2039 .args(args)
2040 .output()
2041 .ok()?;
2042 if !output.status.success() {
2043 return None;
2044 }
2045 let value = String::from_utf8(output.stdout).ok()?;
2046 let value = value.trim().to_string();
2047 if value.is_empty() {
2048 None
2049 } else {
2050 Some(value)
2051 }
2052}
2053
2054fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
2055 let diff_range = format!("{}..{}", from, to);
2056 let output = match Command::new("git")
2057 .arg("-C")
2058 .arg(root)
2059 .args(["diff", "--name-status", "-M", &diff_range])
2060 .output()
2061 {
2062 Ok(output) => output,
2063 Err(_) => return false,
2064 };
2065
2066 if !output.status.success() {
2067 return false;
2068 }
2069
2070 let Ok(diff) = String::from_utf8(output.stdout) else {
2071 return false;
2072 };
2073
2074 for line in diff.lines().map(str::trim).filter(|line| !line.is_empty()) {
2075 let mut fields = line.split('\t');
2076 let Some(status) = fields.next() else {
2077 continue;
2078 };
2079
2080 if status.starts_with('R') {
2081 let Some(old_path) = fields
2082 .next()
2083 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2084 else {
2085 continue;
2086 };
2087 let Some(new_path) = fields
2088 .next()
2089 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2090 else {
2091 continue;
2092 };
2093 index.remove_file(&old_path);
2094 index.update_file(&new_path);
2095 continue;
2096 }
2097
2098 let Some(path) = fields
2099 .next()
2100 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2101 else {
2102 continue;
2103 };
2104 if status.starts_with('D') || !path.exists() {
2105 index.remove_file(&path);
2106 } else {
2107 index.update_file(&path);
2108 }
2109 }
2110
2111 true
2112}
2113
2114fn is_binary_path(path: &Path, size: u64) -> bool {
2115 if size == 0 {
2116 return false;
2117 }
2118
2119 let mut file = match File::open(path) {
2120 Ok(file) => file,
2121 Err(_) => return true,
2122 };
2123
2124 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
2125 match file.read(&mut preview) {
2126 Ok(read) => is_binary_bytes(&preview[..read]),
2127 Err(_) => true,
2128 }
2129}
2130
2131fn line_starts_bytes(content: &[u8]) -> Vec<usize> {
2132 let mut starts = vec![0usize];
2133 for (index, byte) in content.iter().copied().enumerate() {
2134 if byte == b'\n' {
2135 starts.push(index + 1);
2136 }
2137 }
2138 starts
2139}
2140
2141fn line_details_bytes(content: &[u8], line_starts: &[usize], offset: usize) -> (u32, u32, String) {
2142 let line_index = match line_starts.binary_search(&offset) {
2143 Ok(index) => index,
2144 Err(index) => index.saturating_sub(1),
2145 };
2146 let line_start = line_starts.get(line_index).copied().unwrap_or(0);
2147 let line_end = content[line_start..]
2148 .iter()
2149 .position(|byte| *byte == b'\n')
2150 .map(|length| line_start + length)
2151 .unwrap_or(content.len());
2152 let mut line_slice = &content[line_start..line_end];
2153 if line_slice.ends_with(b"\r") {
2154 line_slice = &line_slice[..line_slice.len() - 1];
2155 }
2156 let line_text = String::from_utf8_lossy(line_slice).into_owned();
2157 let column = String::from_utf8_lossy(&content[line_start..offset])
2158 .chars()
2159 .count() as u32
2160 + 1;
2161 (line_index as u32 + 1, column, line_text)
2162}
2163
2164fn to_glob_path(path: &Path) -> String {
2165 path.to_string_lossy().replace('\\', "/")
2166}
2167
2168#[cfg(test)]
2169mod tests {
2170 use std::process::Command;
2171
2172 use super::*;
2173
2174 #[test]
2175 fn cached_path_under_root_allows_missing_lexical_child() {
2176 let dir = tempfile::tempdir().expect("create temp dir");
2177 let project = dir.path().join("project");
2178 fs::create_dir_all(&project).expect("create project dir");
2179 let root = fs::canonicalize(&project).expect("canonicalize project");
2180
2181 let path = cached_path_under_root(&root, Path::new("future/file.rs"))
2182 .expect("missing child should fall back to lexical validation");
2183
2184 assert_eq!(path, root.join("future/file.rs"));
2185 }
2186
2187 #[cfg(unix)]
2188 #[test]
2189 fn cached_path_under_root_rejects_symlink_escape() {
2190 let dir = tempfile::tempdir().expect("create temp dir");
2191 let project = dir.path().join("project");
2192 let outside = dir.path().join("outside");
2193 fs::create_dir_all(&project).expect("create project dir");
2194 fs::create_dir_all(&outside).expect("create outside dir");
2195 fs::write(outside.join("secret.txt"), "secret").expect("write outside file");
2196 std::os::unix::fs::symlink(&outside, project.join("link")).expect("create symlink");
2197 let root = fs::canonicalize(&project).expect("canonicalize project");
2198
2199 assert!(cached_path_under_root(&root, Path::new("link/secret.txt")).is_none());
2200 }
2201
2202 #[test]
2203 fn extract_trigrams_tracks_next_char_and_position() {
2204 let trigrams = extract_trigrams(b"Rust");
2205 assert_eq!(trigrams.len(), 2);
2206 assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
2207 assert_eq!(
2208 trigrams[1],
2209 (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
2210 );
2211 }
2212
2213 #[test]
2214 fn decompose_regex_extracts_literals_and_alternations() {
2215 let query = decompose_regex("abc(def|ghi)xyz");
2216 assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
2217 assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
2218 assert_eq!(query.or_groups.len(), 1);
2219 assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
2220 assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
2221 }
2222
2223 #[test]
2224 fn candidates_intersect_posting_lists() {
2225 let mut index = SearchIndex::new();
2226 let dir = tempfile::tempdir().expect("create temp dir");
2227 let alpha = dir.path().join("alpha.txt");
2228 let beta = dir.path().join("beta.txt");
2229 fs::write(&alpha, "abcdef").expect("write alpha");
2230 fs::write(&beta, "abcxyz").expect("write beta");
2231 index.project_root = dir.path().to_path_buf();
2232 index.index_file(&alpha, b"abcdef");
2233 index.index_file(&beta, b"abcxyz");
2234
2235 let query = RegexQuery {
2236 and_trigrams: vec![
2237 pack_trigram(b'a', b'b', b'c'),
2238 pack_trigram(b'd', b'e', b'f'),
2239 ],
2240 ..RegexQuery::default()
2241 };
2242
2243 let candidates = index.candidates(&query);
2244 assert_eq!(candidates.len(), 1);
2245 assert_eq!(index.files[candidates[0] as usize].path, alpha);
2246 }
2247
2248 #[test]
2249 fn candidates_apply_bloom_filters() {
2250 let mut index = SearchIndex::new();
2251 let dir = tempfile::tempdir().expect("create temp dir");
2252 let file = dir.path().join("sample.txt");
2253 fs::write(&file, "abcd efgh").expect("write sample");
2254 index.project_root = dir.path().to_path_buf();
2255 index.index_file(&file, b"abcd efgh");
2256
2257 let trigram = pack_trigram(b'a', b'b', b'c');
2258 let matching_filter = PostingFilter {
2259 next_mask: mask_for_next_char(b'd'),
2260 loc_mask: mask_for_position(0),
2261 };
2262 let non_matching_filter = PostingFilter {
2263 next_mask: mask_for_next_char(b'z'),
2264 loc_mask: mask_for_position(0),
2265 };
2266
2267 assert_eq!(
2268 index
2269 .postings_for_trigram(trigram, Some(matching_filter))
2270 .len(),
2271 1
2272 );
2273 assert!(index
2274 .postings_for_trigram(trigram, Some(non_matching_filter))
2275 .is_empty());
2276 }
2277
2278 #[test]
2279 fn disk_round_trip_preserves_postings_and_files() {
2280 let dir = tempfile::tempdir().expect("create temp dir");
2281 let project = dir.path().join("project");
2282 fs::create_dir_all(&project).expect("create project dir");
2283 let file = project.join("src.txt");
2284 fs::write(&file, "abcdef").expect("write source");
2285
2286 let mut index = SearchIndex::build(&project);
2287 index.git_head = Some("deadbeef".to_string());
2288 let cache_dir = dir.path().join("cache");
2289 index.write_to_disk(&cache_dir, index.git_head.as_deref());
2290
2291 let loaded =
2292 SearchIndex::read_from_disk(&cache_dir, &project).expect("load index from disk");
2293 assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
2294 assert_eq!(loaded.files.len(), 1);
2295 assert_eq!(
2296 relative_to_root(&loaded.project_root, &loaded.files[0].path),
2297 PathBuf::from("src.txt")
2298 );
2299 assert_eq!(loaded.postings.len(), index.postings.len());
2300 assert!(loaded
2301 .postings
2302 .contains_key(&pack_trigram(b'a', b'b', b'c')));
2303 }
2304
2305 #[test]
2306 fn cache_path_helpers_reject_absolute_and_parent_paths() {
2307 let root = PathBuf::from("/tmp/aft-project");
2308
2309 assert_eq!(
2310 cache_relative_path(&root, &root.join("src/lib.rs")),
2311 Some(PathBuf::from("src/lib.rs"))
2312 );
2313 assert!(cache_relative_path(&root, Path::new("/tmp/outside.rs")).is_none());
2314 assert!(cached_path_under_root(&root, Path::new("../outside.rs")).is_none());
2315 assert!(cached_path_under_root(&root, Path::new("/tmp/outside.rs")).is_none());
2316 assert_eq!(
2317 cached_path_under_root(&root, Path::new("src/./lib.rs")),
2318 Some(root.join("src/lib.rs"))
2319 );
2320 }
2321
2322 #[test]
2323 fn refresh_after_head_change_removes_renames_and_detects_local_files() {
2324 let dir = tempfile::tempdir().expect("create temp dir");
2325 let project = dir.path().join("project");
2326 fs::create_dir_all(&project).expect("create project dir");
2327 let canonical_project = fs::canonicalize(&project).expect("canonical project");
2328 fs::write(project.join("old.txt"), "old token\n").expect("write old");
2329 fs::write(project.join("unchanged.txt"), "before\n").expect("write unchanged");
2330
2331 Command::new("git")
2332 .arg("init")
2333 .arg(&project)
2334 .status()
2335 .expect("git init");
2336 for args in [
2337 ["config", "user.email", "aft@example.invalid"],
2338 ["config", "user.name", "AFT Test"],
2339 ] {
2340 Command::new("git")
2341 .arg("-C")
2342 .arg(&project)
2343 .args(args)
2344 .status()
2345 .expect("git config");
2346 }
2347 Command::new("git")
2348 .arg("-C")
2349 .arg(&project)
2350 .args(["add", "."])
2351 .status()
2352 .expect("git add initial");
2353 Command::new("git")
2354 .arg("-C")
2355 .arg(&project)
2356 .args(["commit", "-m", "initial"])
2357 .status()
2358 .expect("git commit initial");
2359 let previous = run_git(&project, &["rev-parse", "HEAD"]).expect("previous head");
2360 let mut baseline = SearchIndex::build(&project);
2361 baseline.git_head = Some(previous.clone());
2362
2363 fs::rename(project.join("old.txt"), project.join("new.txt")).expect("rename file");
2364 Command::new("git")
2365 .arg("-C")
2366 .arg(&project)
2367 .args(["add", "-A"])
2368 .status()
2369 .expect("git add rename");
2370 Command::new("git")
2371 .arg("-C")
2372 .arg(&project)
2373 .args(["commit", "-m", "rename"])
2374 .status()
2375 .expect("git commit rename");
2376 let current = run_git(&project, &["rev-parse", "HEAD"]).expect("current head");
2377
2378 fs::write(project.join("unchanged.txt"), "after local edit\n").expect("local edit");
2379 fs::write(project.join("untracked.txt"), "untracked token\n").expect("untracked");
2380
2381 let refreshed = SearchIndex::rebuild_or_refresh(
2382 &project,
2383 DEFAULT_MAX_FILE_SIZE,
2384 Some(current),
2385 Some(baseline),
2386 );
2387
2388 assert!(!refreshed
2389 .path_to_id
2390 .contains_key(&canonical_project.join("old.txt")));
2391 assert!(refreshed
2392 .path_to_id
2393 .contains_key(&canonical_project.join("new.txt")));
2394 assert!(refreshed
2395 .path_to_id
2396 .contains_key(&canonical_project.join("untracked.txt")));
2397 let matches = refreshed.grep("after local edit", true, &[], &[], &canonical_project, 10);
2398 assert_eq!(matches.matches.len(), 1);
2399 }
2400
2401 #[test]
2402 fn read_from_disk_rejects_corrupt_postings_checksum() {
2403 let dir = tempfile::tempdir().expect("create temp dir");
2404 let project = dir.path().join("project");
2405 fs::create_dir_all(&project).expect("create project dir");
2406 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2407
2408 let index = SearchIndex::build(&project);
2409 let cache_dir = dir.path().join("cache");
2410 index.write_to_disk(&cache_dir, None);
2411
2412 let cache_path = cache_dir.join("cache.bin");
2413 let mut bytes = fs::read(&cache_path).expect("read cache");
2414 let middle = bytes.len() / 2;
2415 bytes[middle] ^= 0xff;
2416 fs::write(&cache_path, bytes).expect("write corrupted cache");
2417
2418 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_none());
2419 }
2420
2421 #[test]
2422 fn write_to_disk_uses_temp_files_and_cleans_them_up() {
2423 let dir = tempfile::tempdir().expect("create temp dir");
2424 let project = dir.path().join("project");
2425 fs::create_dir_all(&project).expect("create project dir");
2426 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2427
2428 let index = SearchIndex::build(&project);
2429 let cache_dir = dir.path().join("cache");
2430 index.write_to_disk(&cache_dir, None);
2431
2432 assert!(cache_dir.join("cache.bin").is_file());
2433 assert!(fs::read_dir(&cache_dir)
2434 .expect("read cache dir")
2435 .all(|entry| !entry
2436 .expect("cache entry")
2437 .file_name()
2438 .to_string_lossy()
2439 .contains(".tmp.")));
2440 }
2441
2442 #[test]
2443 fn concurrent_search_index_writes_do_not_corrupt() {
2444 let dir = tempfile::tempdir().expect("create temp dir");
2445 let project = dir.path().join("project");
2446 fs::create_dir_all(&project).expect("create project dir");
2447 fs::write(project.join("src.txt"), "abcdef\n").expect("write source");
2448 let cache_dir = dir.path().join("cache");
2449
2450 let a_project = project.clone();
2451 let a_cache = cache_dir.clone();
2452 let a = std::thread::spawn(move || {
2453 let _lock = CacheLock::acquire(&a_cache).expect("acquire cache lock a");
2454 let index = SearchIndex::build(&a_project);
2455 index.write_to_disk(&a_cache, None);
2456 });
2457 let b_project = project.clone();
2458 let b_cache = cache_dir.clone();
2459 let b = std::thread::spawn(move || {
2460 let _lock = CacheLock::acquire(&b_cache).expect("acquire cache lock b");
2461 let index = SearchIndex::build(&b_project);
2462 index.write_to_disk(&b_cache, None);
2463 });
2464 a.join().expect("writer a");
2465 b.join().expect("writer b");
2466
2467 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_some());
2468 }
2469
2470 #[test]
2471 fn search_index_atomic_rename_survives_partial_write() {
2472 let dir = tempfile::tempdir().expect("create temp dir");
2473 let cache_dir = dir.path().join("cache");
2474 fs::create_dir_all(&cache_dir).expect("create cache dir");
2475 fs::write(cache_dir.join("cache.bin.tmp.1.1"), b"partial").expect("write partial tmp");
2476
2477 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2478 }
2479
2480 #[test]
2481 fn project_cache_key_includes_checkout_path() {
2482 let dir = tempfile::tempdir().expect("create temp dir");
2483 let source = dir.path().join("source");
2484 fs::create_dir_all(&source).expect("create source repo dir");
2485 fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");
2486
2487 assert!(Command::new("git")
2488 .current_dir(&source)
2489 .args(["init"])
2490 .status()
2491 .expect("init git repo")
2492 .success());
2493 assert!(Command::new("git")
2494 .current_dir(&source)
2495 .args(["add", "."])
2496 .status()
2497 .expect("git add")
2498 .success());
2499 assert!(Command::new("git")
2500 .current_dir(&source)
2501 .args([
2502 "-c",
2503 "user.name=AFT Tests",
2504 "-c",
2505 "user.email=aft-tests@example.com",
2506 "commit",
2507 "-m",
2508 "initial",
2509 ])
2510 .status()
2511 .expect("git commit")
2512 .success());
2513
2514 let clone = dir.path().join("clone");
2515 assert!(Command::new("git")
2516 .args(["clone", "--quiet"])
2517 .arg(&source)
2518 .arg(&clone)
2519 .status()
2520 .expect("git clone")
2521 .success());
2522
2523 let source_key = project_cache_key(&source);
2524 let clone_key = project_cache_key(&clone);
2525
2526 assert_eq!(source_key.len(), 16);
2527 assert_eq!(clone_key.len(), 16);
2528 assert_eq!(source_key, clone_key);
2530 }
2531
2532 #[test]
2533 fn git_head_unchanged_picks_up_local_edits() {
2534 let dir = tempfile::tempdir().expect("create temp dir");
2535 let project = dir.path().join("repo");
2536 fs::create_dir_all(&project).expect("create repo dir");
2537 let file = project.join("tracked.txt");
2538 fs::write(&file, "oldtoken\n").expect("write file");
2539 assert!(Command::new("git")
2540 .current_dir(&project)
2541 .arg("init")
2542 .status()
2543 .unwrap()
2544 .success());
2545 assert!(Command::new("git")
2546 .current_dir(&project)
2547 .args(["add", "."])
2548 .status()
2549 .unwrap()
2550 .success());
2551 assert!(Command::new("git")
2552 .current_dir(&project)
2553 .args([
2554 "-c",
2555 "user.name=AFT Tests",
2556 "-c",
2557 "user.email=aft-tests@example.com",
2558 "commit",
2559 "-m",
2560 "initial"
2561 ])
2562 .status()
2563 .unwrap()
2564 .success());
2565 let head = current_git_head(&project);
2566 let mut baseline = SearchIndex::build(&project);
2567 baseline.git_head = head.clone();
2568 fs::write(&file, "newtoken\n").expect("edit tracked file");
2569
2570 let refreshed =
2571 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, head, Some(baseline));
2572 let result = refreshed.grep("newtoken", true, &[], &[], &project, 10);
2573
2574 assert_eq!(result.total_matches, 1);
2575 }
2576
2577 #[test]
2578 fn non_git_project_reuses_cache_when_files_unchanged() {
2579 let dir = tempfile::tempdir().expect("create temp dir");
2580 let project = dir.path().join("project");
2581 fs::create_dir_all(&project).expect("create project dir");
2582 fs::write(project.join("file.txt"), "unchangedtoken\n").expect("write file");
2583 let baseline = SearchIndex::build(&project);
2584 let baseline_file_count = baseline.file_count();
2585
2586 let refreshed =
2587 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2588
2589 assert_eq!(refreshed.file_count(), baseline_file_count);
2590 assert_eq!(
2591 refreshed
2592 .grep("unchangedtoken", true, &[], &[], &project, 10)
2593 .total_matches,
2594 1
2595 );
2596 }
2597
2598 #[test]
2599 fn resolve_search_scope_disables_index_for_external_path() {
2600 let dir = tempfile::tempdir().expect("create temp dir");
2601 let project = dir.path().join("project");
2602 let outside = dir.path().join("outside");
2603 fs::create_dir_all(&project).expect("create project dir");
2604 fs::create_dir_all(&outside).expect("create outside dir");
2605
2606 let scope = resolve_search_scope(&project, outside.to_str());
2607
2608 assert_eq!(
2609 scope.root,
2610 fs::canonicalize(&outside).expect("canonicalize outside")
2611 );
2612 assert!(!scope.use_index);
2613 }
2614
2615 #[test]
2616 fn grep_filters_matches_to_search_root() {
2617 let dir = tempfile::tempdir().expect("create temp dir");
2618 let project = dir.path().join("project");
2619 let src = project.join("src");
2620 let docs = project.join("docs");
2621 fs::create_dir_all(&src).expect("create src dir");
2622 fs::create_dir_all(&docs).expect("create docs dir");
2623 fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
2624 fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");
2625
2626 let index = SearchIndex::build(&project);
2627 let result = index.grep("SearchIndex", true, &[], &[], &src, 10);
2628
2629 assert_eq!(result.files_searched, 1);
2630 assert_eq!(result.files_with_matches, 1);
2631 assert_eq!(result.matches.len(), 1);
2632 let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
2634 assert_eq!(result.matches[0].file, expected);
2635 }
2636
2637 #[test]
2638 fn grep_deduplicates_multiple_matches_on_same_line() {
2639 let dir = tempfile::tempdir().expect("create temp dir");
2640 let project = dir.path().join("project");
2641 let src = project.join("src");
2642 fs::create_dir_all(&src).expect("create src dir");
2643 fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");
2644
2645 let index = SearchIndex::build(&project);
2646 let result = index.grep("SearchIndex", true, &[], &[], &src, 10);
2647
2648 assert_eq!(result.total_matches, 1);
2649 assert_eq!(result.matches.len(), 1);
2650 }
2651
2652 #[test]
2653 fn grep_case_insensitive_unicode_literal_matches_indexed_file() {
2654 let dir = tempfile::tempdir().expect("create temp dir");
2655 let project = dir.path().join("project");
2656 fs::create_dir_all(&project).expect("create project dir");
2657 let file = project.join("unicode.txt");
2658 fs::write(&file, "äbc\n").expect("write unicode file");
2659
2660 let index = SearchIndex::build(&project);
2661 let result = index.grep("Äbc", false, &[], &[], &project, 10);
2662
2663 assert_eq!(result.total_matches, 1);
2664 assert_eq!(result.matches.len(), 1);
2665 assert_eq!(
2666 result.matches[0].file,
2667 fs::canonicalize(file).expect("canonicalize unicode file")
2668 );
2669 }
2670
2671 #[test]
2672 fn refresh_reindexes_same_size_edit_with_preserved_mtime() {
2673 let dir = tempfile::tempdir().expect("create temp dir");
2674 let project = dir.path().join("project");
2675 fs::create_dir_all(&project).expect("create project dir");
2676 let file = project.join("tokens.txt");
2677 let original_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
2678 fs::write(&file, "alpha").expect("write original file");
2679 filetime::set_file_mtime(&file, original_mtime).expect("set original mtime");
2680
2681 let baseline = SearchIndex::build(&project);
2682 fs::write(&file, "bravo").expect("write same-size edit");
2683 filetime::set_file_mtime(&file, original_mtime).expect("restore original mtime");
2684
2685 let refreshed =
2686 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2687 let result = refreshed.grep("bravo", true, &[], &[], &project, 10);
2688 let canonical_file = fs::canonicalize(&file).expect("canonicalize edited file");
2689 let refreshed_id = *refreshed
2690 .path_to_id
2691 .get(&canonical_file)
2692 .expect("file remains indexed");
2693
2694 assert_eq!(result.total_matches, 1);
2695 assert!(refreshed
2696 .postings_for_trigram(pack_trigram(b'b', b'r', b'a'), None)
2697 .contains(&refreshed_id));
2698 assert!(!refreshed
2699 .postings_for_trigram(pack_trigram(b'a', b'l', b'p'), None)
2700 .contains(&refreshed_id));
2701 }
2702
2703 #[test]
2704 fn grep_reports_total_matches_before_truncation() {
2705 let dir = tempfile::tempdir().expect("create temp dir");
2706 let project = dir.path().join("project");
2707 let src = project.join("src");
2708 fs::create_dir_all(&src).expect("create src dir");
2709 fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");
2710
2711 let index = SearchIndex::build(&project);
2712 let result = index.grep("SearchIndex", true, &[], &[], &src, 1);
2713
2714 assert_eq!(result.total_matches, 2);
2715 assert_eq!(result.matches.len(), 1);
2716 assert!(result.truncated);
2717 }
2718
2719 #[test]
2720 fn glob_filters_results_to_search_root() {
2721 let dir = tempfile::tempdir().expect("create temp dir");
2722 let project = dir.path().join("project");
2723 let src = project.join("src");
2724 let scripts = project.join("scripts");
2725 fs::create_dir_all(&src).expect("create src dir");
2726 fs::create_dir_all(&scripts).expect("create scripts dir");
2727 fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
2728 fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");
2729
2730 let index = SearchIndex::build(&project);
2731 let files = index.glob("**/*.rs", &src);
2732
2733 assert_eq!(
2734 files,
2735 vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
2736 );
2737 }
2738
2739 #[test]
2740 fn glob_includes_hidden_and_binary_files() {
2741 let dir = tempfile::tempdir().expect("create temp dir");
2742 let project = dir.path().join("project");
2743 let hidden_dir = project.join(".hidden");
2744 fs::create_dir_all(&hidden_dir).expect("create hidden dir");
2745 let hidden_file = hidden_dir.join("data.bin");
2746 fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");
2747
2748 let index = SearchIndex::build(&project);
2749 let files = index.glob("**/*.bin", &project);
2750
2751 assert_eq!(
2752 files,
2753 vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
2754 );
2755 }
2756
2757 #[test]
2758 fn read_from_disk_rejects_invalid_nanos() {
2759 let dir = tempfile::tempdir().expect("create temp dir");
2760 let cache_dir = dir.path().join("cache");
2761 fs::create_dir_all(&cache_dir).expect("create cache dir");
2762
2763 let mut postings = Vec::new();
2764 postings.extend_from_slice(INDEX_MAGIC);
2765 postings.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2766 postings.extend_from_slice(&0u32.to_le_bytes());
2767 postings.extend_from_slice(&1u32.to_le_bytes());
2768 postings.extend_from_slice(&DEFAULT_MAX_FILE_SIZE.to_le_bytes());
2769 postings.extend_from_slice(&1u32.to_le_bytes());
2770 postings.extend_from_slice(b"/");
2771 postings.push(0u8);
2772 postings.extend_from_slice(&1u32.to_le_bytes());
2773 postings.extend_from_slice(&0u64.to_le_bytes());
2774 postings.extend_from_slice(&0u64.to_le_bytes());
2775 postings.extend_from_slice(&1_000_000_000u32.to_le_bytes());
2776 postings.extend_from_slice(b"a");
2777 postings.extend_from_slice(&0u64.to_le_bytes());
2778
2779 let mut lookup = Vec::new();
2780 lookup.extend_from_slice(LOOKUP_MAGIC);
2781 lookup.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2782 lookup.extend_from_slice(&0u32.to_le_bytes());
2783
2784 let postings_checksum = crc32fast::hash(&postings);
2785 postings.extend_from_slice(&postings_checksum.to_le_bytes());
2786 let lookup_checksum = crc32fast::hash(&lookup);
2787 lookup.extend_from_slice(&lookup_checksum.to_le_bytes());
2788 let mut cache = Vec::new();
2789 cache.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
2790 cache.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2791 cache.extend_from_slice(&(postings.len() as u64).to_le_bytes());
2792 cache.extend_from_slice(&postings);
2793 cache.extend_from_slice(&lookup);
2794 fs::write(cache_dir.join("cache.bin"), cache).expect("write cache");
2795
2796 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2797 }
2798
2799 #[test]
2814 fn sort_paths_by_mtime_desc_does_not_panic_on_missing_files() {
2815 let dir = tempfile::tempdir().expect("create tempdir");
2819 let mut paths: Vec<PathBuf> = Vec::new();
2820 for i in 0..30 {
2821 let path = if i % 2 == 0 {
2823 let p = dir.path().join(format!("real-{i}.rs"));
2824 fs::write(&p, format!("// {i}\n")).expect("write");
2825 p
2826 } else {
2827 dir.path().join(format!("missing-{i}.rs"))
2828 };
2829 paths.push(path);
2830 }
2831
2832 for _ in 0..50 {
2835 let mut copy = paths.clone();
2836 sort_paths_by_mtime_desc(&mut copy);
2837 assert_eq!(copy.len(), paths.len());
2838 }
2839 }
2840
2841 #[test]
2845 fn sort_grep_matches_by_mtime_desc_does_not_panic_on_missing_files() {
2846 let dir = tempfile::tempdir().expect("create tempdir");
2847 let mut matches: Vec<GrepMatch> = Vec::new();
2848 for i in 0..30 {
2849 let file = if i % 2 == 0 {
2850 let p = dir.path().join(format!("real-{i}.rs"));
2851 fs::write(&p, format!("// {i}\n")).expect("write");
2852 p
2853 } else {
2854 dir.path().join(format!("missing-{i}.rs"))
2855 };
2856 matches.push(GrepMatch {
2857 file,
2858 line: u32::try_from(i).unwrap_or(0),
2859 column: 0,
2860 line_text: format!("match {i}"),
2861 match_text: format!("match {i}"),
2862 });
2863 }
2864
2865 for _ in 0..50 {
2866 let mut copy = matches.clone();
2867 sort_grep_matches_by_mtime_desc(&mut copy, dir.path());
2868 assert_eq!(copy.len(), matches.len());
2869 }
2870 }
2871}