1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Cursor, Read, Seek, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::sync::{
7 atomic::{AtomicBool, AtomicUsize, Ordering},
8 Arc, Mutex,
9};
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use globset::{Glob, GlobSet, GlobSetBuilder};
13use ignore::WalkBuilder;
14use rayon::prelude::*;
15use regex::bytes::{Regex, RegexBuilder};
16use regex_syntax::hir::{Hir, HirKind};
17
18use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
19use crate::fs_lock;
20
21const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
22const CACHE_MAGIC: u32 = 0x3144_4958; const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
24const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
25const INDEX_VERSION: u32 = 3;
26const PREVIEW_BYTES: usize = 8 * 1024;
27const EOF_SENTINEL: u8 = 0;
28const MAX_ENTRIES: usize = 10_000_000;
29const MIN_FILE_ENTRY_BYTES: usize = 57;
30const LOOKUP_ENTRY_BYTES: usize = 16;
31const POSTING_BYTES: usize = 6;
32static CACHE_LOCK_ACQUIRE_MUTEX: Mutex<()> = Mutex::new(());
33
34pub struct CacheLock {
35 _guard: fs_lock::LockGuard,
36}
37
38impl CacheLock {
39 pub fn acquire(cache_dir: &Path) -> std::io::Result<Self> {
40 fs::create_dir_all(cache_dir)?;
41 let path = cache_dir.join("cache.lock");
42 let _acquire_guard = CACHE_LOCK_ACQUIRE_MUTEX
43 .lock()
44 .map_err(|_| std::io::Error::other("search cache lock acquisition mutex poisoned"))?;
45 fs_lock::try_acquire(&path, Duration::from_secs(2))
46 .map(|guard| Self { _guard: guard })
47 .map_err(|error| match error {
48 fs_lock::AcquireError::Timeout => {
49 std::io::Error::other("timed out acquiring search cache lock")
50 }
51 fs_lock::AcquireError::Io(error) => error,
52 })
53 }
54}
55
56#[derive(Clone, Debug)]
57pub struct SearchIndex {
58 pub postings: HashMap<u32, Vec<Posting>>,
59 pub files: Vec<FileEntry>,
60 pub path_to_id: HashMap<PathBuf, u32>,
61 pub ready: bool,
62 project_root: PathBuf,
63 git_head: Option<String>,
64 max_file_size: u64,
65 pub file_trigrams: HashMap<u32, Vec<u32>>,
66 unindexed_files: HashSet<u32>,
67}
68
69impl SearchIndex {
70 pub fn file_count(&self) -> usize {
72 self.files.len()
73 }
74
75 pub fn trigram_count(&self) -> usize {
77 self.postings.len()
78 }
79
80 pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
82 query_trigrams_from_tokens(tokens)
83 }
84
85 pub fn lexical_rank(
87 &self,
88 query_trigrams: &[u32],
89 candidate_filter: Option<&dyn Fn(&Path) -> bool>,
90 max_files: usize,
91 ) -> Vec<(PathBuf, f32)> {
92 if query_trigrams.is_empty() || max_files == 0 {
93 return Vec::new();
94 }
95
96 let mut non_zero: Vec<(u32, usize)> = query_trigrams
97 .iter()
98 .filter_map(|trigram| {
99 let posting_count = self.postings.get(trigram).map_or(0, Vec::len);
100 (posting_count > 0).then_some((*trigram, posting_count))
101 })
102 .collect();
103 if non_zero.is_empty() {
104 return Vec::new();
105 }
106
107 non_zero.sort_unstable_by_key(|(_, posting_count)| *posting_count);
108 let selected_count = non_zero.len().min(3);
109 let candidate_cap = if selected_count == 3 { 200 } else { 500 };
110
111 let mut candidate_ids = BTreeSet::new();
112 for (trigram, _) in non_zero.iter().take(selected_count) {
113 if let Some(postings) = self.postings.get(trigram) {
114 for posting in postings {
115 if self.is_active_file(posting.file_id) {
116 candidate_ids.insert(posting.file_id);
117 }
118 }
119 }
120 }
121
122 let mut ranked = Vec::new();
123 for file_id in candidate_ids.into_iter().take(candidate_cap) {
124 let Some(entry) = self.files.get(file_id as usize) else {
125 continue;
126 };
127 if let Some(filter) = candidate_filter {
128 if !filter(&entry.path) {
129 continue;
130 }
131 }
132 let score = lexical_score(self, query_trigrams, file_id);
133 if score > 0.0 {
134 ranked.push((entry.path.clone(), score));
135 }
136 }
137
138 ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
139 ranked.truncate(max_files);
140 ranked
141 }
142}
143
144#[derive(Clone, Debug, PartialEq, Eq)]
145pub struct Posting {
146 pub file_id: u32,
147 pub next_mask: u8,
148 pub loc_mask: u8,
149}
150
151#[derive(Clone, Debug)]
152pub struct FileEntry {
153 pub path: PathBuf,
154 pub size: u64,
155 pub modified: SystemTime,
156 pub content_hash: blake3::Hash,
157}
158
159#[derive(Clone, Debug, PartialEq, Eq)]
160pub struct GrepMatch {
161 pub file: PathBuf,
162 pub line: u32,
163 pub column: u32,
164 pub line_text: String,
165 pub match_text: String,
166}
167
168#[derive(Clone, Debug)]
169pub struct GrepResult {
170 pub matches: Vec<GrepMatch>,
171 pub total_matches: usize,
172 pub files_searched: usize,
173 pub files_with_matches: usize,
174 pub index_status: IndexStatus,
175 pub truncated: bool,
176}
177
178#[derive(Clone, Copy, Debug, PartialEq, Eq)]
179pub enum IndexStatus {
180 Ready,
181 Building,
182 Fallback,
183}
184
185impl IndexStatus {
186 pub fn as_str(&self) -> &'static str {
187 match self {
188 IndexStatus::Ready => "Ready",
189 IndexStatus::Building => "Building",
190 IndexStatus::Fallback => "Fallback",
191 }
192 }
193}
194
195#[derive(Clone, Debug, Default)]
196pub struct RegexQuery {
197 pub and_trigrams: Vec<u32>,
198 pub or_groups: Vec<Vec<u32>>,
199 pub(crate) and_filters: HashMap<u32, PostingFilter>,
200 pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
201}
202
203#[derive(Clone, Copy, Debug, Default)]
204pub(crate) struct PostingFilter {
205 next_mask: u8,
206 loc_mask: u8,
207}
208
209#[derive(Clone, Debug, Default)]
210struct QueryBuild {
211 and_runs: Vec<Vec<u8>>,
212 or_groups: Vec<Vec<Vec<u8>>>,
213}
214
215#[derive(Clone, Debug, Default)]
216pub(crate) struct PathFilters {
217 includes: Option<GlobSet>,
218 excludes: Option<GlobSet>,
219}
220
221#[derive(Clone, Debug)]
222pub(crate) struct SearchScope {
223 pub root: PathBuf,
224 pub use_index: bool,
225}
226
227#[derive(Clone, Debug)]
228struct SharedGrepMatch {
229 file: Arc<PathBuf>,
230 line: u32,
231 column: u32,
232 line_text: String,
233 match_text: String,
234}
235
236#[derive(Clone, Debug)]
237enum SearchMatcher {
238 Literal(LiteralSearch),
239 Regex(Regex),
240}
241
242#[derive(Clone, Debug)]
243enum LiteralSearch {
244 CaseSensitive(Vec<u8>),
245 AsciiCaseInsensitive(Vec<u8>),
246}
247
248impl SearchIndex {
249 pub fn new() -> Self {
250 SearchIndex {
251 postings: HashMap::new(),
252 files: Vec::new(),
253 path_to_id: HashMap::new(),
254 ready: false,
255 project_root: PathBuf::new(),
256 git_head: None,
257 max_file_size: DEFAULT_MAX_FILE_SIZE,
258 file_trigrams: HashMap::new(),
259 unindexed_files: HashSet::new(),
260 }
261 }
262
263 pub fn build(root: &Path) -> Self {
264 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
265 }
266
267 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
268 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
269 let mut index = SearchIndex {
270 project_root: project_root.clone(),
271 max_file_size,
272 ..SearchIndex::new()
273 };
274
275 let filters = PathFilters::default();
276 for path in walk_project_files(&project_root, &filters) {
277 index.update_file(&path);
278 }
279
280 index.git_head = current_git_head(&project_root);
281 index.ready = true;
282 index
283 }
284
285 pub fn index_file(&mut self, path: &Path, content: &[u8]) {
286 self.remove_file(path);
287
288 let file_id = match self.allocate_file_id(path, content.len() as u64) {
289 Some(file_id) => file_id,
290 None => return,
291 };
292 if let Some(file) = self.files.get_mut(file_id as usize) {
293 file.content_hash = cache_freshness::hash_bytes(content);
294 }
295
296 let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
297 for (trigram, next_char, position) in extract_trigrams(content) {
298 let entry = trigram_map.entry(trigram).or_default();
299 entry.next_mask |= mask_for_next_char(next_char);
300 entry.loc_mask |= mask_for_position(position);
301 }
302
303 let mut file_trigrams = Vec::with_capacity(trigram_map.len());
304 for (trigram, filter) in trigram_map {
305 let postings = self.postings.entry(trigram).or_default();
306 postings.push(Posting {
307 file_id,
308 next_mask: filter.next_mask,
309 loc_mask: filter.loc_mask,
310 });
311 if postings.len() > 1
315 && postings[postings.len() - 2].file_id > postings[postings.len() - 1].file_id
316 {
317 postings.sort_unstable_by_key(|p| p.file_id);
318 }
319 file_trigrams.push(trigram);
320 }
321
322 self.file_trigrams.insert(file_id, file_trigrams);
323 self.unindexed_files.remove(&file_id);
324 }
325
326 pub fn remove_file(&mut self, path: &Path) {
327 let Some(file_id) = self.path_to_id.remove(path) else {
328 return;
329 };
330
331 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
332 for trigram in trigrams {
333 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
334 postings.retain(|posting| posting.file_id != file_id);
335 postings.is_empty()
336 } else {
337 false
338 };
339
340 if should_remove {
341 self.postings.remove(&trigram);
342 }
343 }
344 }
345
346 self.unindexed_files.remove(&file_id);
347 if let Some(file) = self.files.get_mut(file_id as usize) {
348 file.path = PathBuf::new();
349 file.size = 0;
350 file.modified = UNIX_EPOCH;
351 file.content_hash = cache_freshness::zero_hash();
352 }
353 }
354
355 pub fn update_file(&mut self, path: &Path) {
356 self.remove_file(path);
357
358 let metadata = match fs::metadata(path) {
359 Ok(metadata) if metadata.is_file() => metadata,
360 _ => return,
361 };
362
363 if is_binary_path(path, metadata.len()) {
364 self.track_unindexed_file(path, &metadata);
365 return;
366 }
367
368 if metadata.len() > self.max_file_size {
369 self.track_unindexed_file(path, &metadata);
370 return;
371 }
372
373 let content = match fs::read(path) {
374 Ok(content) => content,
375 Err(_) => return,
376 };
377
378 if is_binary_bytes(&content) {
379 self.track_unindexed_file(path, &metadata);
380 return;
381 }
382
383 self.index_file(path, &content);
384 }
385
386 pub fn grep(
387 &self,
388 pattern: &str,
389 case_sensitive: bool,
390 include: &[String],
391 exclude: &[String],
392 search_root: &Path,
393 max_results: usize,
394 ) -> GrepResult {
395 self.search_grep(
396 pattern,
397 case_sensitive,
398 include,
399 exclude,
400 search_root,
401 max_results,
402 )
403 }
404
405 pub fn search_grep(
406 &self,
407 pattern: &str,
408 case_sensitive: bool,
409 include: &[String],
410 exclude: &[String],
411 search_root: &Path,
412 max_results: usize,
413 ) -> GrepResult {
414 let is_literal = !pattern.chars().any(|c| {
417 matches!(
418 c,
419 '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '^' | '$' | '\\'
420 )
421 });
422
423 let literal_search = if is_literal {
424 if case_sensitive {
425 Some(LiteralSearch::CaseSensitive(pattern.as_bytes().to_vec()))
426 } else if pattern.is_ascii() {
427 Some(LiteralSearch::AsciiCaseInsensitive(
428 pattern
429 .as_bytes()
430 .iter()
431 .map(|byte| byte.to_ascii_lowercase())
432 .collect(),
433 ))
434 } else {
435 None
436 }
437 } else {
438 None
439 };
440
441 let regex = if literal_search.is_some() {
443 None
444 } else {
445 let regex_pattern = if is_literal {
446 regex::escape(pattern)
447 } else {
448 pattern.to_string()
449 };
450 let mut builder = RegexBuilder::new(®ex_pattern);
451 builder.case_insensitive(!case_sensitive);
452 builder.multi_line(true);
454 match builder.build() {
455 Ok(r) => Some(r),
456 Err(_) => {
457 return GrepResult {
458 matches: Vec::new(),
459 total_matches: 0,
460 files_searched: 0,
461 files_with_matches: 0,
462 index_status: if self.ready {
463 IndexStatus::Ready
464 } else {
465 IndexStatus::Building
466 },
467 truncated: false,
468 };
469 }
470 }
471 };
472
473 let matcher = if let Some(literal_search) = literal_search {
474 SearchMatcher::Literal(literal_search)
475 } else {
476 SearchMatcher::Regex(
477 regex.expect("regex should exist when literal matcher is unavailable"),
478 )
479 };
480
481 let filters = match build_path_filters(include, exclude) {
482 Ok(filters) => filters,
483 Err(_) => PathFilters::default(),
484 };
485 let search_root = canonicalize_or_normalize(search_root);
486
487 let query = if !case_sensitive && !pattern.is_ascii() {
488 RegexQuery::default()
489 } else {
490 decompose_regex(pattern)
491 };
492 let candidate_ids = self.candidates(&query);
493
494 let candidate_files: Vec<&FileEntry> = candidate_ids
495 .into_iter()
496 .filter_map(|file_id| self.files.get(file_id as usize))
497 .filter(|file| !file.path.as_os_str().is_empty())
498 .filter(|file| is_within_search_root(&search_root, &file.path))
499 .filter(|file| filters.matches(&self.project_root, &file.path))
500 .collect();
501
502 let total_matches = AtomicUsize::new(0);
503 let files_searched = AtomicUsize::new(0);
504 let files_with_matches = AtomicUsize::new(0);
505 let truncated = AtomicBool::new(false);
506 let stop_after = max_results.saturating_mul(2);
507
508 let mut matches = if candidate_files.len() > 10 {
509 candidate_files
510 .par_iter()
511 .map(|file| {
512 search_candidate_file(
513 file,
514 &matcher,
515 max_results,
516 stop_after,
517 &total_matches,
518 &files_searched,
519 &files_with_matches,
520 &truncated,
521 )
522 })
523 .reduce(Vec::new, |mut left, mut right| {
524 left.append(&mut right);
525 left
526 })
527 } else {
528 let mut matches = Vec::new();
529 for file in candidate_files {
530 matches.extend(search_candidate_file(
531 file,
532 &matcher,
533 max_results,
534 stop_after,
535 &total_matches,
536 &files_searched,
537 &files_with_matches,
538 &truncated,
539 ));
540
541 if should_stop_search(&truncated, &total_matches, stop_after) {
542 break;
543 }
544 }
545 matches
546 };
547
548 sort_shared_grep_matches_by_cached_mtime_desc(&mut matches, |path| {
549 self.path_to_id
550 .get(path)
551 .and_then(|file_id| self.files.get(*file_id as usize))
552 .map(|file| file.modified)
553 });
554
555 let matches = matches
556 .into_iter()
557 .map(|matched| GrepMatch {
558 file: matched.file.as_ref().clone(),
559 line: matched.line,
560 column: matched.column,
561 line_text: matched.line_text,
562 match_text: matched.match_text,
563 })
564 .collect();
565
566 GrepResult {
567 total_matches: total_matches.load(Ordering::Relaxed),
568 matches,
569 files_searched: files_searched.load(Ordering::Relaxed),
570 files_with_matches: files_with_matches.load(Ordering::Relaxed),
571 index_status: if self.ready {
572 IndexStatus::Ready
573 } else {
574 IndexStatus::Building
575 },
576 truncated: truncated.load(Ordering::Relaxed),
577 }
578 }
579
580 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
581 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
582 Ok(filters) => filters,
583 Err(_) => return Vec::new(),
584 };
585 let search_root = canonicalize_or_normalize(search_root);
586 let mut entries = self
587 .files
588 .iter()
589 .filter(|file| !file.path.as_os_str().is_empty())
590 .filter(|file| is_within_search_root(&search_root, &file.path))
591 .filter(|file| filters.matches(&self.project_root, &file.path))
592 .map(|file| (file.path.clone(), file.modified))
593 .collect::<Vec<_>>();
594
595 entries.sort_by(|(left_path, left_mtime), (right_path, right_mtime)| {
596 right_mtime
597 .cmp(left_mtime)
598 .then_with(|| left_path.cmp(right_path))
599 });
600
601 entries.into_iter().map(|(path, _)| path).collect()
602 }
603
604 pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
605 if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
606 return self.active_file_ids();
607 }
608
609 let mut and_trigrams = query.and_trigrams.clone();
610 and_trigrams.sort_unstable_by_key(|trigram| self.postings.get(trigram).map_or(0, Vec::len));
611
612 let mut current: Option<Vec<u32>> = None;
613
614 for trigram in and_trigrams {
615 let filter = query.and_filters.get(&trigram).copied();
616 let matches = self.postings_for_trigram(trigram, filter);
617 current = Some(match current.take() {
618 Some(existing) => intersect_sorted_ids(&existing, &matches),
619 None => matches,
620 });
621
622 if current.as_ref().is_some_and(|ids| ids.is_empty()) {
623 break;
624 }
625 }
626
627 let mut current = current.unwrap_or_else(|| self.active_file_ids());
628
629 for (index, group) in query.or_groups.iter().enumerate() {
630 let mut group_matches = Vec::new();
631 let filters = query.or_filters.get(index);
632
633 for trigram in group {
634 let filter = filters.and_then(|filters| filters.get(trigram).copied());
635 let matches = self.postings_for_trigram(*trigram, filter);
636 if group_matches.is_empty() {
637 group_matches = matches;
638 } else {
639 group_matches = union_sorted_ids(&group_matches, &matches);
640 }
641 }
642
643 current = intersect_sorted_ids(¤t, &group_matches);
644 if current.is_empty() {
645 break;
646 }
647 }
648
649 let mut unindexed = self
650 .unindexed_files
651 .iter()
652 .copied()
653 .filter(|file_id| self.is_active_file(*file_id))
654 .collect::<Vec<_>>();
655 if !unindexed.is_empty() {
656 unindexed.sort_unstable();
657 current = union_sorted_ids(¤t, &unindexed);
658 }
659
660 current
661 }
662
663 pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
664 if fs::create_dir_all(cache_dir).is_err() {
665 return;
666 }
667
668 let cache_path = cache_dir.join("cache.bin");
669 let tmp_cache = cache_dir.join(format!(
670 "cache.bin.tmp.{}.{}",
671 std::process::id(),
672 SystemTime::now()
673 .duration_since(UNIX_EPOCH)
674 .unwrap_or(Duration::ZERO)
675 .as_nanos()
676 ));
677
678 let active_ids = self.active_file_ids();
679 let mut id_map = HashMap::new();
680 for (new_id, old_id) in active_ids.iter().enumerate() {
681 let Ok(new_id_u32) = u32::try_from(new_id) else {
682 return;
683 };
684 id_map.insert(*old_id, new_id_u32);
685 }
686
687 let write_result = (|| -> std::io::Result<()> {
688 let mut postings_writer = BufWriter::new(Cursor::new(Vec::new()));
689
690 postings_writer.write_all(INDEX_MAGIC)?;
691 write_u32(&mut postings_writer, INDEX_VERSION)?;
692
693 let head = git_head.unwrap_or_default();
694 let root = self.project_root.to_string_lossy();
695 let head_len = u32::try_from(head.len())
696 .map_err(|_| std::io::Error::other("git head too large to cache"))?;
697 let root_len = u32::try_from(root.len())
698 .map_err(|_| std::io::Error::other("project root too large to cache"))?;
699 let file_count = u32::try_from(active_ids.len())
700 .map_err(|_| std::io::Error::other("too many files to cache"))?;
701
702 write_u32(&mut postings_writer, head_len)?;
703 write_u32(&mut postings_writer, root_len)?;
704 write_u64(&mut postings_writer, self.max_file_size)?;
705 write_u32(&mut postings_writer, file_count)?;
706 postings_writer.write_all(head.as_bytes())?;
707 postings_writer.write_all(root.as_bytes())?;
708
709 for old_id in &active_ids {
710 let Some(file) = self.files.get(*old_id as usize) else {
711 return Err(std::io::Error::other("missing file entry for cache write"));
712 };
713 let path =
714 cache_relative_path(&self.project_root, &file.path).ok_or_else(|| {
715 std::io::Error::other(format!(
716 "refusing to cache path outside project root: {}",
717 file.path.display()
718 ))
719 })?;
720 let path = path.to_string_lossy();
721 let path_len = u32::try_from(path.len())
722 .map_err(|_| std::io::Error::other("cached path too large"))?;
723 let modified = file
724 .modified
725 .duration_since(UNIX_EPOCH)
726 .unwrap_or(Duration::ZERO);
727 let unindexed = if self.unindexed_files.contains(old_id) {
728 1u8
729 } else {
730 0u8
731 };
732
733 postings_writer.write_all(&[unindexed])?;
734 write_u32(&mut postings_writer, path_len)?;
735 write_u64(&mut postings_writer, file.size)?;
736 write_u64(&mut postings_writer, modified.as_secs())?;
737 write_u32(&mut postings_writer, modified.subsec_nanos())?;
738 postings_writer.write_all(file.content_hash.as_bytes())?;
739 postings_writer.write_all(path.as_bytes())?;
740 }
741
742 let mut lookup_entries = Vec::new();
743 let mut postings_blob = Vec::new();
744 let mut sorted_postings: Vec<_> = self.postings.iter().collect();
745 sorted_postings.sort_by_key(|(trigram, _)| **trigram);
746
747 for (trigram, postings) in sorted_postings {
748 let offset = u64::try_from(postings_blob.len())
749 .map_err(|_| std::io::Error::other("postings blob too large"))?;
750 let mut count = 0u32;
751
752 for posting in postings {
753 let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
754 continue;
755 };
756
757 postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
758 postings_blob.push(posting.next_mask);
759 postings_blob.push(posting.loc_mask);
760 count = count.saturating_add(1);
761 }
762
763 if count > 0 {
764 lookup_entries.push((*trigram, offset, count));
765 }
766 }
767
768 write_u64(
769 &mut postings_writer,
770 u64::try_from(postings_blob.len())
771 .map_err(|_| std::io::Error::other("postings blob too large"))?,
772 )?;
773 postings_writer.write_all(&postings_blob)?;
774 postings_writer.flush()?;
775 let mut postings_blob_file = postings_writer
776 .into_inner()
777 .map_err(|error| std::io::Error::other(error.to_string()))?
778 .into_inner();
779 let checksum = crc32fast::hash(&postings_blob_file);
780 postings_blob_file.extend_from_slice(&checksum.to_le_bytes());
781
782 let mut lookup_writer = BufWriter::new(Cursor::new(Vec::new()));
783 let entry_count = u32::try_from(lookup_entries.len())
784 .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;
785
786 lookup_writer.write_all(LOOKUP_MAGIC)?;
787 write_u32(&mut lookup_writer, INDEX_VERSION)?;
788 write_u32(&mut lookup_writer, entry_count)?;
789
790 for (trigram, offset, count) in lookup_entries {
791 write_u32(&mut lookup_writer, trigram)?;
792 write_u64(&mut lookup_writer, offset)?;
793 write_u32(&mut lookup_writer, count)?;
794 }
795
796 lookup_writer.flush()?;
797 let mut lookup_blob_file = lookup_writer
798 .into_inner()
799 .map_err(|error| std::io::Error::other(error.to_string()))?
800 .into_inner();
801 let checksum = crc32fast::hash(&lookup_blob_file);
802 lookup_blob_file.extend_from_slice(&checksum.to_le_bytes());
803
804 let mut cache_writer = BufWriter::new(File::create(&tmp_cache)?);
805 write_u32(&mut cache_writer, CACHE_MAGIC)?;
806 write_u32(&mut cache_writer, INDEX_VERSION)?;
807 write_u64(
808 &mut cache_writer,
809 u64::try_from(postings_blob_file.len())
810 .map_err(|_| std::io::Error::other("postings section too large"))?,
811 )?;
812 cache_writer.write_all(&postings_blob_file)?;
813 cache_writer.write_all(&lookup_blob_file)?;
814 cache_writer.flush()?;
815 cache_writer.get_ref().sync_all()?;
816 drop(cache_writer);
817 fs::rename(&tmp_cache, &cache_path)?;
818
819 Ok(())
820 })();
821
822 if write_result.is_err() {
823 let _ = fs::remove_file(&tmp_cache);
824 }
825 }
826
827 pub fn read_from_disk(cache_dir: &Path, current_canonical_root: &Path) -> Option<Self> {
828 debug_assert!(current_canonical_root.is_absolute());
829 let cache_path = cache_dir.join("cache.bin");
830 let cache_bytes = fs::read(&cache_path).ok()?;
831 if cache_bytes.len() < 16 {
832 return None;
833 }
834 let mut header = Cursor::new(&cache_bytes);
835 if read_u32(&mut header).ok()? != CACHE_MAGIC {
836 return None;
837 }
838 if read_u32(&mut header).ok()? != INDEX_VERSION {
839 return None;
840 }
841 let postings_len_total = usize::try_from(read_u64(&mut header).ok()?).ok()?;
842 let start = usize::try_from(header.position()).ok()?;
843 let postings_end = start.checked_add(postings_len_total)?;
844 if postings_end > cache_bytes.len() {
845 return None;
846 }
847 let postings_bytes = &cache_bytes[start..postings_end];
848 let lookup_bytes = &cache_bytes[postings_end..];
849 let lookup_len_total = lookup_bytes.len();
850 let mut postings_reader = BufReader::new(Cursor::new(postings_bytes));
851 let mut lookup_reader = BufReader::new(Cursor::new(lookup_bytes));
852 if postings_len_total < 4 || lookup_len_total < 4 {
853 return None;
854 }
855 verify_crc32_bytes_slice(postings_bytes).ok()?;
856 verify_crc32_bytes_slice(lookup_bytes).ok()?;
857
858 let mut magic = [0u8; 8];
859 postings_reader.read_exact(&mut magic).ok()?;
860 if &magic != INDEX_MAGIC {
861 return None;
862 }
863 if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
864 return None;
865 }
866
867 let head_len = read_u32(&mut postings_reader).ok()? as usize;
868 let root_len = read_u32(&mut postings_reader).ok()? as usize;
869 let max_file_size = read_u64(&mut postings_reader).ok()?;
870 let file_count = read_u32(&mut postings_reader).ok()? as usize;
871 if file_count > MAX_ENTRIES {
872 return None;
873 }
874 let postings_body_len = postings_len_total.checked_sub(4)?;
875 let lookup_body_len = lookup_len_total.checked_sub(4)?;
876
877 let remaining_postings = remaining_bytes(&mut postings_reader, postings_body_len)?;
878 let minimum_file_bytes = file_count.checked_mul(MIN_FILE_ENTRY_BYTES)?;
879 if minimum_file_bytes > remaining_postings {
880 return None;
881 }
882
883 if head_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
884 return None;
885 }
886 let mut head_bytes = vec![0u8; head_len];
887 postings_reader.read_exact(&mut head_bytes).ok()?;
888 let git_head = String::from_utf8(head_bytes)
889 .ok()
890 .filter(|head| !head.is_empty());
891
892 if root_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
893 return None;
894 }
895 let mut root_bytes = vec![0u8; root_len];
896 postings_reader.read_exact(&mut root_bytes).ok()?;
897 let _stored_project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);
898 let project_root = current_canonical_root.to_path_buf();
899
900 let mut files = Vec::with_capacity(file_count);
901 let mut path_to_id = HashMap::new();
902 let mut unindexed_files = HashSet::new();
903
904 for file_id in 0..file_count {
905 let mut unindexed = [0u8; 1];
906 postings_reader.read_exact(&mut unindexed).ok()?;
907 let path_len = read_u32(&mut postings_reader).ok()? as usize;
908 let size = read_u64(&mut postings_reader).ok()?;
909 let secs = read_u64(&mut postings_reader).ok()?;
910 let nanos = read_u32(&mut postings_reader).ok()?;
911 let mut hash_bytes = [0u8; 32];
912 postings_reader.read_exact(&mut hash_bytes).ok()?;
913 let content_hash = blake3::Hash::from_bytes(hash_bytes);
914 if nanos >= 1_000_000_000 {
915 return None;
916 }
917 if path_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
918 return None;
919 }
920 let mut path_bytes = vec![0u8; path_len];
921 postings_reader.read_exact(&mut path_bytes).ok()?;
922 let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
923 let full_path = cached_path_under_root(&project_root, &relative_path)?;
924 let file_id_u32 = u32::try_from(file_id).ok()?;
925
926 files.push(FileEntry {
927 path: full_path.clone(),
928 size,
929 modified: UNIX_EPOCH + Duration::new(secs, nanos),
930 content_hash,
931 });
932 path_to_id.insert(full_path, file_id_u32);
933 if unindexed[0] == 1 {
934 unindexed_files.insert(file_id_u32);
935 }
936 }
937
938 let postings_len = read_u64(&mut postings_reader).ok()? as usize;
939 let max_postings_bytes = MAX_ENTRIES.checked_mul(POSTING_BYTES)?;
940 if postings_len > max_postings_bytes {
941 return None;
942 }
943 if postings_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
944 return None;
945 }
946 let mut postings_blob = vec![0u8; postings_len];
947 postings_reader.read_exact(&mut postings_blob).ok()?;
948
949 let mut lookup_magic = [0u8; 8];
950 lookup_reader.read_exact(&mut lookup_magic).ok()?;
951 if &lookup_magic != LOOKUP_MAGIC {
952 return None;
953 }
954 if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
955 return None;
956 }
957 let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
958 if entry_count > MAX_ENTRIES {
959 return None;
960 }
961 let remaining_lookup = remaining_bytes(&mut lookup_reader, lookup_body_len)?;
962 let minimum_lookup_bytes = entry_count.checked_mul(LOOKUP_ENTRY_BYTES)?;
963 if minimum_lookup_bytes > remaining_lookup {
964 return None;
965 }
966
967 let mut postings = HashMap::new();
968 let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
969
970 for _ in 0..entry_count {
971 let trigram = read_u32(&mut lookup_reader).ok()?;
972 let offset = read_u64(&mut lookup_reader).ok()? as usize;
973 let count = read_u32(&mut lookup_reader).ok()? as usize;
974 if count > MAX_ENTRIES {
975 return None;
976 }
977 let bytes_len = count.checked_mul(POSTING_BYTES)?;
978 let end = offset.checked_add(bytes_len)?;
979 if end > postings_blob.len() {
980 return None;
981 }
982
983 let mut trigram_postings = Vec::with_capacity(count);
984 for chunk in postings_blob[offset..end].chunks_exact(6) {
985 let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
986 let posting = Posting {
987 file_id,
988 next_mask: chunk[4],
989 loc_mask: chunk[5],
990 };
991 trigram_postings.push(posting.clone());
992 file_trigrams.entry(file_id).or_default().push(trigram);
993 }
994 postings.insert(trigram, trigram_postings);
995 }
996
997 Some(SearchIndex {
998 postings,
999 files,
1000 path_to_id,
1001 ready: false,
1002 project_root,
1003 git_head,
1004 max_file_size,
1005 file_trigrams,
1006 unindexed_files,
1007 })
1008 }
1009
1010 pub(crate) fn stored_git_head(&self) -> Option<&str> {
1011 self.git_head.as_deref()
1012 }
1013
1014 pub(crate) fn set_ready(&mut self, ready: bool) {
1015 self.ready = ready;
1016 }
1017
1018 pub(crate) fn verify_against_disk(&mut self, current_head: Option<String>) {
1019 self.git_head = current_head;
1020 verify_file_mtimes(self);
1021 self.ready = true;
1022 }
1023
1024 pub(crate) fn rebuild_or_refresh(
1025 root: &Path,
1026 max_file_size: u64,
1027 current_head: Option<String>,
1028 baseline: Option<SearchIndex>,
1029 ) -> Self {
1030 if let Some(mut baseline) = baseline {
1031 baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
1032 baseline.max_file_size = max_file_size;
1033
1034 if baseline.git_head == current_head || current_head.is_none() {
1035 baseline.git_head = current_head;
1042 verify_file_mtimes(&mut baseline);
1043 baseline.ready = true;
1044 return baseline;
1045 }
1046
1047 if let (Some(previous), Some(current)) =
1048 (baseline.git_head.clone(), current_head.clone())
1049 {
1050 let project_root = baseline.project_root.clone();
1051 if apply_git_diff_updates(&mut baseline, &project_root, &previous, ¤t) {
1052 baseline.git_head = Some(current);
1053 verify_file_mtimes(&mut baseline);
1054 baseline.ready = true;
1055 return baseline;
1056 }
1057 }
1058 }
1059
1060 SearchIndex::build_with_limit(root, max_file_size)
1061 }
1062
1063 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
1064 let file_id = u32::try_from(self.files.len()).ok()?;
1065 let metadata = fs::metadata(path).ok();
1066 let size = metadata
1067 .as_ref()
1068 .map_or(size_hint, |metadata| metadata.len());
1069 let modified = metadata
1070 .and_then(|metadata| metadata.modified().ok())
1071 .unwrap_or(UNIX_EPOCH);
1072
1073 self.files.push(FileEntry {
1074 path: path.to_path_buf(),
1075 size,
1076 modified,
1077 content_hash: cache_freshness::zero_hash(),
1078 });
1079 self.path_to_id.insert(path.to_path_buf(), file_id);
1080 Some(file_id)
1081 }
1082
1083 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
1084 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
1085 return;
1086 };
1087 self.unindexed_files.insert(file_id);
1088 self.file_trigrams.insert(file_id, Vec::new());
1089 }
1090
1091 fn active_file_ids(&self) -> Vec<u32> {
1092 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
1093 ids.sort_unstable();
1094 ids
1095 }
1096
1097 fn is_active_file(&self, file_id: u32) -> bool {
1098 self.files
1099 .get(file_id as usize)
1100 .map(|file| !file.path.as_os_str().is_empty())
1101 .unwrap_or(false)
1102 }
1103
1104 fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> Vec<u32> {
1105 let Some(postings) = self.postings.get(&trigram) else {
1106 return Vec::new();
1107 };
1108
1109 let mut matches = Vec::with_capacity(postings.len());
1110
1111 for posting in postings {
1112 if let Some(filter) = filter {
1113 if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
1116 continue;
1117 }
1118 }
1123 if self.is_active_file(posting.file_id) {
1124 matches.push(posting.file_id);
1125 }
1126 }
1127
1128 matches
1129 }
1130}
1131
1132fn search_candidate_file(
1133 file: &FileEntry,
1134 matcher: &SearchMatcher,
1135 max_results: usize,
1136 stop_after: usize,
1137 total_matches: &AtomicUsize,
1138 files_searched: &AtomicUsize,
1139 files_with_matches: &AtomicUsize,
1140 truncated: &AtomicBool,
1141) -> Vec<SharedGrepMatch> {
1142 if should_stop_search(truncated, total_matches, stop_after) {
1143 return Vec::new();
1144 }
1145
1146 let content = match read_indexed_file_bytes(&file.path) {
1147 Some(content) => content,
1148 None => return Vec::new(),
1149 };
1150 if is_binary_bytes(&content) {
1157 return Vec::new();
1158 }
1159 files_searched.fetch_add(1, Ordering::Relaxed);
1160
1161 let shared_path = Arc::new(file.path.clone());
1162 let mut matches = Vec::new();
1163 let mut line_starts = None;
1164 let mut seen_lines = HashSet::new();
1165 let mut matched_this_file = false;
1166
1167 match matcher {
1168 SearchMatcher::Literal(LiteralSearch::CaseSensitive(needle)) => {
1169 let finder = memchr::memmem::Finder::new(needle);
1170 let mut start = 0;
1171
1172 while let Some(position) = finder.find(&content[start..]) {
1173 if should_stop_search(truncated, total_matches, stop_after) {
1174 break;
1175 }
1176
1177 let offset = start + position;
1178 start = offset + 1;
1179
1180 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1181 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1182 if !seen_lines.insert(line) {
1183 continue;
1184 }
1185
1186 matched_this_file = true;
1187 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1188 if match_number > max_results {
1189 truncated.store(true, Ordering::Relaxed);
1190 break;
1191 }
1192
1193 let end = offset + needle.len();
1194 matches.push(SharedGrepMatch {
1195 file: shared_path.clone(),
1196 line,
1197 column,
1198 line_text,
1199 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1200 });
1201 }
1202 }
1203 SearchMatcher::Literal(LiteralSearch::AsciiCaseInsensitive(needle)) => {
1204 let search_content = content.to_ascii_lowercase();
1205 let finder = memchr::memmem::Finder::new(needle);
1206 let mut start = 0;
1207
1208 while let Some(position) = finder.find(&search_content[start..]) {
1209 if should_stop_search(truncated, total_matches, stop_after) {
1210 break;
1211 }
1212
1213 let offset = start + position;
1214 start = offset + 1;
1215
1216 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1217 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1218 if !seen_lines.insert(line) {
1219 continue;
1220 }
1221
1222 matched_this_file = true;
1223 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1224 if match_number > max_results {
1225 truncated.store(true, Ordering::Relaxed);
1226 break;
1227 }
1228
1229 let end = offset + needle.len();
1230 matches.push(SharedGrepMatch {
1231 file: shared_path.clone(),
1232 line,
1233 column,
1234 line_text,
1235 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1236 });
1237 }
1238 }
1239 SearchMatcher::Regex(regex) => {
1240 for matched in regex.find_iter(&content) {
1241 if should_stop_search(truncated, total_matches, stop_after) {
1242 break;
1243 }
1244
1245 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1246 let (line, column, line_text) =
1247 line_details_bytes(&content, line_starts, matched.start());
1248 if !seen_lines.insert(line) {
1249 continue;
1250 }
1251
1252 matched_this_file = true;
1253 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1254 if match_number > max_results {
1255 truncated.store(true, Ordering::Relaxed);
1256 break;
1257 }
1258
1259 matches.push(SharedGrepMatch {
1260 file: shared_path.clone(),
1261 line,
1262 column,
1263 line_text,
1264 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
1265 });
1266 }
1267 }
1268 }
1269
1270 if matched_this_file {
1271 files_with_matches.fetch_add(1, Ordering::Relaxed);
1272 }
1273
1274 matches
1275}
1276
1277fn should_stop_search(
1278 truncated: &AtomicBool,
1279 total_matches: &AtomicUsize,
1280 stop_after: usize,
1281) -> bool {
1282 truncated.load(Ordering::Relaxed) && total_matches.load(Ordering::Relaxed) >= stop_after
1283}
1284
1285fn intersect_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1286 let mut merged = Vec::with_capacity(left.len().min(right.len()));
1287 let mut left_index = 0;
1288 let mut right_index = 0;
1289
1290 while left_index < left.len() && right_index < right.len() {
1291 match left[left_index].cmp(&right[right_index]) {
1292 std::cmp::Ordering::Less => left_index += 1,
1293 std::cmp::Ordering::Greater => right_index += 1,
1294 std::cmp::Ordering::Equal => {
1295 merged.push(left[left_index]);
1296 left_index += 1;
1297 right_index += 1;
1298 }
1299 }
1300 }
1301
1302 merged
1303}
1304
1305fn union_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1306 let mut merged = Vec::with_capacity(left.len() + right.len());
1307 let mut left_index = 0;
1308 let mut right_index = 0;
1309
1310 while left_index < left.len() && right_index < right.len() {
1311 match left[left_index].cmp(&right[right_index]) {
1312 std::cmp::Ordering::Less => {
1313 merged.push(left[left_index]);
1314 left_index += 1;
1315 }
1316 std::cmp::Ordering::Greater => {
1317 merged.push(right[right_index]);
1318 right_index += 1;
1319 }
1320 std::cmp::Ordering::Equal => {
1321 merged.push(left[left_index]);
1322 left_index += 1;
1323 right_index += 1;
1324 }
1325 }
1326 }
1327
1328 merged.extend_from_slice(&left[left_index..]);
1329 merged.extend_from_slice(&right[right_index..]);
1330 merged
1331}
1332
1333pub fn decompose_regex(pattern: &str) -> RegexQuery {
1334 let hir = match regex_syntax::parse(pattern) {
1335 Ok(hir) => hir,
1336 Err(_) => return RegexQuery::default(),
1337 };
1338
1339 let build = build_query(&hir);
1340 build.into_query()
1341}
1342
1343pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
1344 ((a as u32) << 16) | ((b as u32) << 8) | c as u32
1345}
1346
1347pub fn normalize_char(c: u8) -> u8 {
1348 c.to_ascii_lowercase()
1349}
1350
1351pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
1352 if content.len() < 3 {
1353 return Vec::new();
1354 }
1355
1356 let mut trigrams = Vec::with_capacity(content.len().saturating_sub(2));
1357 for start in 0..=content.len() - 3 {
1358 let trigram = pack_trigram(
1359 normalize_char(content[start]),
1360 normalize_char(content[start + 1]),
1361 normalize_char(content[start + 2]),
1362 );
1363 let next_char = content.get(start + 3).copied().unwrap_or(EOF_SENTINEL);
1364 trigrams.push((trigram, next_char, start));
1365 }
1366 trigrams
1367}
1368
1369pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
1370 let mut seen = HashSet::new();
1371 let mut out = Vec::new();
1372 for token in tokens {
1373 for (trigram, _, _) in extract_trigrams(token.as_bytes()) {
1374 if seen.insert(trigram) {
1375 out.push(trigram);
1376 }
1377 }
1378 }
1379 out
1380}
1381
1382pub fn lexical_score(index: &SearchIndex, query_trigrams: &[u32], file_id: u32) -> f32 {
1383 if query_trigrams.is_empty() {
1384 return 0.0;
1385 }
1386
1387 let mut hits = 0u32;
1388 for &trigram in query_trigrams {
1389 if let Some(postings) = index.postings.get(&trigram) {
1390 if postings
1391 .binary_search_by(|posting| posting.file_id.cmp(&file_id))
1392 .is_ok()
1393 {
1394 hits += 1;
1395 }
1396 }
1397 }
1398
1399 if hits == 0 {
1400 return 0.0;
1401 }
1402
1403 let file_trigram_count = index
1404 .file_trigrams
1405 .get(&file_id)
1406 .map_or(1, |trigrams| trigrams.len().max(1)) as f32;
1407 (hits as f32) / (1.0 + file_trigram_count.ln())
1408}
1409
1410pub fn resolve_cache_dir(project_root: &Path, storage_dir: Option<&Path>) -> PathBuf {
1411 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
1413 return PathBuf::from(override_dir)
1414 .join("index")
1415 .join(project_cache_key(project_root));
1416 }
1417 if let Some(dir) = storage_dir {
1419 return dir.join("index").join(project_cache_key(project_root));
1420 }
1421 let home = std::env::var_os("HOME")
1426 .or_else(|| std::env::var_os("USERPROFILE"))
1427 .map(PathBuf::from)
1428 .unwrap_or_else(std::env::temp_dir);
1429 home.join(".cache")
1430 .join("aft")
1431 .join("index")
1432 .join(project_cache_key(project_root))
1433}
1434
1435pub(crate) fn build_path_filters(
1436 include: &[String],
1437 exclude: &[String],
1438) -> Result<PathFilters, String> {
1439 Ok(PathFilters {
1440 includes: build_globset(include)?,
1441 excludes: build_globset(exclude)?,
1442 })
1443}
1444
1445pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
1446 walk_project_files_from(root, root, filters)
1447}
1448
1449pub(crate) fn walk_project_files_from(
1450 filter_root: &Path,
1451 search_root: &Path,
1452 filters: &PathFilters,
1453) -> Vec<PathBuf> {
1454 let mut builder = WalkBuilder::new(search_root);
1455 builder
1456 .hidden(false)
1457 .git_ignore(true)
1458 .git_global(true)
1459 .git_exclude(true)
1460 .filter_entry(|entry| {
1461 let name = entry.file_name().to_string_lossy();
1462 if entry.file_type().map_or(false, |ft| ft.is_dir()) {
1463 return !matches!(
1464 name.as_ref(),
1465 "node_modules"
1466 | "target"
1467 | "venv"
1468 | ".venv"
1469 | ".git"
1470 | "__pycache__"
1471 | ".tox"
1472 | "dist"
1473 | "build"
1474 );
1475 }
1476 true
1477 });
1478
1479 let mut files = Vec::new();
1480 for entry in builder.build().filter_map(|entry| entry.ok()) {
1481 if !entry
1482 .file_type()
1483 .map_or(false, |file_type| file_type.is_file())
1484 {
1485 continue;
1486 }
1487 let path = entry.into_path();
1488 if filters.matches(filter_root, &path) {
1489 files.push(path);
1490 }
1491 }
1492
1493 sort_paths_by_mtime_desc(&mut files);
1494 files
1495}
1496
1497pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
1498 let bytes = fs::read(path).ok()?;
1499 if is_binary_bytes(&bytes) {
1500 return None;
1501 }
1502 String::from_utf8(bytes).ok()
1503}
1504
1505fn read_indexed_file_bytes(path: &Path) -> Option<Vec<u8>> {
1506 fs::read(path).ok()
1507}
1508
1509pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
1510 path.strip_prefix(root)
1511 .map(PathBuf::from)
1512 .unwrap_or_else(|_| path.to_path_buf())
1513}
1514
1515pub(crate) fn cache_relative_path(root: &Path, path: &Path) -> Option<PathBuf> {
1516 let normalized_root = normalize_path(root);
1517 let normalized_path = normalize_path(path);
1518 let relative = normalized_path.strip_prefix(&normalized_root).ok()?;
1519 validate_cached_relative_path(relative)
1520}
1521
1522pub(crate) fn cached_path_under_root(root: &Path, relative_path: &Path) -> Option<PathBuf> {
1523 let relative = validate_cached_relative_path(relative_path)?;
1524 let normalized_root = normalize_path(root);
1525 let full_path = normalize_path(&normalized_root.join(relative));
1526
1527 match fs::canonicalize(&full_path) {
1528 Ok(canonical_path) => {
1529 if canonical_path.starts_with(&normalized_root) {
1530 return Some(full_path);
1531 }
1532
1533 let canonical_root = fs::canonicalize(&normalized_root).ok()?;
1534 canonical_path
1535 .starts_with(&canonical_root)
1536 .then_some(full_path)
1537 }
1538 Err(_) => full_path.starts_with(&normalized_root).then_some(full_path),
1539 }
1540}
1541
1542pub(crate) fn validate_cached_relative_path(path: &Path) -> Option<PathBuf> {
1543 if path.is_absolute() {
1544 return None;
1545 }
1546
1547 let mut normalized = PathBuf::new();
1548 for component in path.components() {
1549 match component {
1550 Component::Normal(part) => normalized.push(part),
1551 Component::CurDir => {}
1552 Component::ParentDir | Component::RootDir | Component::Prefix(_) => return None,
1553 }
1554 }
1555 (!normalized.as_os_str().is_empty()).then_some(normalized)
1556}
1557
1558pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
1571 use std::collections::HashMap;
1572 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(paths.len());
1573 for path in paths.iter() {
1574 mtimes
1575 .entry(path.clone())
1576 .or_insert_with(|| path_modified_time(path));
1577 }
1578 paths.sort_by(|left, right| {
1579 let left_mtime = mtimes.get(left).and_then(|v| *v);
1580 let right_mtime = mtimes.get(right).and_then(|v| *v);
1581 right_mtime.cmp(&left_mtime).then_with(|| left.cmp(right))
1582 });
1583}
1584
1585pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
1588 use std::collections::HashMap;
1589 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::new();
1590 for m in matches.iter() {
1591 mtimes.entry(m.file.clone()).or_insert_with(|| {
1592 let resolved = resolve_match_path(project_root, &m.file);
1593 path_modified_time(&resolved)
1594 });
1595 }
1596 matches.sort_by(|left, right| {
1597 let left_mtime = mtimes.get(&left.file).and_then(|v| *v);
1598 let right_mtime = mtimes.get(&right.file).and_then(|v| *v);
1599 right_mtime
1600 .cmp(&left_mtime)
1601 .then_with(|| left.file.cmp(&right.file))
1602 .then_with(|| left.line.cmp(&right.line))
1603 .then_with(|| left.column.cmp(&right.column))
1604 });
1605}
1606
1607fn sort_shared_grep_matches_by_cached_mtime_desc<F>(
1612 matches: &mut [SharedGrepMatch],
1613 modified_for_path: F,
1614) where
1615 F: Fn(&Path) -> Option<SystemTime>,
1616{
1617 use std::collections::HashMap;
1618 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(matches.len());
1619 for m in matches.iter() {
1620 let path = m.file.as_path().to_path_buf();
1621 mtimes
1622 .entry(path.clone())
1623 .or_insert_with(|| modified_for_path(&path));
1624 }
1625 matches.sort_by(|left, right| {
1626 let left_mtime = mtimes.get(left.file.as_path()).and_then(|v| *v);
1627 let right_mtime = mtimes.get(right.file.as_path()).and_then(|v| *v);
1628 right_mtime
1629 .cmp(&left_mtime)
1630 .then_with(|| left.file.as_path().cmp(right.file.as_path()))
1631 .then_with(|| left.line.cmp(&right.line))
1632 .then_with(|| left.column.cmp(&right.column))
1633 });
1634}
1635
1636pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
1637 let resolved_project_root = canonicalize_or_normalize(project_root);
1638 let root = match path {
1639 Some(path) => {
1640 let path = PathBuf::from(path);
1641 if path.is_absolute() {
1642 canonicalize_or_normalize(&path)
1643 } else {
1644 normalize_path(&resolved_project_root.join(path))
1645 }
1646 }
1647 None => resolved_project_root.clone(),
1648 };
1649
1650 let use_index = is_within_search_root(&resolved_project_root, &root);
1651 SearchScope { root, use_index }
1652}
1653
1654pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
1655 content_inspector::inspect(content).is_binary()
1656}
1657
1658pub(crate) fn current_git_head(root: &Path) -> Option<String> {
1659 run_git(root, &["rev-parse", "HEAD"])
1660}
1661
1662pub fn project_cache_key(project_root: &Path) -> String {
1663 use sha2::{Digest, Sha256};
1664
1665 let mut hasher = Sha256::new();
1666
1667 if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
1668 hasher.update(root_commit.as_bytes());
1671 } else {
1672 let canonical_root = canonicalize_or_normalize(project_root);
1674 hasher.update(canonical_root.to_string_lossy().as_bytes());
1675 }
1676
1677 let digest = format!("{:x}", hasher.finalize());
1678 digest[..16].to_string()
1679}
1680
1681impl PathFilters {
1682 fn matches(&self, root: &Path, path: &Path) -> bool {
1683 let relative = to_glob_path(&relative_to_root(root, path));
1684 if self
1685 .includes
1686 .as_ref()
1687 .is_some_and(|includes| !includes.is_match(&relative))
1688 {
1689 return false;
1690 }
1691 if self
1692 .excludes
1693 .as_ref()
1694 .is_some_and(|excludes| excludes.is_match(&relative))
1695 {
1696 return false;
1697 }
1698 true
1699 }
1700}
1701
1702fn canonicalize_or_normalize(path: &Path) -> PathBuf {
1703 fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
1704}
1705
1706fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
1707 if path.is_absolute() {
1708 path.to_path_buf()
1709 } else {
1710 project_root.join(path)
1711 }
1712}
1713
1714fn path_modified_time(path: &Path) -> Option<SystemTime> {
1715 fs::metadata(path)
1716 .and_then(|metadata| metadata.modified())
1717 .ok()
1718}
1719
1720fn normalize_path(path: &Path) -> PathBuf {
1721 let mut result = PathBuf::new();
1722 for component in path.components() {
1723 match component {
1724 Component::ParentDir => {
1725 if !result.pop() {
1726 result.push(component);
1727 }
1728 }
1729 Component::CurDir => {}
1730 _ => result.push(component),
1731 }
1732 }
1733 result
1734}
1735
1736fn verify_file_mtimes(index: &mut SearchIndex) {
1739 let mut stale_paths = Vec::new();
1741 for entry in &index.files {
1742 if entry.path.as_os_str().is_empty() {
1743 continue; }
1745 let cached = FileFreshness {
1746 mtime: entry.modified,
1747 size: entry.size,
1748 content_hash: entry.content_hash,
1749 };
1750 match cache_freshness::verify_file_strict(&entry.path, &cached) {
1751 FreshnessVerdict::HotFresh | FreshnessVerdict::ContentFresh { .. } => {}
1752 FreshnessVerdict::Stale | FreshnessVerdict::Deleted => {
1753 stale_paths.push(entry.path.clone())
1754 }
1755 }
1756 }
1757
1758 for entry in &mut index.files {
1759 if entry.path.as_os_str().is_empty() {
1760 continue;
1761 }
1762 let cached = FileFreshness {
1763 mtime: entry.modified,
1764 size: entry.size,
1765 content_hash: entry.content_hash,
1766 };
1767 if let FreshnessVerdict::ContentFresh {
1768 new_mtime,
1769 new_size,
1770 } = cache_freshness::verify_file_strict(&entry.path, &cached)
1771 {
1772 entry.modified = new_mtime;
1773 entry.size = new_size;
1774 }
1775 }
1776
1777 for path in &stale_paths {
1779 index.update_file(path);
1780 }
1781
1782 let filters = PathFilters::default();
1784 for path in walk_project_files(&index.project_root, &filters) {
1785 if !index.path_to_id.contains_key(&path) {
1786 index.update_file(&path);
1787 }
1788 }
1789
1790 if !stale_paths.is_empty() {
1791 crate::slog_info!(
1792 "search index: refreshed {} stale file(s) from disk cache",
1793 stale_paths.len()
1794 );
1795 }
1796}
1797
1798fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
1799 normalize_path(path).starts_with(normalize_path(search_root))
1800}
1801
1802impl QueryBuild {
1803 fn into_query(self) -> RegexQuery {
1804 let mut query = RegexQuery::default();
1805
1806 for run in self.and_runs {
1807 add_run_to_and_query(&mut query, &run);
1808 }
1809
1810 for group in self.or_groups {
1811 let mut trigrams = BTreeSet::new();
1812 let mut filters = HashMap::new();
1813 for run in group {
1814 for (trigram, filter) in trigram_filters(&run) {
1815 trigrams.insert(trigram);
1816 merge_filter(filters.entry(trigram).or_default(), filter);
1817 }
1818 }
1819 if !trigrams.is_empty() {
1820 query.or_groups.push(trigrams.into_iter().collect());
1821 query.or_filters.push(filters);
1822 }
1823 }
1824
1825 query
1826 }
1827}
1828
1829fn build_query(hir: &Hir) -> QueryBuild {
1830 match hir.kind() {
1831 HirKind::Literal(literal) => {
1832 if literal.0.len() >= 3 {
1833 QueryBuild {
1834 and_runs: vec![literal.0.to_vec()],
1835 or_groups: Vec::new(),
1836 }
1837 } else {
1838 QueryBuild::default()
1839 }
1840 }
1841 HirKind::Capture(capture) => build_query(&capture.sub),
1842 HirKind::Concat(parts) => {
1843 let mut build = QueryBuild::default();
1844 for part in parts {
1845 let part_build = build_query(part);
1846 build.and_runs.extend(part_build.and_runs);
1847 build.or_groups.extend(part_build.or_groups);
1848 }
1849 build
1850 }
1851 HirKind::Alternation(parts) => {
1852 let mut group = Vec::new();
1853 for part in parts {
1854 let Some(mut choices) = guaranteed_run_choices(part) else {
1855 return QueryBuild::default();
1856 };
1857 group.append(&mut choices);
1858 }
1859 if group.is_empty() {
1860 QueryBuild::default()
1861 } else {
1862 QueryBuild {
1863 and_runs: Vec::new(),
1864 or_groups: vec![group],
1865 }
1866 }
1867 }
1868 HirKind::Repetition(repetition) => {
1869 if repetition.min == 0 {
1870 QueryBuild::default()
1871 } else {
1872 build_query(&repetition.sub)
1873 }
1874 }
1875 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
1876 }
1877}
1878
1879fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
1880 match hir.kind() {
1881 HirKind::Literal(literal) => {
1882 if literal.0.len() >= 3 {
1883 Some(vec![literal.0.to_vec()])
1884 } else {
1885 None
1886 }
1887 }
1888 HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
1889 HirKind::Concat(parts) => {
1890 let mut runs = Vec::new();
1891 for part in parts {
1892 if let Some(mut part_runs) = guaranteed_run_choices(part) {
1893 runs.append(&mut part_runs);
1894 }
1895 }
1896 if runs.is_empty() {
1897 None
1898 } else {
1899 Some(runs)
1900 }
1901 }
1902 HirKind::Alternation(parts) => {
1903 let mut runs = Vec::new();
1904 for part in parts {
1905 let Some(mut part_runs) = guaranteed_run_choices(part) else {
1906 return None;
1907 };
1908 runs.append(&mut part_runs);
1909 }
1910 if runs.is_empty() {
1911 None
1912 } else {
1913 Some(runs)
1914 }
1915 }
1916 HirKind::Repetition(repetition) => {
1917 if repetition.min == 0 {
1918 None
1919 } else {
1920 guaranteed_run_choices(&repetition.sub)
1921 }
1922 }
1923 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
1924 }
1925}
1926
1927fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1928 for (trigram, filter) in trigram_filters(run) {
1929 if !query.and_trigrams.contains(&trigram) {
1930 query.and_trigrams.push(trigram);
1931 }
1932 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1933 }
1934}
1935
1936fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1937 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1938 for (trigram, next_char, position) in extract_trigrams(run) {
1939 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1940 if next_char != EOF_SENTINEL {
1941 entry.next_mask |= mask_for_next_char(next_char);
1942 }
1943 entry.loc_mask |= mask_for_position(position);
1944 }
1945 filters.into_iter().collect()
1946}
1947
1948fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
1949 target.next_mask |= filter.next_mask;
1950 target.loc_mask |= filter.loc_mask;
1951}
1952
1953fn mask_for_next_char(next_char: u8) -> u8 {
1954 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1955 1u8 << bit
1956}
1957
1958fn mask_for_position(position: usize) -> u8 {
1959 1u8 << (position % 8)
1960}
1961
1962fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1963 if patterns.is_empty() {
1964 return Ok(None);
1965 }
1966
1967 let mut builder = GlobSetBuilder::new();
1968 for pattern in patterns {
1969 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1970 builder.add(glob);
1971 }
1972 builder.build().map(Some).map_err(|error| error.to_string())
1973}
1974
1975fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
1976 let mut buffer = [0u8; 4];
1977 reader.read_exact(&mut buffer)?;
1978 Ok(u32::from_le_bytes(buffer))
1979}
1980
1981fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
1982 let mut buffer = [0u8; 8];
1983 reader.read_exact(&mut buffer)?;
1984 Ok(u64::from_le_bytes(buffer))
1985}
1986
1987fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
1988 writer.write_all(&value.to_le_bytes())
1989}
1990
1991fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
1992 writer.write_all(&value.to_le_bytes())
1993}
1994
1995fn verify_crc32_bytes_slice(bytes: &[u8]) -> std::io::Result<()> {
1996 let Some((body, stored)) = bytes.split_last_chunk::<4>() else {
1997 return Err(std::io::Error::other("search index checksum missing"));
1998 };
1999 let expected = u32::from_le_bytes(*stored);
2000 let actual = crc32fast::hash(body);
2001 if actual != expected {
2002 return Err(std::io::Error::other("search index checksum mismatch"));
2003 }
2004 Ok(())
2005}
2006
2007fn remaining_bytes<R: Seek>(reader: &mut R, total_len: usize) -> Option<usize> {
2008 let pos = usize::try_from(reader.stream_position().ok()?).ok()?;
2009 total_len.checked_sub(pos)
2010}
2011
2012fn run_git(root: &Path, args: &[&str]) -> Option<String> {
2013 let output = Command::new("git")
2014 .arg("-C")
2015 .arg(root)
2016 .args(args)
2017 .output()
2018 .ok()?;
2019 if !output.status.success() {
2020 return None;
2021 }
2022 let value = String::from_utf8(output.stdout).ok()?;
2023 let value = value.trim().to_string();
2024 if value.is_empty() {
2025 None
2026 } else {
2027 Some(value)
2028 }
2029}
2030
2031fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
2032 let diff_range = format!("{}..{}", from, to);
2033 let output = match Command::new("git")
2034 .arg("-C")
2035 .arg(root)
2036 .args(["diff", "--name-status", "-M", &diff_range])
2037 .output()
2038 {
2039 Ok(output) => output,
2040 Err(_) => return false,
2041 };
2042
2043 if !output.status.success() {
2044 return false;
2045 }
2046
2047 let Ok(diff) = String::from_utf8(output.stdout) else {
2048 return false;
2049 };
2050
2051 for line in diff.lines().map(str::trim).filter(|line| !line.is_empty()) {
2052 let mut fields = line.split('\t');
2053 let Some(status) = fields.next() else {
2054 continue;
2055 };
2056
2057 if status.starts_with('R') {
2058 let Some(old_path) = fields
2059 .next()
2060 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2061 else {
2062 continue;
2063 };
2064 let Some(new_path) = fields
2065 .next()
2066 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2067 else {
2068 continue;
2069 };
2070 index.remove_file(&old_path);
2071 index.update_file(&new_path);
2072 continue;
2073 }
2074
2075 let Some(path) = fields
2076 .next()
2077 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2078 else {
2079 continue;
2080 };
2081 if status.starts_with('D') || !path.exists() {
2082 index.remove_file(&path);
2083 } else {
2084 index.update_file(&path);
2085 }
2086 }
2087
2088 true
2089}
2090
2091fn is_binary_path(path: &Path, size: u64) -> bool {
2092 if size == 0 {
2093 return false;
2094 }
2095
2096 let mut file = match File::open(path) {
2097 Ok(file) => file,
2098 Err(_) => return true,
2099 };
2100
2101 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
2102 match file.read(&mut preview) {
2103 Ok(read) => is_binary_bytes(&preview[..read]),
2104 Err(_) => true,
2105 }
2106}
2107
2108fn line_starts_bytes(content: &[u8]) -> Vec<usize> {
2109 let mut starts = vec![0usize];
2110 for (index, byte) in content.iter().copied().enumerate() {
2111 if byte == b'\n' {
2112 starts.push(index + 1);
2113 }
2114 }
2115 starts
2116}
2117
2118fn line_details_bytes(content: &[u8], line_starts: &[usize], offset: usize) -> (u32, u32, String) {
2119 let line_index = match line_starts.binary_search(&offset) {
2120 Ok(index) => index,
2121 Err(index) => index.saturating_sub(1),
2122 };
2123 let line_start = line_starts.get(line_index).copied().unwrap_or(0);
2124 let line_end = content[line_start..]
2125 .iter()
2126 .position(|byte| *byte == b'\n')
2127 .map(|length| line_start + length)
2128 .unwrap_or(content.len());
2129 let mut line_slice = &content[line_start..line_end];
2130 if line_slice.ends_with(b"\r") {
2131 line_slice = &line_slice[..line_slice.len() - 1];
2132 }
2133 let line_text = String::from_utf8_lossy(line_slice).into_owned();
2134 let column = String::from_utf8_lossy(&content[line_start..offset])
2135 .chars()
2136 .count() as u32
2137 + 1;
2138 (line_index as u32 + 1, column, line_text)
2139}
2140
2141fn to_glob_path(path: &Path) -> String {
2142 path.to_string_lossy().replace('\\', "/")
2143}
2144
2145#[cfg(test)]
2146mod tests {
2147 use std::process::Command;
2148
2149 use super::*;
2150
2151 #[test]
2152 fn cached_path_under_root_allows_missing_lexical_child() {
2153 let dir = tempfile::tempdir().expect("create temp dir");
2154 let project = dir.path().join("project");
2155 fs::create_dir_all(&project).expect("create project dir");
2156 let root = fs::canonicalize(&project).expect("canonicalize project");
2157
2158 let path = cached_path_under_root(&root, Path::new("future/file.rs"))
2159 .expect("missing child should fall back to lexical validation");
2160
2161 assert_eq!(path, root.join("future/file.rs"));
2162 }
2163
2164 #[cfg(unix)]
2165 #[test]
2166 fn cached_path_under_root_rejects_symlink_escape() {
2167 let dir = tempfile::tempdir().expect("create temp dir");
2168 let project = dir.path().join("project");
2169 let outside = dir.path().join("outside");
2170 fs::create_dir_all(&project).expect("create project dir");
2171 fs::create_dir_all(&outside).expect("create outside dir");
2172 fs::write(outside.join("secret.txt"), "secret").expect("write outside file");
2173 std::os::unix::fs::symlink(&outside, project.join("link")).expect("create symlink");
2174 let root = fs::canonicalize(&project).expect("canonicalize project");
2175
2176 assert!(cached_path_under_root(&root, Path::new("link/secret.txt")).is_none());
2177 }
2178
2179 #[test]
2180 fn extract_trigrams_tracks_next_char_and_position() {
2181 let trigrams = extract_trigrams(b"Rust");
2182 assert_eq!(trigrams.len(), 2);
2183 assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
2184 assert_eq!(
2185 trigrams[1],
2186 (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
2187 );
2188 }
2189
2190 #[test]
2191 fn decompose_regex_extracts_literals_and_alternations() {
2192 let query = decompose_regex("abc(def|ghi)xyz");
2193 assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
2194 assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
2195 assert_eq!(query.or_groups.len(), 1);
2196 assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
2197 assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
2198 }
2199
2200 #[test]
2201 fn candidates_intersect_posting_lists() {
2202 let mut index = SearchIndex::new();
2203 let dir = tempfile::tempdir().expect("create temp dir");
2204 let alpha = dir.path().join("alpha.txt");
2205 let beta = dir.path().join("beta.txt");
2206 fs::write(&alpha, "abcdef").expect("write alpha");
2207 fs::write(&beta, "abcxyz").expect("write beta");
2208 index.project_root = dir.path().to_path_buf();
2209 index.index_file(&alpha, b"abcdef");
2210 index.index_file(&beta, b"abcxyz");
2211
2212 let query = RegexQuery {
2213 and_trigrams: vec![
2214 pack_trigram(b'a', b'b', b'c'),
2215 pack_trigram(b'd', b'e', b'f'),
2216 ],
2217 ..RegexQuery::default()
2218 };
2219
2220 let candidates = index.candidates(&query);
2221 assert_eq!(candidates.len(), 1);
2222 assert_eq!(index.files[candidates[0] as usize].path, alpha);
2223 }
2224
2225 #[test]
2226 fn candidates_apply_bloom_filters() {
2227 let mut index = SearchIndex::new();
2228 let dir = tempfile::tempdir().expect("create temp dir");
2229 let file = dir.path().join("sample.txt");
2230 fs::write(&file, "abcd efgh").expect("write sample");
2231 index.project_root = dir.path().to_path_buf();
2232 index.index_file(&file, b"abcd efgh");
2233
2234 let trigram = pack_trigram(b'a', b'b', b'c');
2235 let matching_filter = PostingFilter {
2236 next_mask: mask_for_next_char(b'd'),
2237 loc_mask: mask_for_position(0),
2238 };
2239 let non_matching_filter = PostingFilter {
2240 next_mask: mask_for_next_char(b'z'),
2241 loc_mask: mask_for_position(0),
2242 };
2243
2244 assert_eq!(
2245 index
2246 .postings_for_trigram(trigram, Some(matching_filter))
2247 .len(),
2248 1
2249 );
2250 assert!(index
2251 .postings_for_trigram(trigram, Some(non_matching_filter))
2252 .is_empty());
2253 }
2254
2255 #[test]
2256 fn disk_round_trip_preserves_postings_and_files() {
2257 let dir = tempfile::tempdir().expect("create temp dir");
2258 let project = dir.path().join("project");
2259 fs::create_dir_all(&project).expect("create project dir");
2260 let file = project.join("src.txt");
2261 fs::write(&file, "abcdef").expect("write source");
2262
2263 let mut index = SearchIndex::build(&project);
2264 index.git_head = Some("deadbeef".to_string());
2265 let cache_dir = dir.path().join("cache");
2266 index.write_to_disk(&cache_dir, index.git_head.as_deref());
2267
2268 let loaded =
2269 SearchIndex::read_from_disk(&cache_dir, &project).expect("load index from disk");
2270 assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
2271 assert_eq!(loaded.files.len(), 1);
2272 assert_eq!(
2273 relative_to_root(&loaded.project_root, &loaded.files[0].path),
2274 PathBuf::from("src.txt")
2275 );
2276 assert_eq!(loaded.postings.len(), index.postings.len());
2277 assert!(loaded
2278 .postings
2279 .contains_key(&pack_trigram(b'a', b'b', b'c')));
2280 }
2281
2282 #[test]
2283 fn cache_path_helpers_reject_absolute_and_parent_paths() {
2284 let root = PathBuf::from("/tmp/aft-project");
2285
2286 assert_eq!(
2287 cache_relative_path(&root, &root.join("src/lib.rs")),
2288 Some(PathBuf::from("src/lib.rs"))
2289 );
2290 assert!(cache_relative_path(&root, Path::new("/tmp/outside.rs")).is_none());
2291 assert!(cached_path_under_root(&root, Path::new("../outside.rs")).is_none());
2292 assert!(cached_path_under_root(&root, Path::new("/tmp/outside.rs")).is_none());
2293 assert_eq!(
2294 cached_path_under_root(&root, Path::new("src/./lib.rs")),
2295 Some(root.join("src/lib.rs"))
2296 );
2297 }
2298
2299 #[test]
2300 fn refresh_after_head_change_removes_renames_and_detects_local_files() {
2301 let dir = tempfile::tempdir().expect("create temp dir");
2302 let project = dir.path().join("project");
2303 fs::create_dir_all(&project).expect("create project dir");
2304 let canonical_project = fs::canonicalize(&project).expect("canonical project");
2305 fs::write(project.join("old.txt"), "old token\n").expect("write old");
2306 fs::write(project.join("unchanged.txt"), "before\n").expect("write unchanged");
2307
2308 Command::new("git")
2309 .arg("init")
2310 .arg(&project)
2311 .status()
2312 .expect("git init");
2313 for args in [
2314 ["config", "user.email", "aft@example.invalid"],
2315 ["config", "user.name", "AFT Test"],
2316 ] {
2317 Command::new("git")
2318 .arg("-C")
2319 .arg(&project)
2320 .args(args)
2321 .status()
2322 .expect("git config");
2323 }
2324 Command::new("git")
2325 .arg("-C")
2326 .arg(&project)
2327 .args(["add", "."])
2328 .status()
2329 .expect("git add initial");
2330 Command::new("git")
2331 .arg("-C")
2332 .arg(&project)
2333 .args(["commit", "-m", "initial"])
2334 .status()
2335 .expect("git commit initial");
2336 let previous = run_git(&project, &["rev-parse", "HEAD"]).expect("previous head");
2337 let mut baseline = SearchIndex::build(&project);
2338 baseline.git_head = Some(previous.clone());
2339
2340 fs::rename(project.join("old.txt"), project.join("new.txt")).expect("rename file");
2341 Command::new("git")
2342 .arg("-C")
2343 .arg(&project)
2344 .args(["add", "-A"])
2345 .status()
2346 .expect("git add rename");
2347 Command::new("git")
2348 .arg("-C")
2349 .arg(&project)
2350 .args(["commit", "-m", "rename"])
2351 .status()
2352 .expect("git commit rename");
2353 let current = run_git(&project, &["rev-parse", "HEAD"]).expect("current head");
2354
2355 fs::write(project.join("unchanged.txt"), "after local edit\n").expect("local edit");
2356 fs::write(project.join("untracked.txt"), "untracked token\n").expect("untracked");
2357
2358 let refreshed = SearchIndex::rebuild_or_refresh(
2359 &project,
2360 DEFAULT_MAX_FILE_SIZE,
2361 Some(current),
2362 Some(baseline),
2363 );
2364
2365 assert!(!refreshed
2366 .path_to_id
2367 .contains_key(&canonical_project.join("old.txt")));
2368 assert!(refreshed
2369 .path_to_id
2370 .contains_key(&canonical_project.join("new.txt")));
2371 assert!(refreshed
2372 .path_to_id
2373 .contains_key(&canonical_project.join("untracked.txt")));
2374 let matches =
2375 refreshed.search_grep("after local edit", true, &[], &[], &canonical_project, 10);
2376 assert_eq!(matches.matches.len(), 1);
2377 }
2378
2379 #[test]
2380 fn read_from_disk_rejects_corrupt_postings_checksum() {
2381 let dir = tempfile::tempdir().expect("create temp dir");
2382 let project = dir.path().join("project");
2383 fs::create_dir_all(&project).expect("create project dir");
2384 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2385
2386 let index = SearchIndex::build(&project);
2387 let cache_dir = dir.path().join("cache");
2388 index.write_to_disk(&cache_dir, None);
2389
2390 let cache_path = cache_dir.join("cache.bin");
2391 let mut bytes = fs::read(&cache_path).expect("read cache");
2392 let middle = bytes.len() / 2;
2393 bytes[middle] ^= 0xff;
2394 fs::write(&cache_path, bytes).expect("write corrupted cache");
2395
2396 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_none());
2397 }
2398
2399 #[test]
2400 fn write_to_disk_uses_temp_files_and_cleans_them_up() {
2401 let dir = tempfile::tempdir().expect("create temp dir");
2402 let project = dir.path().join("project");
2403 fs::create_dir_all(&project).expect("create project dir");
2404 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2405
2406 let index = SearchIndex::build(&project);
2407 let cache_dir = dir.path().join("cache");
2408 index.write_to_disk(&cache_dir, None);
2409
2410 assert!(cache_dir.join("cache.bin").is_file());
2411 assert!(fs::read_dir(&cache_dir)
2412 .expect("read cache dir")
2413 .all(|entry| !entry
2414 .expect("cache entry")
2415 .file_name()
2416 .to_string_lossy()
2417 .contains(".tmp.")));
2418 }
2419
2420 #[test]
2421 fn concurrent_search_index_writes_do_not_corrupt() {
2422 let dir = tempfile::tempdir().expect("create temp dir");
2423 let project = dir.path().join("project");
2424 fs::create_dir_all(&project).expect("create project dir");
2425 fs::write(project.join("src.txt"), "abcdef\n").expect("write source");
2426 let cache_dir = dir.path().join("cache");
2427
2428 let a_project = project.clone();
2429 let a_cache = cache_dir.clone();
2430 let a = std::thread::spawn(move || {
2431 let _lock = CacheLock::acquire(&a_cache).expect("acquire cache lock a");
2432 let index = SearchIndex::build(&a_project);
2433 index.write_to_disk(&a_cache, None);
2434 });
2435 let b_project = project.clone();
2436 let b_cache = cache_dir.clone();
2437 let b = std::thread::spawn(move || {
2438 let _lock = CacheLock::acquire(&b_cache).expect("acquire cache lock b");
2439 let index = SearchIndex::build(&b_project);
2440 index.write_to_disk(&b_cache, None);
2441 });
2442 a.join().expect("writer a");
2443 b.join().expect("writer b");
2444
2445 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_some());
2446 }
2447
2448 #[test]
2449 fn search_index_atomic_rename_survives_partial_write() {
2450 let dir = tempfile::tempdir().expect("create temp dir");
2451 let cache_dir = dir.path().join("cache");
2452 fs::create_dir_all(&cache_dir).expect("create cache dir");
2453 fs::write(cache_dir.join("cache.bin.tmp.1.1"), b"partial").expect("write partial tmp");
2454
2455 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2456 }
2457
2458 #[test]
2459 fn project_cache_key_includes_checkout_path() {
2460 let dir = tempfile::tempdir().expect("create temp dir");
2461 let source = dir.path().join("source");
2462 fs::create_dir_all(&source).expect("create source repo dir");
2463 fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");
2464
2465 assert!(Command::new("git")
2466 .current_dir(&source)
2467 .args(["init"])
2468 .status()
2469 .expect("init git repo")
2470 .success());
2471 assert!(Command::new("git")
2472 .current_dir(&source)
2473 .args(["add", "."])
2474 .status()
2475 .expect("git add")
2476 .success());
2477 assert!(Command::new("git")
2478 .current_dir(&source)
2479 .args([
2480 "-c",
2481 "user.name=AFT Tests",
2482 "-c",
2483 "user.email=aft-tests@example.com",
2484 "commit",
2485 "-m",
2486 "initial",
2487 ])
2488 .status()
2489 .expect("git commit")
2490 .success());
2491
2492 let clone = dir.path().join("clone");
2493 assert!(Command::new("git")
2494 .args(["clone", "--quiet"])
2495 .arg(&source)
2496 .arg(&clone)
2497 .status()
2498 .expect("git clone")
2499 .success());
2500
2501 let source_key = project_cache_key(&source);
2502 let clone_key = project_cache_key(&clone);
2503
2504 assert_eq!(source_key.len(), 16);
2505 assert_eq!(clone_key.len(), 16);
2506 assert_eq!(source_key, clone_key);
2508 }
2509
2510 #[test]
2511 fn git_head_unchanged_picks_up_local_edits() {
2512 let dir = tempfile::tempdir().expect("create temp dir");
2513 let project = dir.path().join("repo");
2514 fs::create_dir_all(&project).expect("create repo dir");
2515 let file = project.join("tracked.txt");
2516 fs::write(&file, "oldtoken\n").expect("write file");
2517 assert!(Command::new("git")
2518 .current_dir(&project)
2519 .arg("init")
2520 .status()
2521 .unwrap()
2522 .success());
2523 assert!(Command::new("git")
2524 .current_dir(&project)
2525 .args(["add", "."])
2526 .status()
2527 .unwrap()
2528 .success());
2529 assert!(Command::new("git")
2530 .current_dir(&project)
2531 .args([
2532 "-c",
2533 "user.name=AFT Tests",
2534 "-c",
2535 "user.email=aft-tests@example.com",
2536 "commit",
2537 "-m",
2538 "initial"
2539 ])
2540 .status()
2541 .unwrap()
2542 .success());
2543 let head = current_git_head(&project);
2544 let mut baseline = SearchIndex::build(&project);
2545 baseline.git_head = head.clone();
2546 fs::write(&file, "newtoken\n").expect("edit tracked file");
2547
2548 let refreshed =
2549 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, head, Some(baseline));
2550 let result = refreshed.search_grep("newtoken", true, &[], &[], &project, 10);
2551
2552 assert_eq!(result.total_matches, 1);
2553 }
2554
2555 #[test]
2556 fn non_git_project_reuses_cache_when_files_unchanged() {
2557 let dir = tempfile::tempdir().expect("create temp dir");
2558 let project = dir.path().join("project");
2559 fs::create_dir_all(&project).expect("create project dir");
2560 fs::write(project.join("file.txt"), "unchangedtoken\n").expect("write file");
2561 let baseline = SearchIndex::build(&project);
2562 let baseline_file_count = baseline.file_count();
2563
2564 let refreshed =
2565 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2566
2567 assert_eq!(refreshed.file_count(), baseline_file_count);
2568 assert_eq!(
2569 refreshed
2570 .search_grep("unchangedtoken", true, &[], &[], &project, 10)
2571 .total_matches,
2572 1
2573 );
2574 }
2575
2576 #[test]
2577 fn resolve_search_scope_disables_index_for_external_path() {
2578 let dir = tempfile::tempdir().expect("create temp dir");
2579 let project = dir.path().join("project");
2580 let outside = dir.path().join("outside");
2581 fs::create_dir_all(&project).expect("create project dir");
2582 fs::create_dir_all(&outside).expect("create outside dir");
2583
2584 let scope = resolve_search_scope(&project, outside.to_str());
2585
2586 assert_eq!(
2587 scope.root,
2588 fs::canonicalize(&outside).expect("canonicalize outside")
2589 );
2590 assert!(!scope.use_index);
2591 }
2592
2593 #[test]
2594 fn grep_filters_matches_to_search_root() {
2595 let dir = tempfile::tempdir().expect("create temp dir");
2596 let project = dir.path().join("project");
2597 let src = project.join("src");
2598 let docs = project.join("docs");
2599 fs::create_dir_all(&src).expect("create src dir");
2600 fs::create_dir_all(&docs).expect("create docs dir");
2601 fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
2602 fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");
2603
2604 let index = SearchIndex::build(&project);
2605 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);
2606
2607 assert_eq!(result.files_searched, 1);
2608 assert_eq!(result.files_with_matches, 1);
2609 assert_eq!(result.matches.len(), 1);
2610 let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
2612 assert_eq!(result.matches[0].file, expected);
2613 }
2614
2615 #[test]
2616 fn grep_deduplicates_multiple_matches_on_same_line() {
2617 let dir = tempfile::tempdir().expect("create temp dir");
2618 let project = dir.path().join("project");
2619 let src = project.join("src");
2620 fs::create_dir_all(&src).expect("create src dir");
2621 fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");
2622
2623 let index = SearchIndex::build(&project);
2624 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);
2625
2626 assert_eq!(result.total_matches, 1);
2627 assert_eq!(result.matches.len(), 1);
2628 }
2629
2630 #[test]
2631 fn grep_case_insensitive_unicode_literal_matches_indexed_file() {
2632 let dir = tempfile::tempdir().expect("create temp dir");
2633 let project = dir.path().join("project");
2634 fs::create_dir_all(&project).expect("create project dir");
2635 let file = project.join("unicode.txt");
2636 fs::write(&file, "äbc\n").expect("write unicode file");
2637
2638 let index = SearchIndex::build(&project);
2639 let result = index.search_grep("Äbc", false, &[], &[], &project, 10);
2640
2641 assert_eq!(result.total_matches, 1);
2642 assert_eq!(result.matches.len(), 1);
2643 assert_eq!(
2644 result.matches[0].file,
2645 fs::canonicalize(file).expect("canonicalize unicode file")
2646 );
2647 }
2648
2649 #[test]
2650 fn refresh_reindexes_same_size_edit_with_preserved_mtime() {
2651 let dir = tempfile::tempdir().expect("create temp dir");
2652 let project = dir.path().join("project");
2653 fs::create_dir_all(&project).expect("create project dir");
2654 let file = project.join("tokens.txt");
2655 let original_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
2656 fs::write(&file, "alpha").expect("write original file");
2657 filetime::set_file_mtime(&file, original_mtime).expect("set original mtime");
2658
2659 let baseline = SearchIndex::build(&project);
2660 fs::write(&file, "bravo").expect("write same-size edit");
2661 filetime::set_file_mtime(&file, original_mtime).expect("restore original mtime");
2662
2663 let refreshed =
2664 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2665 let result = refreshed.search_grep("bravo", true, &[], &[], &project, 10);
2666 let canonical_file = fs::canonicalize(&file).expect("canonicalize edited file");
2667 let refreshed_id = *refreshed
2668 .path_to_id
2669 .get(&canonical_file)
2670 .expect("file remains indexed");
2671
2672 assert_eq!(result.total_matches, 1);
2673 assert!(refreshed
2674 .postings_for_trigram(pack_trigram(b'b', b'r', b'a'), None)
2675 .contains(&refreshed_id));
2676 assert!(!refreshed
2677 .postings_for_trigram(pack_trigram(b'a', b'l', b'p'), None)
2678 .contains(&refreshed_id));
2679 }
2680
2681 #[test]
2682 fn grep_reports_total_matches_before_truncation() {
2683 let dir = tempfile::tempdir().expect("create temp dir");
2684 let project = dir.path().join("project");
2685 let src = project.join("src");
2686 fs::create_dir_all(&src).expect("create src dir");
2687 fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");
2688
2689 let index = SearchIndex::build(&project);
2690 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 1);
2691
2692 assert_eq!(result.total_matches, 2);
2693 assert_eq!(result.matches.len(), 1);
2694 assert!(result.truncated);
2695 }
2696
2697 #[test]
2698 fn glob_filters_results_to_search_root() {
2699 let dir = tempfile::tempdir().expect("create temp dir");
2700 let project = dir.path().join("project");
2701 let src = project.join("src");
2702 let scripts = project.join("scripts");
2703 fs::create_dir_all(&src).expect("create src dir");
2704 fs::create_dir_all(&scripts).expect("create scripts dir");
2705 fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
2706 fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");
2707
2708 let index = SearchIndex::build(&project);
2709 let files = index.glob("**/*.rs", &src);
2710
2711 assert_eq!(
2712 files,
2713 vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
2714 );
2715 }
2716
2717 #[test]
2718 fn glob_includes_hidden_and_binary_files() {
2719 let dir = tempfile::tempdir().expect("create temp dir");
2720 let project = dir.path().join("project");
2721 let hidden_dir = project.join(".hidden");
2722 fs::create_dir_all(&hidden_dir).expect("create hidden dir");
2723 let hidden_file = hidden_dir.join("data.bin");
2724 fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");
2725
2726 let index = SearchIndex::build(&project);
2727 let files = index.glob("**/*.bin", &project);
2728
2729 assert_eq!(
2730 files,
2731 vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
2732 );
2733 }
2734
2735 #[test]
2736 fn read_from_disk_rejects_invalid_nanos() {
2737 let dir = tempfile::tempdir().expect("create temp dir");
2738 let cache_dir = dir.path().join("cache");
2739 fs::create_dir_all(&cache_dir).expect("create cache dir");
2740
2741 let mut postings = Vec::new();
2742 postings.extend_from_slice(INDEX_MAGIC);
2743 postings.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2744 postings.extend_from_slice(&0u32.to_le_bytes());
2745 postings.extend_from_slice(&1u32.to_le_bytes());
2746 postings.extend_from_slice(&DEFAULT_MAX_FILE_SIZE.to_le_bytes());
2747 postings.extend_from_slice(&1u32.to_le_bytes());
2748 postings.extend_from_slice(b"/");
2749 postings.push(0u8);
2750 postings.extend_from_slice(&1u32.to_le_bytes());
2751 postings.extend_from_slice(&0u64.to_le_bytes());
2752 postings.extend_from_slice(&0u64.to_le_bytes());
2753 postings.extend_from_slice(&1_000_000_000u32.to_le_bytes());
2754 postings.extend_from_slice(b"a");
2755 postings.extend_from_slice(&0u64.to_le_bytes());
2756
2757 let mut lookup = Vec::new();
2758 lookup.extend_from_slice(LOOKUP_MAGIC);
2759 lookup.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2760 lookup.extend_from_slice(&0u32.to_le_bytes());
2761
2762 let postings_checksum = crc32fast::hash(&postings);
2763 postings.extend_from_slice(&postings_checksum.to_le_bytes());
2764 let lookup_checksum = crc32fast::hash(&lookup);
2765 lookup.extend_from_slice(&lookup_checksum.to_le_bytes());
2766 let mut cache = Vec::new();
2767 cache.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
2768 cache.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2769 cache.extend_from_slice(&(postings.len() as u64).to_le_bytes());
2770 cache.extend_from_slice(&postings);
2771 cache.extend_from_slice(&lookup);
2772 fs::write(cache_dir.join("cache.bin"), cache).expect("write cache");
2773
2774 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2775 }
2776
2777 #[test]
2792 fn sort_paths_by_mtime_desc_does_not_panic_on_missing_files() {
2793 let dir = tempfile::tempdir().expect("create tempdir");
2797 let mut paths: Vec<PathBuf> = Vec::new();
2798 for i in 0..30 {
2799 let path = if i % 2 == 0 {
2801 let p = dir.path().join(format!("real-{i}.rs"));
2802 fs::write(&p, format!("// {i}\n")).expect("write");
2803 p
2804 } else {
2805 dir.path().join(format!("missing-{i}.rs"))
2806 };
2807 paths.push(path);
2808 }
2809
2810 for _ in 0..50 {
2813 let mut copy = paths.clone();
2814 sort_paths_by_mtime_desc(&mut copy);
2815 assert_eq!(copy.len(), paths.len());
2816 }
2817 }
2818
2819 #[test]
2823 fn sort_grep_matches_by_mtime_desc_does_not_panic_on_missing_files() {
2824 let dir = tempfile::tempdir().expect("create tempdir");
2825 let mut matches: Vec<GrepMatch> = Vec::new();
2826 for i in 0..30 {
2827 let file = if i % 2 == 0 {
2828 let p = dir.path().join(format!("real-{i}.rs"));
2829 fs::write(&p, format!("// {i}\n")).expect("write");
2830 p
2831 } else {
2832 dir.path().join(format!("missing-{i}.rs"))
2833 };
2834 matches.push(GrepMatch {
2835 file,
2836 line: u32::try_from(i).unwrap_or(0),
2837 column: 0,
2838 line_text: format!("match {i}"),
2839 match_text: format!("match {i}"),
2840 });
2841 }
2842
2843 for _ in 0..50 {
2844 let mut copy = matches.clone();
2845 sort_grep_matches_by_mtime_desc(&mut copy, dir.path());
2846 assert_eq!(copy.len(), matches.len());
2847 }
2848 }
2849}