1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Cursor, Read, Seek, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::sync::{
7 atomic::{AtomicBool, AtomicUsize, Ordering},
8 Arc, Mutex,
9};
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use globset::{Glob, GlobSet, GlobSetBuilder};
13use ignore::WalkBuilder;
14use rayon::prelude::*;
15use regex::bytes::Regex;
16use regex_syntax::hir::{Hir, HirKind};
17
18use crate::cache_freshness::{self, FileFreshness, FreshnessVerdict};
19use crate::fs_lock;
20use crate::pattern_compile::{self, CompileOpts, CompileResult, CompiledPattern, LiteralSearch};
21
22const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
23const CACHE_MAGIC: u32 = 0x3144_4958; const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
25const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
26const INDEX_VERSION: u32 = 3;
27const PREVIEW_BYTES: usize = 8 * 1024;
28const EOF_SENTINEL: u8 = 0;
29const MAX_ENTRIES: usize = 10_000_000;
30const MIN_FILE_ENTRY_BYTES: usize = 57;
31const LOOKUP_ENTRY_BYTES: usize = 16;
32const POSTING_BYTES: usize = 6;
33static CACHE_LOCK_ACQUIRE_MUTEX: Mutex<()> = Mutex::new(());
34
35pub struct CacheLock {
36 _guard: fs_lock::LockGuard,
37}
38
39impl CacheLock {
40 pub fn acquire(cache_dir: &Path) -> std::io::Result<Self> {
41 fs::create_dir_all(cache_dir)?;
42 let path = cache_dir.join("cache.lock");
43 let _acquire_guard = CACHE_LOCK_ACQUIRE_MUTEX
44 .lock()
45 .map_err(|_| std::io::Error::other("search cache lock acquisition mutex poisoned"))?;
46 fs_lock::try_acquire(&path, Duration::from_secs(2))
47 .map(|guard| Self { _guard: guard })
48 .map_err(|error| match error {
49 fs_lock::AcquireError::Timeout => {
50 std::io::Error::other("timed out acquiring search cache lock")
51 }
52 fs_lock::AcquireError::Io(error) => error,
53 })
54 }
55}
56
57#[derive(Clone, Debug)]
58pub struct SearchIndex {
59 pub postings: HashMap<u32, Vec<Posting>>,
60 pub files: Vec<FileEntry>,
61 pub path_to_id: HashMap<PathBuf, u32>,
62 pub ready: bool,
63 project_root: PathBuf,
64 git_head: Option<String>,
65 max_file_size: u64,
66 pub file_trigrams: HashMap<u32, Vec<u32>>,
67 unindexed_files: HashSet<u32>,
68}
69
70#[derive(Clone, Debug, Default)]
71pub struct LexicalRankResult {
72 pub files: Vec<(PathBuf, f32)>,
73 pub engine_capped: bool,
74}
75
76impl SearchIndex {
77 pub fn file_count(&self) -> usize {
79 self.files.len()
80 }
81
82 pub fn trigram_count(&self) -> usize {
84 self.postings.len()
85 }
86
87 pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
89 query_trigrams_from_tokens(tokens)
90 }
91
92 pub fn lexical_rank(
94 &self,
95 query_trigrams: &[u32],
96 candidate_filter: Option<&dyn Fn(&Path) -> bool>,
97 max_files: usize,
98 ) -> Vec<(PathBuf, f32)> {
99 self.lexical_rank_with_stats(query_trigrams, candidate_filter, max_files)
100 .files
101 }
102
103 pub fn lexical_rank_with_stats(
106 &self,
107 query_trigrams: &[u32],
108 candidate_filter: Option<&dyn Fn(&Path) -> bool>,
109 max_files: usize,
110 ) -> LexicalRankResult {
111 if query_trigrams.is_empty() || max_files == 0 {
112 return LexicalRankResult::default();
113 }
114
115 let mut non_zero: Vec<(u32, usize)> = query_trigrams
116 .iter()
117 .filter_map(|trigram| {
118 let posting_count = self.postings.get(trigram).map_or(0, Vec::len);
119 (posting_count > 0).then_some((*trigram, posting_count))
120 })
121 .collect();
122 if non_zero.is_empty() {
123 return LexicalRankResult::default();
124 }
125
126 non_zero.sort_unstable_by_key(|(_, posting_count)| *posting_count);
127 let selected_count = non_zero.len().min(3);
128 let candidate_cap = if selected_count == 3 { 200 } else { 500 };
129
130 let mut candidate_ids = BTreeSet::new();
131 for (trigram, _) in non_zero.iter().take(selected_count) {
132 if let Some(postings) = self.postings.get(trigram) {
133 for posting in postings {
134 if self.is_active_file(posting.file_id) {
135 candidate_ids.insert(posting.file_id);
136 }
137 }
138 }
139 }
140 let engine_capped = candidate_ids.len() > candidate_cap;
141
142 let mut ranked = Vec::new();
143 for file_id in candidate_ids.into_iter().take(candidate_cap) {
144 let Some(entry) = self.files.get(file_id as usize) else {
145 continue;
146 };
147 if let Some(filter) = candidate_filter {
148 if !filter(&entry.path) {
149 continue;
150 }
151 }
152 let score = lexical_score(self, query_trigrams, file_id);
153 if score > 0.0 {
154 ranked.push((entry.path.clone(), score));
155 }
156 }
157
158 ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
159 ranked.truncate(max_files);
160 LexicalRankResult {
161 files: ranked,
162 engine_capped,
163 }
164 }
165}
166
167#[derive(Clone, Debug, PartialEq, Eq)]
168pub struct Posting {
169 pub file_id: u32,
170 pub next_mask: u8,
171 pub loc_mask: u8,
172}
173
174#[derive(Clone, Debug)]
175pub struct FileEntry {
176 pub path: PathBuf,
177 pub size: u64,
178 pub modified: SystemTime,
179 pub content_hash: blake3::Hash,
180}
181
182#[derive(Clone, Debug, PartialEq, Eq)]
183pub struct GrepMatch {
184 pub file: PathBuf,
185 pub line: u32,
186 pub column: u32,
187 pub line_text: String,
188 pub match_text: String,
189}
190
191#[derive(Clone, Debug)]
192pub struct GrepResult {
193 pub matches: Vec<GrepMatch>,
194 pub total_matches: usize,
195 pub files_searched: usize,
196 pub files_with_matches: usize,
197 pub index_status: IndexStatus,
198 pub truncated: bool,
199 pub fully_degraded: bool,
200 pub engine_capped: bool,
201}
202
203#[derive(Clone, Copy, Debug, PartialEq, Eq)]
204pub enum IndexStatus {
205 Ready,
206 Building,
207 Fallback,
208 Disabled,
209}
210
211impl IndexStatus {
212 pub fn as_str(&self) -> &'static str {
213 match self {
214 IndexStatus::Ready => "Ready",
215 IndexStatus::Building => "Building",
216 IndexStatus::Fallback => "Fallback",
217 IndexStatus::Disabled => "Disabled",
218 }
219 }
220}
221
222#[derive(Clone, Debug, Default)]
223pub struct RegexQuery {
224 pub and_trigrams: Vec<u32>,
225 pub or_groups: Vec<Vec<u32>>,
226 pub(crate) and_filters: HashMap<u32, PostingFilter>,
227 pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
228}
229
230#[derive(Clone, Copy, Debug, Default)]
231pub(crate) struct PostingFilter {
232 next_mask: u8,
233 loc_mask: u8,
234}
235
236#[derive(Clone, Debug, Default)]
237struct QueryBuild {
238 and_runs: Vec<Vec<u8>>,
239 or_groups: Vec<Vec<Vec<u8>>>,
240}
241
242#[derive(Clone, Debug, Default)]
243pub(crate) struct PathFilters {
244 includes: Option<GlobSet>,
245 excludes: Option<GlobSet>,
246}
247
248#[derive(Clone, Debug)]
249pub(crate) struct SearchScope {
250 pub root: PathBuf,
251 pub use_index: bool,
252}
253
254#[derive(Clone, Debug)]
255struct SharedGrepMatch {
256 file: Arc<PathBuf>,
257 line: u32,
258 column: u32,
259 line_text: String,
260 match_text: String,
261}
262
263#[derive(Clone, Debug)]
264enum SearchMatcher {
265 Literal(LiteralSearch),
266 Regex(Regex),
267}
268
269impl SearchIndex {
270 pub fn new() -> Self {
271 SearchIndex {
272 postings: HashMap::new(),
273 files: Vec::new(),
274 path_to_id: HashMap::new(),
275 ready: false,
276 project_root: PathBuf::new(),
277 git_head: None,
278 max_file_size: DEFAULT_MAX_FILE_SIZE,
279 file_trigrams: HashMap::new(),
280 unindexed_files: HashSet::new(),
281 }
282 }
283
284 pub fn build(root: &Path) -> Self {
285 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
286 }
287
288 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
289 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
290 let mut index = SearchIndex {
291 project_root: project_root.clone(),
292 max_file_size,
293 ..SearchIndex::new()
294 };
295
296 let filters = PathFilters::default();
297 for path in walk_project_files(&project_root, &filters) {
298 index.update_file(&path);
299 }
300
301 index.git_head = current_git_head(&project_root);
302 index.ready = true;
303 index
304 }
305
306 pub fn index_file(&mut self, path: &Path, content: &[u8]) {
307 self.remove_file(path);
308
309 let file_id = match self.allocate_file_id(path, content.len() as u64) {
310 Some(file_id) => file_id,
311 None => return,
312 };
313 if let Some(file) = self.files.get_mut(file_id as usize) {
314 file.content_hash = cache_freshness::hash_bytes(content);
315 }
316
317 let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
318 for (trigram, next_char, position) in extract_trigrams(content) {
319 let entry = trigram_map.entry(trigram).or_default();
320 entry.next_mask |= mask_for_next_char(next_char);
321 entry.loc_mask |= mask_for_position(position);
322 }
323
324 let mut file_trigrams = Vec::with_capacity(trigram_map.len());
325 for (trigram, filter) in trigram_map {
326 let postings = self.postings.entry(trigram).or_default();
327 postings.push(Posting {
328 file_id,
329 next_mask: filter.next_mask,
330 loc_mask: filter.loc_mask,
331 });
332 if postings.len() > 1
336 && postings[postings.len() - 2].file_id > postings[postings.len() - 1].file_id
337 {
338 postings.sort_unstable_by_key(|p| p.file_id);
339 }
340 file_trigrams.push(trigram);
341 }
342
343 self.file_trigrams.insert(file_id, file_trigrams);
344 self.unindexed_files.remove(&file_id);
345 }
346
347 pub fn remove_file(&mut self, path: &Path) {
348 let Some(file_id) = self.path_to_id.remove(path) else {
349 return;
350 };
351
352 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
353 for trigram in trigrams {
354 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
355 postings.retain(|posting| posting.file_id != file_id);
356 postings.is_empty()
357 } else {
358 false
359 };
360
361 if should_remove {
362 self.postings.remove(&trigram);
363 }
364 }
365 }
366
367 self.unindexed_files.remove(&file_id);
368 if let Some(file) = self.files.get_mut(file_id as usize) {
369 file.path = PathBuf::new();
370 file.size = 0;
371 file.modified = UNIX_EPOCH;
372 file.content_hash = cache_freshness::zero_hash();
373 }
374 }
375
376 pub fn update_file(&mut self, path: &Path) {
377 self.remove_file(path);
378
379 let metadata = match fs::metadata(path) {
380 Ok(metadata) if metadata.is_file() => metadata,
381 _ => return,
382 };
383
384 if is_binary_path(path, metadata.len()) {
385 self.track_unindexed_file(path, &metadata);
386 return;
387 }
388
389 if metadata.len() > self.max_file_size {
390 self.track_unindexed_file(path, &metadata);
391 return;
392 }
393
394 let content = match fs::read(path) {
395 Ok(content) => content,
396 Err(_) => return,
397 };
398
399 if is_binary_bytes(&content) {
400 self.track_unindexed_file(path, &metadata);
401 return;
402 }
403
404 self.index_file(path, &content);
405 }
406
407 pub fn grep(
408 &self,
409 pattern: &str,
410 case_sensitive: bool,
411 include: &[String],
412 exclude: &[String],
413 search_root: &Path,
414 max_results: usize,
415 ) -> GrepResult {
416 match pattern_compile::compile(
417 pattern,
418 CompileOpts {
419 case_insensitive: !case_sensitive,
420 ..CompileOpts::default()
421 },
422 ) {
423 CompileResult::Ok(compiled) => {
424 self.search_grep(&compiled, include, exclude, search_root, max_results)
425 }
426 CompileResult::InvalidPattern { .. } | CompileResult::UnsupportedSyntax { .. } => {
427 self.empty_grep_result()
428 }
429 }
430 }
431
432 pub fn search_grep(
433 &self,
434 pattern: &CompiledPattern,
435 include: &[String],
436 exclude: &[String],
437 search_root: &Path,
438 max_results: usize,
439 ) -> GrepResult {
440 let matcher = match pattern {
441 CompiledPattern::Literal(literal) => SearchMatcher::Literal(literal.clone()),
442 CompiledPattern::Regex { compiled, .. } => SearchMatcher::Regex(compiled.clone()),
443 };
444
445 let filters = match build_path_filters(include, exclude) {
446 Ok(filters) => filters,
447 Err(_) => PathFilters::default(),
448 };
449 let search_root = canonicalize_or_normalize(search_root);
450
451 let raw_pattern = pattern.raw_pattern_for_trigrams();
452 let query = if pattern.case_insensitive() && !raw_pattern.is_ascii() {
453 RegexQuery::default()
454 } else {
455 decompose_regex(&raw_pattern)
456 };
457 let fully_degraded = query.and_trigrams.is_empty() && query.or_groups.is_empty();
458 let candidate_ids = self.candidates(&query);
459
460 let candidate_files: Vec<&FileEntry> = candidate_ids
461 .into_iter()
462 .filter_map(|file_id| self.files.get(file_id as usize))
463 .filter(|file| !file.path.as_os_str().is_empty())
464 .filter(|file| is_within_search_root(&search_root, &file.path))
465 .filter(|file| filters.matches(&self.project_root, &file.path))
466 .collect();
467
468 let total_matches = AtomicUsize::new(0);
469 let files_searched = AtomicUsize::new(0);
470 let files_with_matches = AtomicUsize::new(0);
471 let truncated = AtomicBool::new(false);
472 let engine_capped = AtomicBool::new(false);
473 let stop_after = max_results.saturating_mul(2);
474
475 let mut matches = if candidate_files.len() > 10 {
476 candidate_files
477 .par_iter()
478 .map(|file| {
479 search_candidate_file(
480 file,
481 &matcher,
482 max_results,
483 stop_after,
484 &total_matches,
485 &files_searched,
486 &files_with_matches,
487 &truncated,
488 &engine_capped,
489 )
490 })
491 .reduce(Vec::new, |mut left, mut right| {
492 left.append(&mut right);
493 left
494 })
495 } else {
496 let mut matches = Vec::new();
497 for file in candidate_files {
498 matches.extend(search_candidate_file(
499 file,
500 &matcher,
501 max_results,
502 stop_after,
503 &total_matches,
504 &files_searched,
505 &files_with_matches,
506 &truncated,
507 &engine_capped,
508 ));
509
510 if should_stop_search(&truncated, &total_matches, stop_after) {
511 engine_capped.store(true, Ordering::Relaxed);
512 break;
513 }
514 }
515 matches
516 };
517
518 sort_shared_grep_matches_by_cached_mtime_desc(&mut matches, |path| {
519 self.path_to_id
520 .get(path)
521 .and_then(|file_id| self.files.get(*file_id as usize))
522 .map(|file| file.modified)
523 });
524
525 let matches = matches
526 .into_iter()
527 .map(|matched| GrepMatch {
528 file: matched.file.as_ref().clone(),
529 line: matched.line,
530 column: matched.column,
531 line_text: matched.line_text,
532 match_text: matched.match_text,
533 })
534 .collect();
535
536 GrepResult {
537 total_matches: total_matches.load(Ordering::Relaxed),
538 matches,
539 files_searched: files_searched.load(Ordering::Relaxed),
540 files_with_matches: files_with_matches.load(Ordering::Relaxed),
541 index_status: if self.ready {
542 IndexStatus::Ready
543 } else {
544 IndexStatus::Building
545 },
546 truncated: truncated.load(Ordering::Relaxed),
547 fully_degraded,
548 engine_capped: engine_capped.load(Ordering::Relaxed),
549 }
550 }
551
552 fn empty_grep_result(&self) -> GrepResult {
553 GrepResult {
554 matches: Vec::new(),
555 total_matches: 0,
556 files_searched: 0,
557 files_with_matches: 0,
558 index_status: if self.ready {
559 IndexStatus::Ready
560 } else {
561 IndexStatus::Building
562 },
563 truncated: false,
564 fully_degraded: false,
565 engine_capped: false,
566 }
567 }
568
569 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
570 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
571 Ok(filters) => filters,
572 Err(_) => return Vec::new(),
573 };
574 let search_root = canonicalize_or_normalize(search_root);
575 let mut entries = self
576 .files
577 .iter()
578 .filter(|file| !file.path.as_os_str().is_empty())
579 .filter(|file| is_within_search_root(&search_root, &file.path))
580 .filter(|file| filters.matches(&self.project_root, &file.path))
581 .map(|file| (file.path.clone(), file.modified))
582 .collect::<Vec<_>>();
583
584 entries.sort_by(|(left_path, left_mtime), (right_path, right_mtime)| {
585 right_mtime
586 .cmp(left_mtime)
587 .then_with(|| left_path.cmp(right_path))
588 });
589
590 entries.into_iter().map(|(path, _)| path).collect()
591 }
592
593 pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
594 if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
595 return self.active_file_ids();
596 }
597
598 let mut and_trigrams = query.and_trigrams.clone();
599 and_trigrams.sort_unstable_by_key(|trigram| self.postings.get(trigram).map_or(0, Vec::len));
600
601 let mut current: Option<Vec<u32>> = None;
602
603 for trigram in and_trigrams {
604 let filter = query.and_filters.get(&trigram).copied();
605 let matches = self.postings_for_trigram(trigram, filter);
606 current = Some(match current.take() {
607 Some(existing) => intersect_sorted_ids(&existing, &matches),
608 None => matches,
609 });
610
611 if current.as_ref().is_some_and(|ids| ids.is_empty()) {
612 break;
613 }
614 }
615
616 let mut current = current.unwrap_or_else(|| self.active_file_ids());
617
618 for (index, group) in query.or_groups.iter().enumerate() {
619 let mut group_matches = Vec::new();
620 let filters = query.or_filters.get(index);
621
622 for trigram in group {
623 let filter = filters.and_then(|filters| filters.get(trigram).copied());
624 let matches = self.postings_for_trigram(*trigram, filter);
625 if group_matches.is_empty() {
626 group_matches = matches;
627 } else {
628 group_matches = union_sorted_ids(&group_matches, &matches);
629 }
630 }
631
632 current = intersect_sorted_ids(¤t, &group_matches);
633 if current.is_empty() {
634 break;
635 }
636 }
637
638 let mut unindexed = self
639 .unindexed_files
640 .iter()
641 .copied()
642 .filter(|file_id| self.is_active_file(*file_id))
643 .collect::<Vec<_>>();
644 if !unindexed.is_empty() {
645 unindexed.sort_unstable();
646 current = union_sorted_ids(¤t, &unindexed);
647 }
648
649 current
650 }
651
652 pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
653 if fs::create_dir_all(cache_dir).is_err() {
654 return;
655 }
656
657 let cache_path = cache_dir.join("cache.bin");
658 let tmp_cache = cache_dir.join(format!(
659 "cache.bin.tmp.{}.{}",
660 std::process::id(),
661 SystemTime::now()
662 .duration_since(UNIX_EPOCH)
663 .unwrap_or(Duration::ZERO)
664 .as_nanos()
665 ));
666
667 let active_ids = self.active_file_ids();
668 let mut id_map = HashMap::new();
669 for (new_id, old_id) in active_ids.iter().enumerate() {
670 let Ok(new_id_u32) = u32::try_from(new_id) else {
671 return;
672 };
673 id_map.insert(*old_id, new_id_u32);
674 }
675
676 let write_result = (|| -> std::io::Result<()> {
677 let mut postings_writer = BufWriter::new(Cursor::new(Vec::new()));
678
679 postings_writer.write_all(INDEX_MAGIC)?;
680 write_u32(&mut postings_writer, INDEX_VERSION)?;
681
682 let head = git_head.unwrap_or_default();
683 let root = self.project_root.to_string_lossy();
684 let head_len = u32::try_from(head.len())
685 .map_err(|_| std::io::Error::other("git head too large to cache"))?;
686 let root_len = u32::try_from(root.len())
687 .map_err(|_| std::io::Error::other("project root too large to cache"))?;
688 let file_count = u32::try_from(active_ids.len())
689 .map_err(|_| std::io::Error::other("too many files to cache"))?;
690
691 write_u32(&mut postings_writer, head_len)?;
692 write_u32(&mut postings_writer, root_len)?;
693 write_u64(&mut postings_writer, self.max_file_size)?;
694 write_u32(&mut postings_writer, file_count)?;
695 postings_writer.write_all(head.as_bytes())?;
696 postings_writer.write_all(root.as_bytes())?;
697
698 for old_id in &active_ids {
699 let Some(file) = self.files.get(*old_id as usize) else {
700 return Err(std::io::Error::other("missing file entry for cache write"));
701 };
702 let path =
703 cache_relative_path(&self.project_root, &file.path).ok_or_else(|| {
704 std::io::Error::other(format!(
705 "refusing to cache path outside project root: {}",
706 file.path.display()
707 ))
708 })?;
709 let path = path.to_string_lossy();
710 let path_len = u32::try_from(path.len())
711 .map_err(|_| std::io::Error::other("cached path too large"))?;
712 let modified = file
713 .modified
714 .duration_since(UNIX_EPOCH)
715 .unwrap_or(Duration::ZERO);
716 let unindexed = if self.unindexed_files.contains(old_id) {
717 1u8
718 } else {
719 0u8
720 };
721
722 postings_writer.write_all(&[unindexed])?;
723 write_u32(&mut postings_writer, path_len)?;
724 write_u64(&mut postings_writer, file.size)?;
725 write_u64(&mut postings_writer, modified.as_secs())?;
726 write_u32(&mut postings_writer, modified.subsec_nanos())?;
727 postings_writer.write_all(file.content_hash.as_bytes())?;
728 postings_writer.write_all(path.as_bytes())?;
729 }
730
731 let mut lookup_entries = Vec::new();
732 let mut postings_blob = Vec::new();
733 let mut sorted_postings: Vec<_> = self.postings.iter().collect();
734 sorted_postings.sort_by_key(|(trigram, _)| **trigram);
735
736 for (trigram, postings) in sorted_postings {
737 let offset = u64::try_from(postings_blob.len())
738 .map_err(|_| std::io::Error::other("postings blob too large"))?;
739 let mut count = 0u32;
740
741 for posting in postings {
742 let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
743 continue;
744 };
745
746 postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
747 postings_blob.push(posting.next_mask);
748 postings_blob.push(posting.loc_mask);
749 count = count.saturating_add(1);
750 }
751
752 if count > 0 {
753 lookup_entries.push((*trigram, offset, count));
754 }
755 }
756
757 write_u64(
758 &mut postings_writer,
759 u64::try_from(postings_blob.len())
760 .map_err(|_| std::io::Error::other("postings blob too large"))?,
761 )?;
762 postings_writer.write_all(&postings_blob)?;
763 postings_writer.flush()?;
764 let mut postings_blob_file = postings_writer
765 .into_inner()
766 .map_err(|error| std::io::Error::other(error.to_string()))?
767 .into_inner();
768 let checksum = crc32fast::hash(&postings_blob_file);
769 postings_blob_file.extend_from_slice(&checksum.to_le_bytes());
770
771 let mut lookup_writer = BufWriter::new(Cursor::new(Vec::new()));
772 let entry_count = u32::try_from(lookup_entries.len())
773 .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;
774
775 lookup_writer.write_all(LOOKUP_MAGIC)?;
776 write_u32(&mut lookup_writer, INDEX_VERSION)?;
777 write_u32(&mut lookup_writer, entry_count)?;
778
779 for (trigram, offset, count) in lookup_entries {
780 write_u32(&mut lookup_writer, trigram)?;
781 write_u64(&mut lookup_writer, offset)?;
782 write_u32(&mut lookup_writer, count)?;
783 }
784
785 lookup_writer.flush()?;
786 let mut lookup_blob_file = lookup_writer
787 .into_inner()
788 .map_err(|error| std::io::Error::other(error.to_string()))?
789 .into_inner();
790 let checksum = crc32fast::hash(&lookup_blob_file);
791 lookup_blob_file.extend_from_slice(&checksum.to_le_bytes());
792
793 let mut cache_writer = BufWriter::new(File::create(&tmp_cache)?);
794 write_u32(&mut cache_writer, CACHE_MAGIC)?;
795 write_u32(&mut cache_writer, INDEX_VERSION)?;
796 write_u64(
797 &mut cache_writer,
798 u64::try_from(postings_blob_file.len())
799 .map_err(|_| std::io::Error::other("postings section too large"))?,
800 )?;
801 cache_writer.write_all(&postings_blob_file)?;
802 cache_writer.write_all(&lookup_blob_file)?;
803 cache_writer.flush()?;
804 cache_writer.get_ref().sync_all()?;
805 drop(cache_writer);
806 fs::rename(&tmp_cache, &cache_path)?;
807
808 Ok(())
809 })();
810
811 if write_result.is_err() {
812 let _ = fs::remove_file(&tmp_cache);
813 }
814 }
815
816 pub fn read_from_disk(cache_dir: &Path, current_canonical_root: &Path) -> Option<Self> {
817 debug_assert!(current_canonical_root.is_absolute());
818 let cache_path = cache_dir.join("cache.bin");
819 let cache_bytes = fs::read(&cache_path).ok()?;
820 if cache_bytes.len() < 16 {
821 return None;
822 }
823 let mut header = Cursor::new(&cache_bytes);
824 if read_u32(&mut header).ok()? != CACHE_MAGIC {
825 return None;
826 }
827 if read_u32(&mut header).ok()? != INDEX_VERSION {
828 return None;
829 }
830 let postings_len_total = usize::try_from(read_u64(&mut header).ok()?).ok()?;
831 let start = usize::try_from(header.position()).ok()?;
832 let postings_end = start.checked_add(postings_len_total)?;
833 if postings_end > cache_bytes.len() {
834 return None;
835 }
836 let postings_bytes = &cache_bytes[start..postings_end];
837 let lookup_bytes = &cache_bytes[postings_end..];
838 let lookup_len_total = lookup_bytes.len();
839 let mut postings_reader = BufReader::new(Cursor::new(postings_bytes));
840 let mut lookup_reader = BufReader::new(Cursor::new(lookup_bytes));
841 if postings_len_total < 4 || lookup_len_total < 4 {
842 return None;
843 }
844 verify_crc32_bytes_slice(postings_bytes).ok()?;
845 verify_crc32_bytes_slice(lookup_bytes).ok()?;
846
847 let mut magic = [0u8; 8];
848 postings_reader.read_exact(&mut magic).ok()?;
849 if &magic != INDEX_MAGIC {
850 return None;
851 }
852 if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
853 return None;
854 }
855
856 let head_len = read_u32(&mut postings_reader).ok()? as usize;
857 let root_len = read_u32(&mut postings_reader).ok()? as usize;
858 let max_file_size = read_u64(&mut postings_reader).ok()?;
859 let file_count = read_u32(&mut postings_reader).ok()? as usize;
860 if file_count > MAX_ENTRIES {
861 return None;
862 }
863 let postings_body_len = postings_len_total.checked_sub(4)?;
864 let lookup_body_len = lookup_len_total.checked_sub(4)?;
865
866 let remaining_postings = remaining_bytes(&mut postings_reader, postings_body_len)?;
867 let minimum_file_bytes = file_count.checked_mul(MIN_FILE_ENTRY_BYTES)?;
868 if minimum_file_bytes > remaining_postings {
869 return None;
870 }
871
872 if head_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
873 return None;
874 }
875 let mut head_bytes = vec![0u8; head_len];
876 postings_reader.read_exact(&mut head_bytes).ok()?;
877 let git_head = String::from_utf8(head_bytes)
878 .ok()
879 .filter(|head| !head.is_empty());
880
881 if root_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
882 return None;
883 }
884 let mut root_bytes = vec![0u8; root_len];
885 postings_reader.read_exact(&mut root_bytes).ok()?;
886 let _stored_project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);
887 let project_root = current_canonical_root.to_path_buf();
888
889 let mut files = Vec::with_capacity(file_count);
890 let mut path_to_id = HashMap::new();
891 let mut unindexed_files = HashSet::new();
892
893 for file_id in 0..file_count {
894 let mut unindexed = [0u8; 1];
895 postings_reader.read_exact(&mut unindexed).ok()?;
896 let path_len = read_u32(&mut postings_reader).ok()? as usize;
897 let size = read_u64(&mut postings_reader).ok()?;
898 let secs = read_u64(&mut postings_reader).ok()?;
899 let nanos = read_u32(&mut postings_reader).ok()?;
900 let mut hash_bytes = [0u8; 32];
901 postings_reader.read_exact(&mut hash_bytes).ok()?;
902 let content_hash = blake3::Hash::from_bytes(hash_bytes);
903 if nanos >= 1_000_000_000 {
904 return None;
905 }
906 if path_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
907 return None;
908 }
909 let mut path_bytes = vec![0u8; path_len];
910 postings_reader.read_exact(&mut path_bytes).ok()?;
911 let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
912 let full_path = cached_path_under_root(&project_root, &relative_path)?;
913 let file_id_u32 = u32::try_from(file_id).ok()?;
914
915 files.push(FileEntry {
916 path: full_path.clone(),
917 size,
918 modified: UNIX_EPOCH + Duration::new(secs, nanos),
919 content_hash,
920 });
921 path_to_id.insert(full_path, file_id_u32);
922 if unindexed[0] == 1 {
923 unindexed_files.insert(file_id_u32);
924 }
925 }
926
927 let postings_len = read_u64(&mut postings_reader).ok()? as usize;
928 let max_postings_bytes = MAX_ENTRIES.checked_mul(POSTING_BYTES)?;
929 if postings_len > max_postings_bytes {
930 return None;
931 }
932 if postings_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
933 return None;
934 }
935 let mut postings_blob = vec![0u8; postings_len];
936 postings_reader.read_exact(&mut postings_blob).ok()?;
937
938 let mut lookup_magic = [0u8; 8];
939 lookup_reader.read_exact(&mut lookup_magic).ok()?;
940 if &lookup_magic != LOOKUP_MAGIC {
941 return None;
942 }
943 if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
944 return None;
945 }
946 let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
947 if entry_count > MAX_ENTRIES {
948 return None;
949 }
950 let remaining_lookup = remaining_bytes(&mut lookup_reader, lookup_body_len)?;
951 let minimum_lookup_bytes = entry_count.checked_mul(LOOKUP_ENTRY_BYTES)?;
952 if minimum_lookup_bytes > remaining_lookup {
953 return None;
954 }
955
956 let mut postings = HashMap::new();
957 let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
958
959 for _ in 0..entry_count {
960 let trigram = read_u32(&mut lookup_reader).ok()?;
961 let offset = read_u64(&mut lookup_reader).ok()? as usize;
962 let count = read_u32(&mut lookup_reader).ok()? as usize;
963 if count > MAX_ENTRIES {
964 return None;
965 }
966 let bytes_len = count.checked_mul(POSTING_BYTES)?;
967 let end = offset.checked_add(bytes_len)?;
968 if end > postings_blob.len() {
969 return None;
970 }
971
972 let mut trigram_postings = Vec::with_capacity(count);
973 for chunk in postings_blob[offset..end].chunks_exact(6) {
974 let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
975 let posting = Posting {
976 file_id,
977 next_mask: chunk[4],
978 loc_mask: chunk[5],
979 };
980 trigram_postings.push(posting.clone());
981 file_trigrams.entry(file_id).or_default().push(trigram);
982 }
983 postings.insert(trigram, trigram_postings);
984 }
985
986 Some(SearchIndex {
987 postings,
988 files,
989 path_to_id,
990 ready: false,
991 project_root,
992 git_head,
993 max_file_size,
994 file_trigrams,
995 unindexed_files,
996 })
997 }
998
999 pub(crate) fn stored_git_head(&self) -> Option<&str> {
1000 self.git_head.as_deref()
1001 }
1002
1003 pub(crate) fn set_ready(&mut self, ready: bool) {
1004 self.ready = ready;
1005 }
1006
1007 pub(crate) fn verify_against_disk(&mut self, current_head: Option<String>) {
1008 self.git_head = current_head;
1009 verify_file_mtimes(self);
1010 self.ready = true;
1011 }
1012
1013 pub(crate) fn rebuild_or_refresh(
1014 root: &Path,
1015 max_file_size: u64,
1016 current_head: Option<String>,
1017 baseline: Option<SearchIndex>,
1018 ) -> Self {
1019 if let Some(mut baseline) = baseline {
1020 baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
1021 baseline.max_file_size = max_file_size;
1022
1023 if baseline.git_head == current_head || current_head.is_none() {
1024 baseline.git_head = current_head;
1031 verify_file_mtimes(&mut baseline);
1032 baseline.ready = true;
1033 return baseline;
1034 }
1035
1036 if let (Some(previous), Some(current)) =
1037 (baseline.git_head.clone(), current_head.clone())
1038 {
1039 let project_root = baseline.project_root.clone();
1040 if apply_git_diff_updates(&mut baseline, &project_root, &previous, ¤t) {
1041 baseline.git_head = Some(current);
1042 verify_file_mtimes(&mut baseline);
1043 baseline.ready = true;
1044 return baseline;
1045 }
1046 }
1047 }
1048
1049 SearchIndex::build_with_limit(root, max_file_size)
1050 }
1051
1052 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
1053 let file_id = u32::try_from(self.files.len()).ok()?;
1054 let metadata = fs::metadata(path).ok();
1055 let size = metadata
1056 .as_ref()
1057 .map_or(size_hint, |metadata| metadata.len());
1058 let modified = metadata
1059 .and_then(|metadata| metadata.modified().ok())
1060 .unwrap_or(UNIX_EPOCH);
1061
1062 self.files.push(FileEntry {
1063 path: path.to_path_buf(),
1064 size,
1065 modified,
1066 content_hash: cache_freshness::zero_hash(),
1067 });
1068 self.path_to_id.insert(path.to_path_buf(), file_id);
1069 Some(file_id)
1070 }
1071
1072 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
1073 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
1074 return;
1075 };
1076 self.unindexed_files.insert(file_id);
1077 self.file_trigrams.insert(file_id, Vec::new());
1078 }
1079
1080 fn active_file_ids(&self) -> Vec<u32> {
1081 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
1082 ids.sort_unstable();
1083 ids
1084 }
1085
1086 fn is_active_file(&self, file_id: u32) -> bool {
1087 self.files
1088 .get(file_id as usize)
1089 .map(|file| !file.path.as_os_str().is_empty())
1090 .unwrap_or(false)
1091 }
1092
1093 fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> Vec<u32> {
1094 let Some(postings) = self.postings.get(&trigram) else {
1095 return Vec::new();
1096 };
1097
1098 let mut matches = Vec::with_capacity(postings.len());
1099
1100 for posting in postings {
1101 if let Some(filter) = filter {
1102 if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
1105 continue;
1106 }
1107 }
1112 if self.is_active_file(posting.file_id) {
1113 matches.push(posting.file_id);
1114 }
1115 }
1116
1117 matches
1118 }
1119}
1120
1121fn search_candidate_file(
1122 file: &FileEntry,
1123 matcher: &SearchMatcher,
1124 max_results: usize,
1125 stop_after: usize,
1126 total_matches: &AtomicUsize,
1127 files_searched: &AtomicUsize,
1128 files_with_matches: &AtomicUsize,
1129 truncated: &AtomicBool,
1130 engine_capped: &AtomicBool,
1131) -> Vec<SharedGrepMatch> {
1132 if should_stop_search(truncated, total_matches, stop_after) {
1133 engine_capped.store(true, Ordering::Relaxed);
1134 return Vec::new();
1135 }
1136
1137 let content = match read_indexed_file_bytes(&file.path) {
1138 Some(content) => content,
1139 None => return Vec::new(),
1140 };
1141 if is_binary_bytes(&content) {
1148 return Vec::new();
1149 }
1150 files_searched.fetch_add(1, Ordering::Relaxed);
1151
1152 let shared_path = Arc::new(file.path.clone());
1153 let mut matches = Vec::new();
1154 let mut line_starts = None;
1155 let mut seen_lines = HashSet::new();
1156 let mut matched_this_file = false;
1157
1158 match matcher {
1159 SearchMatcher::Literal(literal) if !literal.case_insensitive_ascii => {
1160 let needle = &literal.needle;
1161 let finder = memchr::memmem::Finder::new(needle);
1162 let mut start = 0;
1163
1164 while let Some(position) = finder.find(&content[start..]) {
1165 if should_stop_search(truncated, total_matches, stop_after) {
1166 engine_capped.store(true, Ordering::Relaxed);
1167 break;
1168 }
1169
1170 let offset = start + position;
1171 start = offset + 1;
1172
1173 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1174 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1175 if !seen_lines.insert(line) {
1176 continue;
1177 }
1178
1179 matched_this_file = true;
1180 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1181 if match_number > max_results {
1182 truncated.store(true, Ordering::Relaxed);
1183 break;
1184 }
1185
1186 let end = offset + needle.len();
1187 matches.push(SharedGrepMatch {
1188 file: shared_path.clone(),
1189 line,
1190 column,
1191 line_text,
1192 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1193 });
1194 }
1195 }
1196 SearchMatcher::Literal(literal) => {
1197 let needle = &literal.needle;
1198 let search_content = content.to_ascii_lowercase();
1199 let finder = memchr::memmem::Finder::new(needle);
1200 let mut start = 0;
1201
1202 while let Some(position) = finder.find(&search_content[start..]) {
1203 if should_stop_search(truncated, total_matches, stop_after) {
1204 engine_capped.store(true, Ordering::Relaxed);
1205 break;
1206 }
1207
1208 let offset = start + position;
1209 start = offset + 1;
1210
1211 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1212 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1213 if !seen_lines.insert(line) {
1214 continue;
1215 }
1216
1217 matched_this_file = true;
1218 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1219 if match_number > max_results {
1220 truncated.store(true, Ordering::Relaxed);
1221 break;
1222 }
1223
1224 let end = offset + needle.len();
1225 matches.push(SharedGrepMatch {
1226 file: shared_path.clone(),
1227 line,
1228 column,
1229 line_text,
1230 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1231 });
1232 }
1233 }
1234 SearchMatcher::Regex(regex) => {
1235 for matched in regex.find_iter(&content) {
1236 if should_stop_search(truncated, total_matches, stop_after) {
1237 engine_capped.store(true, Ordering::Relaxed);
1238 break;
1239 }
1240
1241 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1242 let (line, column, line_text) =
1243 line_details_bytes(&content, line_starts, matched.start());
1244 if !seen_lines.insert(line) {
1245 continue;
1246 }
1247
1248 matched_this_file = true;
1249 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1250 if match_number > max_results {
1251 truncated.store(true, Ordering::Relaxed);
1252 break;
1253 }
1254
1255 matches.push(SharedGrepMatch {
1256 file: shared_path.clone(),
1257 line,
1258 column,
1259 line_text,
1260 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
1261 });
1262 }
1263 }
1264 }
1265
1266 if matched_this_file {
1267 files_with_matches.fetch_add(1, Ordering::Relaxed);
1268 }
1269
1270 matches
1271}
1272
1273fn should_stop_search(
1274 truncated: &AtomicBool,
1275 total_matches: &AtomicUsize,
1276 stop_after: usize,
1277) -> bool {
1278 truncated.load(Ordering::Relaxed) && total_matches.load(Ordering::Relaxed) >= stop_after
1279}
1280
1281fn intersect_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1282 let mut merged = Vec::with_capacity(left.len().min(right.len()));
1283 let mut left_index = 0;
1284 let mut right_index = 0;
1285
1286 while left_index < left.len() && right_index < right.len() {
1287 match left[left_index].cmp(&right[right_index]) {
1288 std::cmp::Ordering::Less => left_index += 1,
1289 std::cmp::Ordering::Greater => right_index += 1,
1290 std::cmp::Ordering::Equal => {
1291 merged.push(left[left_index]);
1292 left_index += 1;
1293 right_index += 1;
1294 }
1295 }
1296 }
1297
1298 merged
1299}
1300
1301fn union_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1302 let mut merged = Vec::with_capacity(left.len() + right.len());
1303 let mut left_index = 0;
1304 let mut right_index = 0;
1305
1306 while left_index < left.len() && right_index < right.len() {
1307 match left[left_index].cmp(&right[right_index]) {
1308 std::cmp::Ordering::Less => {
1309 merged.push(left[left_index]);
1310 left_index += 1;
1311 }
1312 std::cmp::Ordering::Greater => {
1313 merged.push(right[right_index]);
1314 right_index += 1;
1315 }
1316 std::cmp::Ordering::Equal => {
1317 merged.push(left[left_index]);
1318 left_index += 1;
1319 right_index += 1;
1320 }
1321 }
1322 }
1323
1324 merged.extend_from_slice(&left[left_index..]);
1325 merged.extend_from_slice(&right[right_index..]);
1326 merged
1327}
1328
1329pub fn decompose_regex(pattern: &str) -> RegexQuery {
1330 let hir = match regex_syntax::parse(pattern) {
1331 Ok(hir) => hir,
1332 Err(_) => return RegexQuery::default(),
1333 };
1334
1335 let build = build_query(&hir);
1336 build.into_query()
1337}
1338
1339pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
1340 ((a as u32) << 16) | ((b as u32) << 8) | c as u32
1341}
1342
1343pub fn normalize_char(c: u8) -> u8 {
1344 c.to_ascii_lowercase()
1345}
1346
1347pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
1348 if content.len() < 3 {
1349 return Vec::new();
1350 }
1351
1352 let mut trigrams = Vec::with_capacity(content.len().saturating_sub(2));
1353 for start in 0..=content.len() - 3 {
1354 let trigram = pack_trigram(
1355 normalize_char(content[start]),
1356 normalize_char(content[start + 1]),
1357 normalize_char(content[start + 2]),
1358 );
1359 let next_char = content.get(start + 3).copied().unwrap_or(EOF_SENTINEL);
1360 trigrams.push((trigram, next_char, start));
1361 }
1362 trigrams
1363}
1364
1365pub fn query_trigrams_from_tokens(tokens: &[&str]) -> Vec<u32> {
1366 let mut seen = HashSet::new();
1367 let mut out = Vec::new();
1368 for token in tokens {
1369 for (trigram, _, _) in extract_trigrams(token.as_bytes()) {
1370 if seen.insert(trigram) {
1371 out.push(trigram);
1372 }
1373 }
1374 }
1375 out
1376}
1377
1378pub fn lexical_score(index: &SearchIndex, query_trigrams: &[u32], file_id: u32) -> f32 {
1379 if query_trigrams.is_empty() {
1380 return 0.0;
1381 }
1382
1383 let mut hits = 0u32;
1384 for &trigram in query_trigrams {
1385 if let Some(postings) = index.postings.get(&trigram) {
1386 if postings
1387 .binary_search_by(|posting| posting.file_id.cmp(&file_id))
1388 .is_ok()
1389 {
1390 hits += 1;
1391 }
1392 }
1393 }
1394
1395 if hits == 0 {
1396 return 0.0;
1397 }
1398
1399 let file_trigram_count = index
1400 .file_trigrams
1401 .get(&file_id)
1402 .map_or(1, |trigrams| trigrams.len().max(1)) as f32;
1403 (hits as f32) / (1.0 + file_trigram_count.ln())
1404}
1405
1406pub fn resolve_cache_dir(project_root: &Path, storage_dir: Option<&Path>) -> PathBuf {
1407 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
1409 return PathBuf::from(override_dir)
1410 .join("index")
1411 .join(project_cache_key(project_root));
1412 }
1413 if let Some(dir) = storage_dir {
1415 return dir.join("index").join(project_cache_key(project_root));
1416 }
1417 let home = std::env::var_os("HOME")
1422 .or_else(|| std::env::var_os("USERPROFILE"))
1423 .map(PathBuf::from)
1424 .unwrap_or_else(std::env::temp_dir);
1425 home.join(".cache")
1426 .join("aft")
1427 .join("index")
1428 .join(project_cache_key(project_root))
1429}
1430
1431pub(crate) fn build_path_filters(
1432 include: &[String],
1433 exclude: &[String],
1434) -> Result<PathFilters, String> {
1435 Ok(PathFilters {
1436 includes: build_globset(include)?,
1437 excludes: build_globset(exclude)?,
1438 })
1439}
1440
1441pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
1442 walk_project_files_from(root, root, filters)
1443}
1444
1445pub(crate) fn walk_project_files_from(
1446 filter_root: &Path,
1447 search_root: &Path,
1448 filters: &PathFilters,
1449) -> Vec<PathBuf> {
1450 let mut builder = WalkBuilder::new(search_root);
1451 builder
1452 .hidden(false)
1453 .git_ignore(true)
1454 .git_global(true)
1455 .git_exclude(true)
1456 .filter_entry(|entry| {
1457 let name = entry.file_name().to_string_lossy();
1458 if entry.file_type().map_or(false, |ft| ft.is_dir()) {
1459 return !matches!(
1460 name.as_ref(),
1461 "node_modules"
1462 | "target"
1463 | "venv"
1464 | ".venv"
1465 | ".git"
1466 | "__pycache__"
1467 | ".tox"
1468 | "dist"
1469 | "build"
1470 );
1471 }
1472 true
1473 });
1474
1475 let mut files = Vec::new();
1476 for entry in builder.build().filter_map(|entry| entry.ok()) {
1477 if !entry
1478 .file_type()
1479 .map_or(false, |file_type| file_type.is_file())
1480 {
1481 continue;
1482 }
1483 let path = entry.into_path();
1484 if filters.matches(filter_root, &path) {
1485 files.push(path);
1486 }
1487 }
1488
1489 sort_paths_by_mtime_desc(&mut files);
1490 files
1491}
1492
1493pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
1494 let bytes = fs::read(path).ok()?;
1495 if is_binary_bytes(&bytes) {
1496 return None;
1497 }
1498 String::from_utf8(bytes).ok()
1499}
1500
1501fn read_indexed_file_bytes(path: &Path) -> Option<Vec<u8>> {
1502 fs::read(path).ok()
1503}
1504
1505pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
1506 path.strip_prefix(root)
1507 .map(PathBuf::from)
1508 .unwrap_or_else(|_| path.to_path_buf())
1509}
1510
1511pub(crate) fn cache_relative_path(root: &Path, path: &Path) -> Option<PathBuf> {
1512 let normalized_root = normalize_path(root);
1513 let normalized_path = normalize_path(path);
1514 let relative = normalized_path.strip_prefix(&normalized_root).ok()?;
1515 validate_cached_relative_path(relative)
1516}
1517
1518pub(crate) fn cached_path_under_root(root: &Path, relative_path: &Path) -> Option<PathBuf> {
1519 let relative = validate_cached_relative_path(relative_path)?;
1520 let normalized_root = normalize_path(root);
1521 let full_path = normalize_path(&normalized_root.join(relative));
1522
1523 match fs::canonicalize(&full_path) {
1524 Ok(canonical_path) => {
1525 if canonical_path.starts_with(&normalized_root) {
1526 return Some(full_path);
1527 }
1528
1529 let canonical_root = fs::canonicalize(&normalized_root).ok()?;
1530 canonical_path
1531 .starts_with(&canonical_root)
1532 .then_some(full_path)
1533 }
1534 Err(_) => full_path.starts_with(&normalized_root).then_some(full_path),
1535 }
1536}
1537
1538pub(crate) fn validate_cached_relative_path(path: &Path) -> Option<PathBuf> {
1539 if path.is_absolute() {
1540 return None;
1541 }
1542
1543 let mut normalized = PathBuf::new();
1544 for component in path.components() {
1545 match component {
1546 Component::Normal(part) => normalized.push(part),
1547 Component::CurDir => {}
1548 Component::ParentDir | Component::RootDir | Component::Prefix(_) => return None,
1549 }
1550 }
1551 (!normalized.as_os_str().is_empty()).then_some(normalized)
1552}
1553
1554pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
1567 use std::collections::HashMap;
1568 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(paths.len());
1569 for path in paths.iter() {
1570 mtimes
1571 .entry(path.clone())
1572 .or_insert_with(|| path_modified_time(path));
1573 }
1574 paths.sort_by(|left, right| {
1575 let left_mtime = mtimes.get(left).and_then(|v| *v);
1576 let right_mtime = mtimes.get(right).and_then(|v| *v);
1577 right_mtime.cmp(&left_mtime).then_with(|| left.cmp(right))
1578 });
1579}
1580
1581pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
1584 use std::collections::HashMap;
1585 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::new();
1586 for m in matches.iter() {
1587 mtimes.entry(m.file.clone()).or_insert_with(|| {
1588 let resolved = resolve_match_path(project_root, &m.file);
1589 path_modified_time(&resolved)
1590 });
1591 }
1592 matches.sort_by(|left, right| {
1593 let left_mtime = mtimes.get(&left.file).and_then(|v| *v);
1594 let right_mtime = mtimes.get(&right.file).and_then(|v| *v);
1595 right_mtime
1596 .cmp(&left_mtime)
1597 .then_with(|| left.file.cmp(&right.file))
1598 .then_with(|| left.line.cmp(&right.line))
1599 .then_with(|| left.column.cmp(&right.column))
1600 });
1601}
1602
1603fn sort_shared_grep_matches_by_cached_mtime_desc<F>(
1608 matches: &mut [SharedGrepMatch],
1609 modified_for_path: F,
1610) where
1611 F: Fn(&Path) -> Option<SystemTime>,
1612{
1613 use std::collections::HashMap;
1614 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(matches.len());
1615 for m in matches.iter() {
1616 let path = m.file.as_path().to_path_buf();
1617 mtimes
1618 .entry(path.clone())
1619 .or_insert_with(|| modified_for_path(&path));
1620 }
1621 matches.sort_by(|left, right| {
1622 let left_mtime = mtimes.get(left.file.as_path()).and_then(|v| *v);
1623 let right_mtime = mtimes.get(right.file.as_path()).and_then(|v| *v);
1624 right_mtime
1625 .cmp(&left_mtime)
1626 .then_with(|| left.file.as_path().cmp(right.file.as_path()))
1627 .then_with(|| left.line.cmp(&right.line))
1628 .then_with(|| left.column.cmp(&right.column))
1629 });
1630}
1631
1632pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
1633 let resolved_project_root = canonicalize_or_normalize(project_root);
1634 let root = match path {
1635 Some(path) => {
1636 let path = PathBuf::from(path);
1637 if path.is_absolute() {
1638 canonicalize_or_normalize(&path)
1639 } else {
1640 normalize_path(&resolved_project_root.join(path))
1641 }
1642 }
1643 None => resolved_project_root.clone(),
1644 };
1645
1646 let use_index = is_within_search_root(&resolved_project_root, &root);
1647 SearchScope { root, use_index }
1648}
1649
1650pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
1651 content_inspector::inspect(content).is_binary()
1652}
1653
1654pub(crate) fn current_git_head(root: &Path) -> Option<String> {
1655 run_git(root, &["rev-parse", "HEAD"])
1656}
1657
1658pub fn project_cache_key(project_root: &Path) -> String {
1659 use sha2::{Digest, Sha256};
1660
1661 let mut hasher = Sha256::new();
1662
1663 if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
1664 hasher.update(root_commit.as_bytes());
1667 } else {
1668 let canonical_root = canonicalize_or_normalize(project_root);
1670 hasher.update(canonical_root.to_string_lossy().as_bytes());
1671 }
1672
1673 let digest = format!("{:x}", hasher.finalize());
1674 digest[..16].to_string()
1675}
1676
1677impl PathFilters {
1678 fn matches(&self, root: &Path, path: &Path) -> bool {
1679 let relative = to_glob_path(&relative_to_root(root, path));
1680 if self
1681 .includes
1682 .as_ref()
1683 .is_some_and(|includes| !includes.is_match(&relative))
1684 {
1685 return false;
1686 }
1687 if self
1688 .excludes
1689 .as_ref()
1690 .is_some_and(|excludes| excludes.is_match(&relative))
1691 {
1692 return false;
1693 }
1694 true
1695 }
1696}
1697
1698fn canonicalize_or_normalize(path: &Path) -> PathBuf {
1699 fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
1700}
1701
1702fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
1703 if path.is_absolute() {
1704 path.to_path_buf()
1705 } else {
1706 project_root.join(path)
1707 }
1708}
1709
1710fn path_modified_time(path: &Path) -> Option<SystemTime> {
1711 fs::metadata(path)
1712 .and_then(|metadata| metadata.modified())
1713 .ok()
1714}
1715
1716fn normalize_path(path: &Path) -> PathBuf {
1717 let mut result = PathBuf::new();
1718 for component in path.components() {
1719 match component {
1720 Component::ParentDir => {
1721 if !result.pop() {
1722 result.push(component);
1723 }
1724 }
1725 Component::CurDir => {}
1726 _ => result.push(component),
1727 }
1728 }
1729 result
1730}
1731
1732fn verify_file_mtimes(index: &mut SearchIndex) {
1735 let mut stale_paths = Vec::new();
1737 for entry in &index.files {
1738 if entry.path.as_os_str().is_empty() {
1739 continue; }
1741 let cached = FileFreshness {
1742 mtime: entry.modified,
1743 size: entry.size,
1744 content_hash: entry.content_hash,
1745 };
1746 match cache_freshness::verify_file_strict(&entry.path, &cached) {
1747 FreshnessVerdict::HotFresh | FreshnessVerdict::ContentFresh { .. } => {}
1748 FreshnessVerdict::Stale | FreshnessVerdict::Deleted => {
1749 stale_paths.push(entry.path.clone())
1750 }
1751 }
1752 }
1753
1754 for entry in &mut index.files {
1755 if entry.path.as_os_str().is_empty() {
1756 continue;
1757 }
1758 let cached = FileFreshness {
1759 mtime: entry.modified,
1760 size: entry.size,
1761 content_hash: entry.content_hash,
1762 };
1763 if let FreshnessVerdict::ContentFresh {
1764 new_mtime,
1765 new_size,
1766 } = cache_freshness::verify_file_strict(&entry.path, &cached)
1767 {
1768 entry.modified = new_mtime;
1769 entry.size = new_size;
1770 }
1771 }
1772
1773 for path in &stale_paths {
1775 index.update_file(path);
1776 }
1777
1778 let filters = PathFilters::default();
1780 for path in walk_project_files(&index.project_root, &filters) {
1781 if !index.path_to_id.contains_key(&path) {
1782 index.update_file(&path);
1783 }
1784 }
1785
1786 if !stale_paths.is_empty() {
1787 crate::slog_info!(
1788 "search index: refreshed {} stale file(s) from disk cache",
1789 stale_paths.len()
1790 );
1791 }
1792}
1793
1794fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
1795 normalize_path(path).starts_with(normalize_path(search_root))
1796}
1797
1798impl QueryBuild {
1799 fn into_query(self) -> RegexQuery {
1800 let mut query = RegexQuery::default();
1801
1802 for run in self.and_runs {
1803 add_run_to_and_query(&mut query, &run);
1804 }
1805
1806 for group in self.or_groups {
1807 let mut trigrams = BTreeSet::new();
1808 let mut filters = HashMap::new();
1809 for run in group {
1810 for (trigram, filter) in trigram_filters(&run) {
1811 trigrams.insert(trigram);
1812 merge_filter(filters.entry(trigram).or_default(), filter);
1813 }
1814 }
1815 if !trigrams.is_empty() {
1816 query.or_groups.push(trigrams.into_iter().collect());
1817 query.or_filters.push(filters);
1818 }
1819 }
1820
1821 query
1822 }
1823}
1824
1825fn build_query(hir: &Hir) -> QueryBuild {
1826 match hir.kind() {
1827 HirKind::Literal(literal) => {
1828 if literal.0.len() >= 3 {
1829 QueryBuild {
1830 and_runs: vec![literal.0.to_vec()],
1831 or_groups: Vec::new(),
1832 }
1833 } else {
1834 QueryBuild::default()
1835 }
1836 }
1837 HirKind::Capture(capture) => build_query(&capture.sub),
1838 HirKind::Concat(parts) => {
1839 let mut build = QueryBuild::default();
1840 for part in parts {
1841 let part_build = build_query(part);
1842 build.and_runs.extend(part_build.and_runs);
1843 build.or_groups.extend(part_build.or_groups);
1844 }
1845 build
1846 }
1847 HirKind::Alternation(parts) => {
1848 let mut group = Vec::new();
1849 for part in parts {
1850 let Some(mut choices) = guaranteed_run_choices(part) else {
1851 return QueryBuild::default();
1852 };
1853 group.append(&mut choices);
1854 }
1855 if group.is_empty() {
1856 QueryBuild::default()
1857 } else {
1858 QueryBuild {
1859 and_runs: Vec::new(),
1860 or_groups: vec![group],
1861 }
1862 }
1863 }
1864 HirKind::Repetition(repetition) => {
1865 if repetition.min == 0 {
1866 QueryBuild::default()
1867 } else {
1868 build_query(&repetition.sub)
1869 }
1870 }
1871 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
1872 }
1873}
1874
1875fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
1876 match hir.kind() {
1877 HirKind::Literal(literal) => {
1878 if literal.0.len() >= 3 {
1879 Some(vec![literal.0.to_vec()])
1880 } else {
1881 None
1882 }
1883 }
1884 HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
1885 HirKind::Concat(parts) => {
1886 let mut runs = Vec::new();
1887 for part in parts {
1888 if let Some(mut part_runs) = guaranteed_run_choices(part) {
1889 runs.append(&mut part_runs);
1890 }
1891 }
1892 if runs.is_empty() {
1893 None
1894 } else {
1895 Some(runs)
1896 }
1897 }
1898 HirKind::Alternation(parts) => {
1899 let mut runs = Vec::new();
1900 for part in parts {
1901 let Some(mut part_runs) = guaranteed_run_choices(part) else {
1902 return None;
1903 };
1904 runs.append(&mut part_runs);
1905 }
1906 if runs.is_empty() {
1907 None
1908 } else {
1909 Some(runs)
1910 }
1911 }
1912 HirKind::Repetition(repetition) => {
1913 if repetition.min == 0 {
1914 None
1915 } else {
1916 guaranteed_run_choices(&repetition.sub)
1917 }
1918 }
1919 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
1920 }
1921}
1922
1923fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1924 for (trigram, filter) in trigram_filters(run) {
1925 if !query.and_trigrams.contains(&trigram) {
1926 query.and_trigrams.push(trigram);
1927 }
1928 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1929 }
1930}
1931
1932fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1933 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1934 for (trigram, next_char, position) in extract_trigrams(run) {
1935 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1936 if next_char != EOF_SENTINEL {
1937 entry.next_mask |= mask_for_next_char(next_char);
1938 }
1939 entry.loc_mask |= mask_for_position(position);
1940 }
1941 filters.into_iter().collect()
1942}
1943
1944fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
1945 target.next_mask |= filter.next_mask;
1946 target.loc_mask |= filter.loc_mask;
1947}
1948
1949fn mask_for_next_char(next_char: u8) -> u8 {
1950 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1951 1u8 << bit
1952}
1953
1954fn mask_for_position(position: usize) -> u8 {
1955 1u8 << (position % 8)
1956}
1957
1958fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1959 if patterns.is_empty() {
1960 return Ok(None);
1961 }
1962
1963 let mut builder = GlobSetBuilder::new();
1964 for pattern in patterns {
1965 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1966 builder.add(glob);
1967 }
1968 builder.build().map(Some).map_err(|error| error.to_string())
1969}
1970
1971fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
1972 let mut buffer = [0u8; 4];
1973 reader.read_exact(&mut buffer)?;
1974 Ok(u32::from_le_bytes(buffer))
1975}
1976
1977fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
1978 let mut buffer = [0u8; 8];
1979 reader.read_exact(&mut buffer)?;
1980 Ok(u64::from_le_bytes(buffer))
1981}
1982
1983fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
1984 writer.write_all(&value.to_le_bytes())
1985}
1986
1987fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
1988 writer.write_all(&value.to_le_bytes())
1989}
1990
1991fn verify_crc32_bytes_slice(bytes: &[u8]) -> std::io::Result<()> {
1992 let Some((body, stored)) = bytes.split_last_chunk::<4>() else {
1993 return Err(std::io::Error::other("search index checksum missing"));
1994 };
1995 let expected = u32::from_le_bytes(*stored);
1996 let actual = crc32fast::hash(body);
1997 if actual != expected {
1998 return Err(std::io::Error::other("search index checksum mismatch"));
1999 }
2000 Ok(())
2001}
2002
2003fn remaining_bytes<R: Seek>(reader: &mut R, total_len: usize) -> Option<usize> {
2004 let pos = usize::try_from(reader.stream_position().ok()?).ok()?;
2005 total_len.checked_sub(pos)
2006}
2007
2008fn run_git(root: &Path, args: &[&str]) -> Option<String> {
2009 let output = Command::new("git")
2010 .arg("-C")
2011 .arg(root)
2012 .args(args)
2013 .output()
2014 .ok()?;
2015 if !output.status.success() {
2016 return None;
2017 }
2018 let value = String::from_utf8(output.stdout).ok()?;
2019 let value = value.trim().to_string();
2020 if value.is_empty() {
2021 None
2022 } else {
2023 Some(value)
2024 }
2025}
2026
2027fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
2028 let diff_range = format!("{}..{}", from, to);
2029 let output = match Command::new("git")
2030 .arg("-C")
2031 .arg(root)
2032 .args(["diff", "--name-status", "-M", &diff_range])
2033 .output()
2034 {
2035 Ok(output) => output,
2036 Err(_) => return false,
2037 };
2038
2039 if !output.status.success() {
2040 return false;
2041 }
2042
2043 let Ok(diff) = String::from_utf8(output.stdout) else {
2044 return false;
2045 };
2046
2047 for line in diff.lines().map(str::trim).filter(|line| !line.is_empty()) {
2048 let mut fields = line.split('\t');
2049 let Some(status) = fields.next() else {
2050 continue;
2051 };
2052
2053 if status.starts_with('R') {
2054 let Some(old_path) = fields
2055 .next()
2056 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2057 else {
2058 continue;
2059 };
2060 let Some(new_path) = fields
2061 .next()
2062 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2063 else {
2064 continue;
2065 };
2066 index.remove_file(&old_path);
2067 index.update_file(&new_path);
2068 continue;
2069 }
2070
2071 let Some(path) = fields
2072 .next()
2073 .and_then(|path| cached_path_under_root(root, &PathBuf::from(path)))
2074 else {
2075 continue;
2076 };
2077 if status.starts_with('D') || !path.exists() {
2078 index.remove_file(&path);
2079 } else {
2080 index.update_file(&path);
2081 }
2082 }
2083
2084 true
2085}
2086
2087fn is_binary_path(path: &Path, size: u64) -> bool {
2088 if size == 0 {
2089 return false;
2090 }
2091
2092 let mut file = match File::open(path) {
2093 Ok(file) => file,
2094 Err(_) => return true,
2095 };
2096
2097 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
2098 match file.read(&mut preview) {
2099 Ok(read) => is_binary_bytes(&preview[..read]),
2100 Err(_) => true,
2101 }
2102}
2103
2104fn line_starts_bytes(content: &[u8]) -> Vec<usize> {
2105 let mut starts = vec![0usize];
2106 for (index, byte) in content.iter().copied().enumerate() {
2107 if byte == b'\n' {
2108 starts.push(index + 1);
2109 }
2110 }
2111 starts
2112}
2113
2114fn line_details_bytes(content: &[u8], line_starts: &[usize], offset: usize) -> (u32, u32, String) {
2115 let line_index = match line_starts.binary_search(&offset) {
2116 Ok(index) => index,
2117 Err(index) => index.saturating_sub(1),
2118 };
2119 let line_start = line_starts.get(line_index).copied().unwrap_or(0);
2120 let line_end = content[line_start..]
2121 .iter()
2122 .position(|byte| *byte == b'\n')
2123 .map(|length| line_start + length)
2124 .unwrap_or(content.len());
2125 let mut line_slice = &content[line_start..line_end];
2126 if line_slice.ends_with(b"\r") {
2127 line_slice = &line_slice[..line_slice.len() - 1];
2128 }
2129 let line_text = String::from_utf8_lossy(line_slice).into_owned();
2130 let column = String::from_utf8_lossy(&content[line_start..offset])
2131 .chars()
2132 .count() as u32
2133 + 1;
2134 (line_index as u32 + 1, column, line_text)
2135}
2136
2137fn to_glob_path(path: &Path) -> String {
2138 path.to_string_lossy().replace('\\', "/")
2139}
2140
2141#[cfg(test)]
2142mod tests {
2143 use std::process::Command;
2144
2145 use super::*;
2146
2147 #[test]
2148 fn cached_path_under_root_allows_missing_lexical_child() {
2149 let dir = tempfile::tempdir().expect("create temp dir");
2150 let project = dir.path().join("project");
2151 fs::create_dir_all(&project).expect("create project dir");
2152 let root = fs::canonicalize(&project).expect("canonicalize project");
2153
2154 let path = cached_path_under_root(&root, Path::new("future/file.rs"))
2155 .expect("missing child should fall back to lexical validation");
2156
2157 assert_eq!(path, root.join("future/file.rs"));
2158 }
2159
2160 #[cfg(unix)]
2161 #[test]
2162 fn cached_path_under_root_rejects_symlink_escape() {
2163 let dir = tempfile::tempdir().expect("create temp dir");
2164 let project = dir.path().join("project");
2165 let outside = dir.path().join("outside");
2166 fs::create_dir_all(&project).expect("create project dir");
2167 fs::create_dir_all(&outside).expect("create outside dir");
2168 fs::write(outside.join("secret.txt"), "secret").expect("write outside file");
2169 std::os::unix::fs::symlink(&outside, project.join("link")).expect("create symlink");
2170 let root = fs::canonicalize(&project).expect("canonicalize project");
2171
2172 assert!(cached_path_under_root(&root, Path::new("link/secret.txt")).is_none());
2173 }
2174
2175 #[test]
2176 fn extract_trigrams_tracks_next_char_and_position() {
2177 let trigrams = extract_trigrams(b"Rust");
2178 assert_eq!(trigrams.len(), 2);
2179 assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
2180 assert_eq!(
2181 trigrams[1],
2182 (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
2183 );
2184 }
2185
2186 #[test]
2187 fn decompose_regex_extracts_literals_and_alternations() {
2188 let query = decompose_regex("abc(def|ghi)xyz");
2189 assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
2190 assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
2191 assert_eq!(query.or_groups.len(), 1);
2192 assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
2193 assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
2194 }
2195
2196 #[test]
2197 fn candidates_intersect_posting_lists() {
2198 let mut index = SearchIndex::new();
2199 let dir = tempfile::tempdir().expect("create temp dir");
2200 let alpha = dir.path().join("alpha.txt");
2201 let beta = dir.path().join("beta.txt");
2202 fs::write(&alpha, "abcdef").expect("write alpha");
2203 fs::write(&beta, "abcxyz").expect("write beta");
2204 index.project_root = dir.path().to_path_buf();
2205 index.index_file(&alpha, b"abcdef");
2206 index.index_file(&beta, b"abcxyz");
2207
2208 let query = RegexQuery {
2209 and_trigrams: vec![
2210 pack_trigram(b'a', b'b', b'c'),
2211 pack_trigram(b'd', b'e', b'f'),
2212 ],
2213 ..RegexQuery::default()
2214 };
2215
2216 let candidates = index.candidates(&query);
2217 assert_eq!(candidates.len(), 1);
2218 assert_eq!(index.files[candidates[0] as usize].path, alpha);
2219 }
2220
2221 #[test]
2222 fn candidates_apply_bloom_filters() {
2223 let mut index = SearchIndex::new();
2224 let dir = tempfile::tempdir().expect("create temp dir");
2225 let file = dir.path().join("sample.txt");
2226 fs::write(&file, "abcd efgh").expect("write sample");
2227 index.project_root = dir.path().to_path_buf();
2228 index.index_file(&file, b"abcd efgh");
2229
2230 let trigram = pack_trigram(b'a', b'b', b'c');
2231 let matching_filter = PostingFilter {
2232 next_mask: mask_for_next_char(b'd'),
2233 loc_mask: mask_for_position(0),
2234 };
2235 let non_matching_filter = PostingFilter {
2236 next_mask: mask_for_next_char(b'z'),
2237 loc_mask: mask_for_position(0),
2238 };
2239
2240 assert_eq!(
2241 index
2242 .postings_for_trigram(trigram, Some(matching_filter))
2243 .len(),
2244 1
2245 );
2246 assert!(index
2247 .postings_for_trigram(trigram, Some(non_matching_filter))
2248 .is_empty());
2249 }
2250
2251 #[test]
2252 fn disk_round_trip_preserves_postings_and_files() {
2253 let dir = tempfile::tempdir().expect("create temp dir");
2254 let project = dir.path().join("project");
2255 fs::create_dir_all(&project).expect("create project dir");
2256 let file = project.join("src.txt");
2257 fs::write(&file, "abcdef").expect("write source");
2258
2259 let mut index = SearchIndex::build(&project);
2260 index.git_head = Some("deadbeef".to_string());
2261 let cache_dir = dir.path().join("cache");
2262 index.write_to_disk(&cache_dir, index.git_head.as_deref());
2263
2264 let loaded =
2265 SearchIndex::read_from_disk(&cache_dir, &project).expect("load index from disk");
2266 assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
2267 assert_eq!(loaded.files.len(), 1);
2268 assert_eq!(
2269 relative_to_root(&loaded.project_root, &loaded.files[0].path),
2270 PathBuf::from("src.txt")
2271 );
2272 assert_eq!(loaded.postings.len(), index.postings.len());
2273 assert!(loaded
2274 .postings
2275 .contains_key(&pack_trigram(b'a', b'b', b'c')));
2276 }
2277
2278 #[test]
2279 fn cache_path_helpers_reject_absolute_and_parent_paths() {
2280 let root = PathBuf::from("/tmp/aft-project");
2281
2282 assert_eq!(
2283 cache_relative_path(&root, &root.join("src/lib.rs")),
2284 Some(PathBuf::from("src/lib.rs"))
2285 );
2286 assert!(cache_relative_path(&root, Path::new("/tmp/outside.rs")).is_none());
2287 assert!(cached_path_under_root(&root, Path::new("../outside.rs")).is_none());
2288 assert!(cached_path_under_root(&root, Path::new("/tmp/outside.rs")).is_none());
2289 assert_eq!(
2290 cached_path_under_root(&root, Path::new("src/./lib.rs")),
2291 Some(root.join("src/lib.rs"))
2292 );
2293 }
2294
2295 #[test]
2296 fn refresh_after_head_change_removes_renames_and_detects_local_files() {
2297 let dir = tempfile::tempdir().expect("create temp dir");
2298 let project = dir.path().join("project");
2299 fs::create_dir_all(&project).expect("create project dir");
2300 let canonical_project = fs::canonicalize(&project).expect("canonical project");
2301 fs::write(project.join("old.txt"), "old token\n").expect("write old");
2302 fs::write(project.join("unchanged.txt"), "before\n").expect("write unchanged");
2303
2304 Command::new("git")
2305 .arg("init")
2306 .arg(&project)
2307 .status()
2308 .expect("git init");
2309 for args in [
2310 ["config", "user.email", "aft@example.invalid"],
2311 ["config", "user.name", "AFT Test"],
2312 ] {
2313 Command::new("git")
2314 .arg("-C")
2315 .arg(&project)
2316 .args(args)
2317 .status()
2318 .expect("git config");
2319 }
2320 Command::new("git")
2321 .arg("-C")
2322 .arg(&project)
2323 .args(["add", "."])
2324 .status()
2325 .expect("git add initial");
2326 Command::new("git")
2327 .arg("-C")
2328 .arg(&project)
2329 .args(["commit", "-m", "initial"])
2330 .status()
2331 .expect("git commit initial");
2332 let previous = run_git(&project, &["rev-parse", "HEAD"]).expect("previous head");
2333 let mut baseline = SearchIndex::build(&project);
2334 baseline.git_head = Some(previous.clone());
2335
2336 fs::rename(project.join("old.txt"), project.join("new.txt")).expect("rename file");
2337 Command::new("git")
2338 .arg("-C")
2339 .arg(&project)
2340 .args(["add", "-A"])
2341 .status()
2342 .expect("git add rename");
2343 Command::new("git")
2344 .arg("-C")
2345 .arg(&project)
2346 .args(["commit", "-m", "rename"])
2347 .status()
2348 .expect("git commit rename");
2349 let current = run_git(&project, &["rev-parse", "HEAD"]).expect("current head");
2350
2351 fs::write(project.join("unchanged.txt"), "after local edit\n").expect("local edit");
2352 fs::write(project.join("untracked.txt"), "untracked token\n").expect("untracked");
2353
2354 let refreshed = SearchIndex::rebuild_or_refresh(
2355 &project,
2356 DEFAULT_MAX_FILE_SIZE,
2357 Some(current),
2358 Some(baseline),
2359 );
2360
2361 assert!(!refreshed
2362 .path_to_id
2363 .contains_key(&canonical_project.join("old.txt")));
2364 assert!(refreshed
2365 .path_to_id
2366 .contains_key(&canonical_project.join("new.txt")));
2367 assert!(refreshed
2368 .path_to_id
2369 .contains_key(&canonical_project.join("untracked.txt")));
2370 let matches = refreshed.grep("after local edit", true, &[], &[], &canonical_project, 10);
2371 assert_eq!(matches.matches.len(), 1);
2372 }
2373
2374 #[test]
2375 fn read_from_disk_rejects_corrupt_postings_checksum() {
2376 let dir = tempfile::tempdir().expect("create temp dir");
2377 let project = dir.path().join("project");
2378 fs::create_dir_all(&project).expect("create project dir");
2379 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2380
2381 let index = SearchIndex::build(&project);
2382 let cache_dir = dir.path().join("cache");
2383 index.write_to_disk(&cache_dir, None);
2384
2385 let cache_path = cache_dir.join("cache.bin");
2386 let mut bytes = fs::read(&cache_path).expect("read cache");
2387 let middle = bytes.len() / 2;
2388 bytes[middle] ^= 0xff;
2389 fs::write(&cache_path, bytes).expect("write corrupted cache");
2390
2391 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_none());
2392 }
2393
2394 #[test]
2395 fn write_to_disk_uses_temp_files_and_cleans_them_up() {
2396 let dir = tempfile::tempdir().expect("create temp dir");
2397 let project = dir.path().join("project");
2398 fs::create_dir_all(&project).expect("create project dir");
2399 fs::write(project.join("src.txt"), "abcdef").expect("write source");
2400
2401 let index = SearchIndex::build(&project);
2402 let cache_dir = dir.path().join("cache");
2403 index.write_to_disk(&cache_dir, None);
2404
2405 assert!(cache_dir.join("cache.bin").is_file());
2406 assert!(fs::read_dir(&cache_dir)
2407 .expect("read cache dir")
2408 .all(|entry| !entry
2409 .expect("cache entry")
2410 .file_name()
2411 .to_string_lossy()
2412 .contains(".tmp.")));
2413 }
2414
2415 #[test]
2416 fn concurrent_search_index_writes_do_not_corrupt() {
2417 let dir = tempfile::tempdir().expect("create temp dir");
2418 let project = dir.path().join("project");
2419 fs::create_dir_all(&project).expect("create project dir");
2420 fs::write(project.join("src.txt"), "abcdef\n").expect("write source");
2421 let cache_dir = dir.path().join("cache");
2422
2423 let a_project = project.clone();
2424 let a_cache = cache_dir.clone();
2425 let a = std::thread::spawn(move || {
2426 let _lock = CacheLock::acquire(&a_cache).expect("acquire cache lock a");
2427 let index = SearchIndex::build(&a_project);
2428 index.write_to_disk(&a_cache, None);
2429 });
2430 let b_project = project.clone();
2431 let b_cache = cache_dir.clone();
2432 let b = std::thread::spawn(move || {
2433 let _lock = CacheLock::acquire(&b_cache).expect("acquire cache lock b");
2434 let index = SearchIndex::build(&b_project);
2435 index.write_to_disk(&b_cache, None);
2436 });
2437 a.join().expect("writer a");
2438 b.join().expect("writer b");
2439
2440 assert!(SearchIndex::read_from_disk(&cache_dir, &project).is_some());
2441 }
2442
2443 #[test]
2444 fn search_index_atomic_rename_survives_partial_write() {
2445 let dir = tempfile::tempdir().expect("create temp dir");
2446 let cache_dir = dir.path().join("cache");
2447 fs::create_dir_all(&cache_dir).expect("create cache dir");
2448 fs::write(cache_dir.join("cache.bin.tmp.1.1"), b"partial").expect("write partial tmp");
2449
2450 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2451 }
2452
2453 #[test]
2454 fn project_cache_key_includes_checkout_path() {
2455 let dir = tempfile::tempdir().expect("create temp dir");
2456 let source = dir.path().join("source");
2457 fs::create_dir_all(&source).expect("create source repo dir");
2458 fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");
2459
2460 assert!(Command::new("git")
2461 .current_dir(&source)
2462 .args(["init"])
2463 .status()
2464 .expect("init git repo")
2465 .success());
2466 assert!(Command::new("git")
2467 .current_dir(&source)
2468 .args(["add", "."])
2469 .status()
2470 .expect("git add")
2471 .success());
2472 assert!(Command::new("git")
2473 .current_dir(&source)
2474 .args([
2475 "-c",
2476 "user.name=AFT Tests",
2477 "-c",
2478 "user.email=aft-tests@example.com",
2479 "commit",
2480 "-m",
2481 "initial",
2482 ])
2483 .status()
2484 .expect("git commit")
2485 .success());
2486
2487 let clone = dir.path().join("clone");
2488 assert!(Command::new("git")
2489 .args(["clone", "--quiet"])
2490 .arg(&source)
2491 .arg(&clone)
2492 .status()
2493 .expect("git clone")
2494 .success());
2495
2496 let source_key = project_cache_key(&source);
2497 let clone_key = project_cache_key(&clone);
2498
2499 assert_eq!(source_key.len(), 16);
2500 assert_eq!(clone_key.len(), 16);
2501 assert_eq!(source_key, clone_key);
2503 }
2504
2505 #[test]
2506 fn git_head_unchanged_picks_up_local_edits() {
2507 let dir = tempfile::tempdir().expect("create temp dir");
2508 let project = dir.path().join("repo");
2509 fs::create_dir_all(&project).expect("create repo dir");
2510 let file = project.join("tracked.txt");
2511 fs::write(&file, "oldtoken\n").expect("write file");
2512 assert!(Command::new("git")
2513 .current_dir(&project)
2514 .arg("init")
2515 .status()
2516 .unwrap()
2517 .success());
2518 assert!(Command::new("git")
2519 .current_dir(&project)
2520 .args(["add", "."])
2521 .status()
2522 .unwrap()
2523 .success());
2524 assert!(Command::new("git")
2525 .current_dir(&project)
2526 .args([
2527 "-c",
2528 "user.name=AFT Tests",
2529 "-c",
2530 "user.email=aft-tests@example.com",
2531 "commit",
2532 "-m",
2533 "initial"
2534 ])
2535 .status()
2536 .unwrap()
2537 .success());
2538 let head = current_git_head(&project);
2539 let mut baseline = SearchIndex::build(&project);
2540 baseline.git_head = head.clone();
2541 fs::write(&file, "newtoken\n").expect("edit tracked file");
2542
2543 let refreshed =
2544 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, head, Some(baseline));
2545 let result = refreshed.grep("newtoken", true, &[], &[], &project, 10);
2546
2547 assert_eq!(result.total_matches, 1);
2548 }
2549
2550 #[test]
2551 fn non_git_project_reuses_cache_when_files_unchanged() {
2552 let dir = tempfile::tempdir().expect("create temp dir");
2553 let project = dir.path().join("project");
2554 fs::create_dir_all(&project).expect("create project dir");
2555 fs::write(project.join("file.txt"), "unchangedtoken\n").expect("write file");
2556 let baseline = SearchIndex::build(&project);
2557 let baseline_file_count = baseline.file_count();
2558
2559 let refreshed =
2560 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2561
2562 assert_eq!(refreshed.file_count(), baseline_file_count);
2563 assert_eq!(
2564 refreshed
2565 .grep("unchangedtoken", true, &[], &[], &project, 10)
2566 .total_matches,
2567 1
2568 );
2569 }
2570
2571 #[test]
2572 fn resolve_search_scope_disables_index_for_external_path() {
2573 let dir = tempfile::tempdir().expect("create temp dir");
2574 let project = dir.path().join("project");
2575 let outside = dir.path().join("outside");
2576 fs::create_dir_all(&project).expect("create project dir");
2577 fs::create_dir_all(&outside).expect("create outside dir");
2578
2579 let scope = resolve_search_scope(&project, outside.to_str());
2580
2581 assert_eq!(
2582 scope.root,
2583 fs::canonicalize(&outside).expect("canonicalize outside")
2584 );
2585 assert!(!scope.use_index);
2586 }
2587
2588 #[test]
2589 fn grep_filters_matches_to_search_root() {
2590 let dir = tempfile::tempdir().expect("create temp dir");
2591 let project = dir.path().join("project");
2592 let src = project.join("src");
2593 let docs = project.join("docs");
2594 fs::create_dir_all(&src).expect("create src dir");
2595 fs::create_dir_all(&docs).expect("create docs dir");
2596 fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
2597 fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");
2598
2599 let index = SearchIndex::build(&project);
2600 let result = index.grep("SearchIndex", true, &[], &[], &src, 10);
2601
2602 assert_eq!(result.files_searched, 1);
2603 assert_eq!(result.files_with_matches, 1);
2604 assert_eq!(result.matches.len(), 1);
2605 let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
2607 assert_eq!(result.matches[0].file, expected);
2608 }
2609
2610 #[test]
2611 fn grep_deduplicates_multiple_matches_on_same_line() {
2612 let dir = tempfile::tempdir().expect("create temp dir");
2613 let project = dir.path().join("project");
2614 let src = project.join("src");
2615 fs::create_dir_all(&src).expect("create src dir");
2616 fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");
2617
2618 let index = SearchIndex::build(&project);
2619 let result = index.grep("SearchIndex", true, &[], &[], &src, 10);
2620
2621 assert_eq!(result.total_matches, 1);
2622 assert_eq!(result.matches.len(), 1);
2623 }
2624
2625 #[test]
2626 fn grep_case_insensitive_unicode_literal_matches_indexed_file() {
2627 let dir = tempfile::tempdir().expect("create temp dir");
2628 let project = dir.path().join("project");
2629 fs::create_dir_all(&project).expect("create project dir");
2630 let file = project.join("unicode.txt");
2631 fs::write(&file, "äbc\n").expect("write unicode file");
2632
2633 let index = SearchIndex::build(&project);
2634 let result = index.grep("Äbc", false, &[], &[], &project, 10);
2635
2636 assert_eq!(result.total_matches, 1);
2637 assert_eq!(result.matches.len(), 1);
2638 assert_eq!(
2639 result.matches[0].file,
2640 fs::canonicalize(file).expect("canonicalize unicode file")
2641 );
2642 }
2643
2644 #[test]
2645 fn refresh_reindexes_same_size_edit_with_preserved_mtime() {
2646 let dir = tempfile::tempdir().expect("create temp dir");
2647 let project = dir.path().join("project");
2648 fs::create_dir_all(&project).expect("create project dir");
2649 let file = project.join("tokens.txt");
2650 let original_mtime = filetime::FileTime::from_unix_time(1_700_000_000, 0);
2651 fs::write(&file, "alpha").expect("write original file");
2652 filetime::set_file_mtime(&file, original_mtime).expect("set original mtime");
2653
2654 let baseline = SearchIndex::build(&project);
2655 fs::write(&file, "bravo").expect("write same-size edit");
2656 filetime::set_file_mtime(&file, original_mtime).expect("restore original mtime");
2657
2658 let refreshed =
2659 SearchIndex::rebuild_or_refresh(&project, DEFAULT_MAX_FILE_SIZE, None, Some(baseline));
2660 let result = refreshed.grep("bravo", true, &[], &[], &project, 10);
2661 let canonical_file = fs::canonicalize(&file).expect("canonicalize edited file");
2662 let refreshed_id = *refreshed
2663 .path_to_id
2664 .get(&canonical_file)
2665 .expect("file remains indexed");
2666
2667 assert_eq!(result.total_matches, 1);
2668 assert!(refreshed
2669 .postings_for_trigram(pack_trigram(b'b', b'r', b'a'), None)
2670 .contains(&refreshed_id));
2671 assert!(!refreshed
2672 .postings_for_trigram(pack_trigram(b'a', b'l', b'p'), None)
2673 .contains(&refreshed_id));
2674 }
2675
2676 #[test]
2677 fn grep_reports_total_matches_before_truncation() {
2678 let dir = tempfile::tempdir().expect("create temp dir");
2679 let project = dir.path().join("project");
2680 let src = project.join("src");
2681 fs::create_dir_all(&src).expect("create src dir");
2682 fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");
2683
2684 let index = SearchIndex::build(&project);
2685 let result = index.grep("SearchIndex", true, &[], &[], &src, 1);
2686
2687 assert_eq!(result.total_matches, 2);
2688 assert_eq!(result.matches.len(), 1);
2689 assert!(result.truncated);
2690 }
2691
2692 #[test]
2693 fn glob_filters_results_to_search_root() {
2694 let dir = tempfile::tempdir().expect("create temp dir");
2695 let project = dir.path().join("project");
2696 let src = project.join("src");
2697 let scripts = project.join("scripts");
2698 fs::create_dir_all(&src).expect("create src dir");
2699 fs::create_dir_all(&scripts).expect("create scripts dir");
2700 fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
2701 fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");
2702
2703 let index = SearchIndex::build(&project);
2704 let files = index.glob("**/*.rs", &src);
2705
2706 assert_eq!(
2707 files,
2708 vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
2709 );
2710 }
2711
2712 #[test]
2713 fn glob_includes_hidden_and_binary_files() {
2714 let dir = tempfile::tempdir().expect("create temp dir");
2715 let project = dir.path().join("project");
2716 let hidden_dir = project.join(".hidden");
2717 fs::create_dir_all(&hidden_dir).expect("create hidden dir");
2718 let hidden_file = hidden_dir.join("data.bin");
2719 fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");
2720
2721 let index = SearchIndex::build(&project);
2722 let files = index.glob("**/*.bin", &project);
2723
2724 assert_eq!(
2725 files,
2726 vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
2727 );
2728 }
2729
2730 #[test]
2731 fn read_from_disk_rejects_invalid_nanos() {
2732 let dir = tempfile::tempdir().expect("create temp dir");
2733 let cache_dir = dir.path().join("cache");
2734 fs::create_dir_all(&cache_dir).expect("create cache dir");
2735
2736 let mut postings = Vec::new();
2737 postings.extend_from_slice(INDEX_MAGIC);
2738 postings.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2739 postings.extend_from_slice(&0u32.to_le_bytes());
2740 postings.extend_from_slice(&1u32.to_le_bytes());
2741 postings.extend_from_slice(&DEFAULT_MAX_FILE_SIZE.to_le_bytes());
2742 postings.extend_from_slice(&1u32.to_le_bytes());
2743 postings.extend_from_slice(b"/");
2744 postings.push(0u8);
2745 postings.extend_from_slice(&1u32.to_le_bytes());
2746 postings.extend_from_slice(&0u64.to_le_bytes());
2747 postings.extend_from_slice(&0u64.to_le_bytes());
2748 postings.extend_from_slice(&1_000_000_000u32.to_le_bytes());
2749 postings.extend_from_slice(b"a");
2750 postings.extend_from_slice(&0u64.to_le_bytes());
2751
2752 let mut lookup = Vec::new();
2753 lookup.extend_from_slice(LOOKUP_MAGIC);
2754 lookup.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2755 lookup.extend_from_slice(&0u32.to_le_bytes());
2756
2757 let postings_checksum = crc32fast::hash(&postings);
2758 postings.extend_from_slice(&postings_checksum.to_le_bytes());
2759 let lookup_checksum = crc32fast::hash(&lookup);
2760 lookup.extend_from_slice(&lookup_checksum.to_le_bytes());
2761 let mut cache = Vec::new();
2762 cache.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
2763 cache.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2764 cache.extend_from_slice(&(postings.len() as u64).to_le_bytes());
2765 cache.extend_from_slice(&postings);
2766 cache.extend_from_slice(&lookup);
2767 fs::write(cache_dir.join("cache.bin"), cache).expect("write cache");
2768
2769 assert!(SearchIndex::read_from_disk(&cache_dir, dir.path()).is_none());
2770 }
2771
2772 #[test]
2787 fn sort_paths_by_mtime_desc_does_not_panic_on_missing_files() {
2788 let dir = tempfile::tempdir().expect("create tempdir");
2792 let mut paths: Vec<PathBuf> = Vec::new();
2793 for i in 0..30 {
2794 let path = if i % 2 == 0 {
2796 let p = dir.path().join(format!("real-{i}.rs"));
2797 fs::write(&p, format!("// {i}\n")).expect("write");
2798 p
2799 } else {
2800 dir.path().join(format!("missing-{i}.rs"))
2801 };
2802 paths.push(path);
2803 }
2804
2805 for _ in 0..50 {
2808 let mut copy = paths.clone();
2809 sort_paths_by_mtime_desc(&mut copy);
2810 assert_eq!(copy.len(), paths.len());
2811 }
2812 }
2813
2814 #[test]
2818 fn sort_grep_matches_by_mtime_desc_does_not_panic_on_missing_files() {
2819 let dir = tempfile::tempdir().expect("create tempdir");
2820 let mut matches: Vec<GrepMatch> = Vec::new();
2821 for i in 0..30 {
2822 let file = if i % 2 == 0 {
2823 let p = dir.path().join(format!("real-{i}.rs"));
2824 fs::write(&p, format!("// {i}\n")).expect("write");
2825 p
2826 } else {
2827 dir.path().join(format!("missing-{i}.rs"))
2828 };
2829 matches.push(GrepMatch {
2830 file,
2831 line: u32::try_from(i).unwrap_or(0),
2832 column: 0,
2833 line_text: format!("match {i}"),
2834 match_text: format!("match {i}"),
2835 });
2836 }
2837
2838 for _ in 0..50 {
2839 let mut copy = matches.clone();
2840 sort_grep_matches_by_mtime_desc(&mut copy, dir.path());
2841 assert_eq!(copy.len(), matches.len());
2842 }
2843 }
2844}