1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Read, Seek, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::sync::{
7 atomic::{AtomicBool, AtomicUsize, Ordering},
8 Arc,
9};
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use globset::{Glob, GlobSet, GlobSetBuilder};
13use ignore::WalkBuilder;
14use rayon::prelude::*;
15use regex::bytes::{Regex, RegexBuilder};
16use regex_syntax::hir::{Hir, HirKind};
17
18const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
19const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
20const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
21const INDEX_VERSION: u32 = 2;
22const PREVIEW_BYTES: usize = 8 * 1024;
23const EOF_SENTINEL: u8 = 0;
24const MAX_ENTRIES: usize = 10_000_000;
25const MIN_FILE_ENTRY_BYTES: usize = 25;
26const LOOKUP_ENTRY_BYTES: usize = 16;
27const POSTING_BYTES: usize = 6;
28
29#[derive(Clone, Debug)]
30pub struct SearchIndex {
31 pub postings: HashMap<u32, Vec<Posting>>,
32 pub files: Vec<FileEntry>,
33 pub path_to_id: HashMap<PathBuf, u32>,
34 pub ready: bool,
35 project_root: PathBuf,
36 git_head: Option<String>,
37 max_file_size: u64,
38 file_trigrams: HashMap<u32, Vec<u32>>,
39 unindexed_files: HashSet<u32>,
40}
41
42impl SearchIndex {
43 pub fn file_count(&self) -> usize {
45 self.files.len()
46 }
47
48 pub fn trigram_count(&self) -> usize {
50 self.postings.len()
51 }
52}
53
54#[derive(Clone, Debug, PartialEq, Eq)]
55pub struct Posting {
56 pub file_id: u32,
57 pub next_mask: u8,
58 pub loc_mask: u8,
59}
60
61#[derive(Clone, Debug)]
62pub struct FileEntry {
63 pub path: PathBuf,
64 pub size: u64,
65 pub modified: SystemTime,
66}
67
68#[derive(Clone, Debug, PartialEq, Eq)]
69pub struct GrepMatch {
70 pub file: PathBuf,
71 pub line: u32,
72 pub column: u32,
73 pub line_text: String,
74 pub match_text: String,
75}
76
77#[derive(Clone, Debug)]
78pub struct GrepResult {
79 pub matches: Vec<GrepMatch>,
80 pub total_matches: usize,
81 pub files_searched: usize,
82 pub files_with_matches: usize,
83 pub index_status: IndexStatus,
84 pub truncated: bool,
85}
86
87#[derive(Clone, Copy, Debug, PartialEq, Eq)]
88pub enum IndexStatus {
89 Ready,
90 Building,
91 Fallback,
92}
93
94impl IndexStatus {
95 pub fn as_str(&self) -> &'static str {
96 match self {
97 IndexStatus::Ready => "Ready",
98 IndexStatus::Building => "Building",
99 IndexStatus::Fallback => "Fallback",
100 }
101 }
102}
103
104#[derive(Clone, Debug, Default)]
105pub struct RegexQuery {
106 pub and_trigrams: Vec<u32>,
107 pub or_groups: Vec<Vec<u32>>,
108 pub(crate) and_filters: HashMap<u32, PostingFilter>,
109 pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
110}
111
112#[derive(Clone, Copy, Debug, Default)]
113pub(crate) struct PostingFilter {
114 next_mask: u8,
115 loc_mask: u8,
116}
117
118#[derive(Clone, Debug, Default)]
119struct QueryBuild {
120 and_runs: Vec<Vec<u8>>,
121 or_groups: Vec<Vec<Vec<u8>>>,
122}
123
124#[derive(Clone, Debug, Default)]
125pub(crate) struct PathFilters {
126 includes: Option<GlobSet>,
127 excludes: Option<GlobSet>,
128}
129
130#[derive(Clone, Debug)]
131pub(crate) struct SearchScope {
132 pub root: PathBuf,
133 pub use_index: bool,
134}
135
136#[derive(Clone, Debug)]
137struct SharedGrepMatch {
138 file: Arc<PathBuf>,
139 line: u32,
140 column: u32,
141 line_text: String,
142 match_text: String,
143}
144
145#[derive(Clone, Debug)]
146enum SearchMatcher {
147 Literal(LiteralSearch),
148 Regex(Regex),
149}
150
151#[derive(Clone, Debug)]
152enum LiteralSearch {
153 CaseSensitive(Vec<u8>),
154 AsciiCaseInsensitive(Vec<u8>),
155}
156
157impl SearchIndex {
158 pub fn new() -> Self {
159 SearchIndex {
160 postings: HashMap::new(),
161 files: Vec::new(),
162 path_to_id: HashMap::new(),
163 ready: false,
164 project_root: PathBuf::new(),
165 git_head: None,
166 max_file_size: DEFAULT_MAX_FILE_SIZE,
167 file_trigrams: HashMap::new(),
168 unindexed_files: HashSet::new(),
169 }
170 }
171
172 pub fn build(root: &Path) -> Self {
173 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
174 }
175
176 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
177 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
178 let mut index = SearchIndex {
179 project_root: project_root.clone(),
180 max_file_size,
181 ..SearchIndex::new()
182 };
183
184 let filters = PathFilters::default();
185 for path in walk_project_files(&project_root, &filters) {
186 index.update_file(&path);
187 }
188
189 index.git_head = current_git_head(&project_root);
190 index.ready = true;
191 index
192 }
193
194 pub fn index_file(&mut self, path: &Path, content: &[u8]) {
195 self.remove_file(path);
196
197 let file_id = match self.allocate_file_id(path, content.len() as u64) {
198 Some(file_id) => file_id,
199 None => return,
200 };
201
202 let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
203 for (trigram, next_char, position) in extract_trigrams(content) {
204 let entry = trigram_map.entry(trigram).or_default();
205 entry.next_mask |= mask_for_next_char(next_char);
206 entry.loc_mask |= mask_for_position(position);
207 }
208
209 let mut file_trigrams = Vec::with_capacity(trigram_map.len());
210 for (trigram, filter) in trigram_map {
211 let postings = self.postings.entry(trigram).or_default();
212 postings.push(Posting {
213 file_id,
214 next_mask: filter.next_mask,
215 loc_mask: filter.loc_mask,
216 });
217 if postings.len() > 1
221 && postings[postings.len() - 2].file_id > postings[postings.len() - 1].file_id
222 {
223 postings.sort_unstable_by_key(|p| p.file_id);
224 }
225 file_trigrams.push(trigram);
226 }
227
228 self.file_trigrams.insert(file_id, file_trigrams);
229 self.unindexed_files.remove(&file_id);
230 }
231
232 pub fn remove_file(&mut self, path: &Path) {
233 let Some(file_id) = self.path_to_id.remove(path) else {
234 return;
235 };
236
237 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
238 for trigram in trigrams {
239 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
240 postings.retain(|posting| posting.file_id != file_id);
241 postings.is_empty()
242 } else {
243 false
244 };
245
246 if should_remove {
247 self.postings.remove(&trigram);
248 }
249 }
250 }
251
252 self.unindexed_files.remove(&file_id);
253 if let Some(file) = self.files.get_mut(file_id as usize) {
254 file.path = PathBuf::new();
255 file.size = 0;
256 file.modified = UNIX_EPOCH;
257 }
258 }
259
260 pub fn update_file(&mut self, path: &Path) {
261 self.remove_file(path);
262
263 let metadata = match fs::metadata(path) {
264 Ok(metadata) if metadata.is_file() => metadata,
265 _ => return,
266 };
267
268 if is_binary_path(path, metadata.len()) {
269 self.track_unindexed_file(path, &metadata);
270 return;
271 }
272
273 if metadata.len() > self.max_file_size {
274 self.track_unindexed_file(path, &metadata);
275 return;
276 }
277
278 let content = match fs::read(path) {
279 Ok(content) => content,
280 Err(_) => return,
281 };
282
283 if is_binary_bytes(&content) {
284 self.track_unindexed_file(path, &metadata);
285 return;
286 }
287
288 self.index_file(path, &content);
289 }
290
291 pub fn grep(
292 &self,
293 pattern: &str,
294 case_sensitive: bool,
295 include: &[String],
296 exclude: &[String],
297 search_root: &Path,
298 max_results: usize,
299 ) -> GrepResult {
300 self.search_grep(
301 pattern,
302 case_sensitive,
303 include,
304 exclude,
305 search_root,
306 max_results,
307 )
308 }
309
310 pub fn search_grep(
311 &self,
312 pattern: &str,
313 case_sensitive: bool,
314 include: &[String],
315 exclude: &[String],
316 search_root: &Path,
317 max_results: usize,
318 ) -> GrepResult {
319 let is_literal = !pattern.chars().any(|c| {
322 matches!(
323 c,
324 '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '^' | '$' | '\\'
325 )
326 });
327
328 let literal_search = if is_literal {
329 if case_sensitive {
330 Some(LiteralSearch::CaseSensitive(pattern.as_bytes().to_vec()))
331 } else if pattern.is_ascii() {
332 Some(LiteralSearch::AsciiCaseInsensitive(
333 pattern
334 .as_bytes()
335 .iter()
336 .map(|byte| byte.to_ascii_lowercase())
337 .collect(),
338 ))
339 } else {
340 None
341 }
342 } else {
343 None
344 };
345
346 let regex = if literal_search.is_some() {
348 None
349 } else {
350 let regex_pattern = if is_literal {
351 regex::escape(pattern)
352 } else {
353 pattern.to_string()
354 };
355 let mut builder = RegexBuilder::new(®ex_pattern);
356 builder.case_insensitive(!case_sensitive);
357 builder.multi_line(true);
359 match builder.build() {
360 Ok(r) => Some(r),
361 Err(_) => {
362 return GrepResult {
363 matches: Vec::new(),
364 total_matches: 0,
365 files_searched: 0,
366 files_with_matches: 0,
367 index_status: if self.ready {
368 IndexStatus::Ready
369 } else {
370 IndexStatus::Building
371 },
372 truncated: false,
373 };
374 }
375 }
376 };
377
378 let matcher = if let Some(literal_search) = literal_search {
379 SearchMatcher::Literal(literal_search)
380 } else {
381 SearchMatcher::Regex(
382 regex.expect("regex should exist when literal matcher is unavailable"),
383 )
384 };
385
386 let filters = match build_path_filters(include, exclude) {
387 Ok(filters) => filters,
388 Err(_) => PathFilters::default(),
389 };
390 let search_root = canonicalize_or_normalize(search_root);
391
392 let query = decompose_regex(pattern);
393 let candidate_ids = self.candidates(&query);
394
395 let candidate_files: Vec<&FileEntry> = candidate_ids
396 .into_iter()
397 .filter_map(|file_id| self.files.get(file_id as usize))
398 .filter(|file| !file.path.as_os_str().is_empty())
399 .filter(|file| is_within_search_root(&search_root, &file.path))
400 .filter(|file| filters.matches(&self.project_root, &file.path))
401 .collect();
402
403 let total_matches = AtomicUsize::new(0);
404 let files_searched = AtomicUsize::new(0);
405 let files_with_matches = AtomicUsize::new(0);
406 let truncated = AtomicBool::new(false);
407 let stop_after = max_results.saturating_mul(2);
408
409 let mut matches = if candidate_files.len() > 10 {
410 candidate_files
411 .par_iter()
412 .map(|file| {
413 search_candidate_file(
414 file,
415 &matcher,
416 max_results,
417 stop_after,
418 &total_matches,
419 &files_searched,
420 &files_with_matches,
421 &truncated,
422 )
423 })
424 .reduce(Vec::new, |mut left, mut right| {
425 left.append(&mut right);
426 left
427 })
428 } else {
429 let mut matches = Vec::new();
430 for file in candidate_files {
431 matches.extend(search_candidate_file(
432 file,
433 &matcher,
434 max_results,
435 stop_after,
436 &total_matches,
437 &files_searched,
438 &files_with_matches,
439 &truncated,
440 ));
441
442 if should_stop_search(&truncated, &total_matches, stop_after) {
443 break;
444 }
445 }
446 matches
447 };
448
449 sort_shared_grep_matches_by_cached_mtime_desc(&mut matches, |path| {
450 self.path_to_id
451 .get(path)
452 .and_then(|file_id| self.files.get(*file_id as usize))
453 .map(|file| file.modified)
454 });
455
456 let matches = matches
457 .into_iter()
458 .map(|matched| GrepMatch {
459 file: matched.file.as_ref().clone(),
460 line: matched.line,
461 column: matched.column,
462 line_text: matched.line_text,
463 match_text: matched.match_text,
464 })
465 .collect();
466
467 GrepResult {
468 total_matches: total_matches.load(Ordering::Relaxed),
469 matches,
470 files_searched: files_searched.load(Ordering::Relaxed),
471 files_with_matches: files_with_matches.load(Ordering::Relaxed),
472 index_status: if self.ready {
473 IndexStatus::Ready
474 } else {
475 IndexStatus::Building
476 },
477 truncated: truncated.load(Ordering::Relaxed),
478 }
479 }
480
481 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
482 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
483 Ok(filters) => filters,
484 Err(_) => return Vec::new(),
485 };
486 let search_root = canonicalize_or_normalize(search_root);
487 let mut entries = self
488 .files
489 .iter()
490 .filter(|file| !file.path.as_os_str().is_empty())
491 .filter(|file| is_within_search_root(&search_root, &file.path))
492 .filter(|file| filters.matches(&self.project_root, &file.path))
493 .map(|file| (file.path.clone(), file.modified))
494 .collect::<Vec<_>>();
495
496 entries.sort_by(|(left_path, left_mtime), (right_path, right_mtime)| {
497 right_mtime
498 .cmp(left_mtime)
499 .then_with(|| left_path.cmp(right_path))
500 });
501
502 entries.into_iter().map(|(path, _)| path).collect()
503 }
504
505 pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
506 if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
507 return self.active_file_ids();
508 }
509
510 let mut and_trigrams = query.and_trigrams.clone();
511 and_trigrams.sort_unstable_by_key(|trigram| self.postings.get(trigram).map_or(0, Vec::len));
512
513 let mut current: Option<Vec<u32>> = None;
514
515 for trigram in and_trigrams {
516 let filter = query.and_filters.get(&trigram).copied();
517 let matches = self.postings_for_trigram(trigram, filter);
518 current = Some(match current.take() {
519 Some(existing) => intersect_sorted_ids(&existing, &matches),
520 None => matches,
521 });
522
523 if current.as_ref().is_some_and(|ids| ids.is_empty()) {
524 break;
525 }
526 }
527
528 let mut current = current.unwrap_or_else(|| self.active_file_ids());
529
530 for (index, group) in query.or_groups.iter().enumerate() {
531 let mut group_matches = Vec::new();
532 let filters = query.or_filters.get(index);
533
534 for trigram in group {
535 let filter = filters.and_then(|filters| filters.get(trigram).copied());
536 let matches = self.postings_for_trigram(*trigram, filter);
537 if group_matches.is_empty() {
538 group_matches = matches;
539 } else {
540 group_matches = union_sorted_ids(&group_matches, &matches);
541 }
542 }
543
544 current = intersect_sorted_ids(¤t, &group_matches);
545 if current.is_empty() {
546 break;
547 }
548 }
549
550 let mut unindexed = self
551 .unindexed_files
552 .iter()
553 .copied()
554 .filter(|file_id| self.is_active_file(*file_id))
555 .collect::<Vec<_>>();
556 if !unindexed.is_empty() {
557 unindexed.sort_unstable();
558 current = union_sorted_ids(¤t, &unindexed);
559 }
560
561 current
562 }
563
564 pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
565 if fs::create_dir_all(cache_dir).is_err() {
566 return;
567 }
568
569 let postings_path = cache_dir.join("postings.bin");
570 let lookup_path = cache_dir.join("lookup.bin");
571 let tmp_postings = cache_dir.join("postings.bin.tmp");
572 let tmp_lookup = cache_dir.join("lookup.bin.tmp");
573
574 let active_ids = self.active_file_ids();
575 let mut id_map = HashMap::new();
576 for (new_id, old_id) in active_ids.iter().enumerate() {
577 let Ok(new_id_u32) = u32::try_from(new_id) else {
578 return;
579 };
580 id_map.insert(*old_id, new_id_u32);
581 }
582
583 let write_result = (|| -> std::io::Result<()> {
584 let mut postings_writer = BufWriter::new(File::create(&tmp_postings)?);
585
586 postings_writer.write_all(INDEX_MAGIC)?;
587 write_u32(&mut postings_writer, INDEX_VERSION)?;
588
589 let head = git_head.unwrap_or_default();
590 let root = self.project_root.to_string_lossy();
591 let head_len = u32::try_from(head.len())
592 .map_err(|_| std::io::Error::other("git head too large to cache"))?;
593 let root_len = u32::try_from(root.len())
594 .map_err(|_| std::io::Error::other("project root too large to cache"))?;
595 let file_count = u32::try_from(active_ids.len())
596 .map_err(|_| std::io::Error::other("too many files to cache"))?;
597
598 write_u32(&mut postings_writer, head_len)?;
599 write_u32(&mut postings_writer, root_len)?;
600 write_u64(&mut postings_writer, self.max_file_size)?;
601 write_u32(&mut postings_writer, file_count)?;
602 postings_writer.write_all(head.as_bytes())?;
603 postings_writer.write_all(root.as_bytes())?;
604
605 for old_id in &active_ids {
606 let Some(file) = self.files.get(*old_id as usize) else {
607 return Err(std::io::Error::other("missing file entry for cache write"));
608 };
609 let path = relative_to_root(&self.project_root, &file.path);
610 let path = path.to_string_lossy();
611 let path_len = u32::try_from(path.len())
612 .map_err(|_| std::io::Error::other("cached path too large"))?;
613 let modified = file
614 .modified
615 .duration_since(UNIX_EPOCH)
616 .unwrap_or(Duration::ZERO);
617 let unindexed = if self.unindexed_files.contains(old_id) {
618 1u8
619 } else {
620 0u8
621 };
622
623 postings_writer.write_all(&[unindexed])?;
624 write_u32(&mut postings_writer, path_len)?;
625 write_u64(&mut postings_writer, file.size)?;
626 write_u64(&mut postings_writer, modified.as_secs())?;
627 write_u32(&mut postings_writer, modified.subsec_nanos())?;
628 postings_writer.write_all(path.as_bytes())?;
629 }
630
631 let mut lookup_entries = Vec::new();
632 let mut postings_blob = Vec::new();
633 let mut sorted_postings: Vec<_> = self.postings.iter().collect();
634 sorted_postings.sort_by_key(|(trigram, _)| **trigram);
635
636 for (trigram, postings) in sorted_postings {
637 let offset = u64::try_from(postings_blob.len())
638 .map_err(|_| std::io::Error::other("postings blob too large"))?;
639 let mut count = 0u32;
640
641 for posting in postings {
642 let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
643 continue;
644 };
645
646 postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
647 postings_blob.push(posting.next_mask);
648 postings_blob.push(posting.loc_mask);
649 count = count.saturating_add(1);
650 }
651
652 if count > 0 {
653 lookup_entries.push((*trigram, offset, count));
654 }
655 }
656
657 write_u64(
658 &mut postings_writer,
659 u64::try_from(postings_blob.len())
660 .map_err(|_| std::io::Error::other("postings blob too large"))?,
661 )?;
662 postings_writer.write_all(&postings_blob)?;
663 write_crc32(&mut postings_writer, &tmp_postings)?;
664 postings_writer.flush()?;
665 drop(postings_writer);
666
667 let mut lookup_writer = BufWriter::new(File::create(&tmp_lookup)?);
668 let entry_count = u32::try_from(lookup_entries.len())
669 .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;
670
671 lookup_writer.write_all(LOOKUP_MAGIC)?;
672 write_u32(&mut lookup_writer, INDEX_VERSION)?;
673 write_u32(&mut lookup_writer, entry_count)?;
674
675 for (trigram, offset, count) in lookup_entries {
676 write_u32(&mut lookup_writer, trigram)?;
677 write_u64(&mut lookup_writer, offset)?;
678 write_u32(&mut lookup_writer, count)?;
679 }
680
681 write_crc32(&mut lookup_writer, &tmp_lookup)?;
682 lookup_writer.flush()?;
683 drop(lookup_writer);
684
685 fs::rename(&tmp_postings, &postings_path)?;
686 fs::rename(&tmp_lookup, &lookup_path)?;
687
688 Ok(())
689 })();
690
691 if write_result.is_err() {
692 let _ = fs::remove_file(&tmp_postings);
693 let _ = fs::remove_file(&tmp_lookup);
694 }
695 }
696
697 pub fn read_from_disk(cache_dir: &Path) -> Option<Self> {
698 let postings_path = cache_dir.join("postings.bin");
699 let lookup_path = cache_dir.join("lookup.bin");
700
701 let mut postings_reader = BufReader::new(File::open(&postings_path).ok()?);
702 let mut lookup_reader = BufReader::new(File::open(&lookup_path).ok()?);
703 let postings_len_total =
704 usize::try_from(postings_reader.get_ref().metadata().ok()?.len()).ok()?;
705 let lookup_len_total =
706 usize::try_from(lookup_reader.get_ref().metadata().ok()?.len()).ok()?;
707 if postings_len_total < 4 || lookup_len_total < 4 {
708 return None;
709 }
710 verify_crc32(&postings_path).ok()?;
711 verify_crc32(&lookup_path).ok()?;
712
713 let mut magic = [0u8; 8];
714 postings_reader.read_exact(&mut magic).ok()?;
715 if &magic != INDEX_MAGIC {
716 return None;
717 }
718 if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
719 return None;
720 }
721
722 let head_len = read_u32(&mut postings_reader).ok()? as usize;
723 let root_len = read_u32(&mut postings_reader).ok()? as usize;
724 let max_file_size = read_u64(&mut postings_reader).ok()?;
725 let file_count = read_u32(&mut postings_reader).ok()? as usize;
726 if file_count > MAX_ENTRIES {
727 return None;
728 }
729 let postings_body_len = postings_len_total.checked_sub(4)?;
730 let lookup_body_len = lookup_len_total.checked_sub(4)?;
731
732 let remaining_postings = remaining_bytes(&mut postings_reader, postings_body_len)?;
733 let minimum_file_bytes = file_count.checked_mul(MIN_FILE_ENTRY_BYTES)?;
734 if minimum_file_bytes > remaining_postings {
735 return None;
736 }
737
738 if head_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
739 return None;
740 }
741 let mut head_bytes = vec![0u8; head_len];
742 postings_reader.read_exact(&mut head_bytes).ok()?;
743 let git_head = String::from_utf8(head_bytes)
744 .ok()
745 .filter(|head| !head.is_empty());
746
747 if root_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
748 return None;
749 }
750 let mut root_bytes = vec![0u8; root_len];
751 postings_reader.read_exact(&mut root_bytes).ok()?;
752 let project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);
753
754 let mut files = Vec::with_capacity(file_count);
755 let mut path_to_id = HashMap::new();
756 let mut unindexed_files = HashSet::new();
757
758 for file_id in 0..file_count {
759 let mut unindexed = [0u8; 1];
760 postings_reader.read_exact(&mut unindexed).ok()?;
761 let path_len = read_u32(&mut postings_reader).ok()? as usize;
762 let size = read_u64(&mut postings_reader).ok()?;
763 let secs = read_u64(&mut postings_reader).ok()?;
764 let nanos = read_u32(&mut postings_reader).ok()?;
765 if nanos >= 1_000_000_000 {
766 return None;
767 }
768 if path_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
769 return None;
770 }
771 let mut path_bytes = vec![0u8; path_len];
772 postings_reader.read_exact(&mut path_bytes).ok()?;
773 let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
774 let full_path = project_root.join(relative_path);
775 let file_id_u32 = u32::try_from(file_id).ok()?;
776
777 files.push(FileEntry {
778 path: full_path.clone(),
779 size,
780 modified: UNIX_EPOCH + Duration::new(secs, nanos),
781 });
782 path_to_id.insert(full_path, file_id_u32);
783 if unindexed[0] == 1 {
784 unindexed_files.insert(file_id_u32);
785 }
786 }
787
788 let postings_len = read_u64(&mut postings_reader).ok()? as usize;
789 let max_postings_bytes = MAX_ENTRIES.checked_mul(POSTING_BYTES)?;
790 if postings_len > max_postings_bytes {
791 return None;
792 }
793 if postings_len > remaining_bytes(&mut postings_reader, postings_body_len)? {
794 return None;
795 }
796 let mut postings_blob = vec![0u8; postings_len];
797 postings_reader.read_exact(&mut postings_blob).ok()?;
798
799 let mut lookup_magic = [0u8; 8];
800 lookup_reader.read_exact(&mut lookup_magic).ok()?;
801 if &lookup_magic != LOOKUP_MAGIC {
802 return None;
803 }
804 if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
805 return None;
806 }
807 let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
808 if entry_count > MAX_ENTRIES {
809 return None;
810 }
811 let remaining_lookup = remaining_bytes(&mut lookup_reader, lookup_body_len)?;
812 let minimum_lookup_bytes = entry_count.checked_mul(LOOKUP_ENTRY_BYTES)?;
813 if minimum_lookup_bytes > remaining_lookup {
814 return None;
815 }
816
817 let mut postings = HashMap::new();
818 let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
819
820 for _ in 0..entry_count {
821 let trigram = read_u32(&mut lookup_reader).ok()?;
822 let offset = read_u64(&mut lookup_reader).ok()? as usize;
823 let count = read_u32(&mut lookup_reader).ok()? as usize;
824 if count > MAX_ENTRIES {
825 return None;
826 }
827 let bytes_len = count.checked_mul(POSTING_BYTES)?;
828 let end = offset.checked_add(bytes_len)?;
829 if end > postings_blob.len() {
830 return None;
831 }
832
833 let mut trigram_postings = Vec::with_capacity(count);
834 for chunk in postings_blob[offset..end].chunks_exact(6) {
835 let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
836 let posting = Posting {
837 file_id,
838 next_mask: chunk[4],
839 loc_mask: chunk[5],
840 };
841 trigram_postings.push(posting.clone());
842 file_trigrams.entry(file_id).or_default().push(trigram);
843 }
844 postings.insert(trigram, trigram_postings);
845 }
846
847 Some(SearchIndex {
848 postings,
849 files,
850 path_to_id,
851 ready: true,
852 project_root,
853 git_head,
854 max_file_size,
855 file_trigrams,
856 unindexed_files,
857 })
858 }
859
860 pub(crate) fn stored_git_head(&self) -> Option<&str> {
861 self.git_head.as_deref()
862 }
863
864 pub(crate) fn set_ready(&mut self, ready: bool) {
865 self.ready = ready;
866 }
867
868 pub(crate) fn rebuild_or_refresh(
869 root: &Path,
870 max_file_size: u64,
871 current_head: Option<String>,
872 baseline: Option<SearchIndex>,
873 ) -> Self {
874 if current_head.is_none() {
875 return SearchIndex::build_with_limit(root, max_file_size);
876 }
877
878 if let Some(mut baseline) = baseline {
879 baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
880 baseline.max_file_size = max_file_size;
881
882 if baseline.git_head == current_head {
883 verify_file_mtimes(&mut baseline);
887 baseline.ready = true;
888 return baseline;
889 }
890
891 if let (Some(previous), Some(current)) =
892 (baseline.git_head.clone(), current_head.clone())
893 {
894 let project_root = baseline.project_root.clone();
895 if apply_git_diff_updates(&mut baseline, &project_root, &previous, ¤t) {
896 baseline.git_head = Some(current);
897 baseline.ready = true;
898 return baseline;
899 }
900 }
901 }
902
903 SearchIndex::build_with_limit(root, max_file_size)
904 }
905
906 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
907 let file_id = u32::try_from(self.files.len()).ok()?;
908 let metadata = fs::metadata(path).ok();
909 let size = metadata
910 .as_ref()
911 .map_or(size_hint, |metadata| metadata.len());
912 let modified = metadata
913 .and_then(|metadata| metadata.modified().ok())
914 .unwrap_or(UNIX_EPOCH);
915
916 self.files.push(FileEntry {
917 path: path.to_path_buf(),
918 size,
919 modified,
920 });
921 self.path_to_id.insert(path.to_path_buf(), file_id);
922 Some(file_id)
923 }
924
925 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
926 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
927 return;
928 };
929 self.unindexed_files.insert(file_id);
930 self.file_trigrams.insert(file_id, Vec::new());
931 }
932
933 fn active_file_ids(&self) -> Vec<u32> {
934 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
935 ids.sort_unstable();
936 ids
937 }
938
939 fn is_active_file(&self, file_id: u32) -> bool {
940 self.files
941 .get(file_id as usize)
942 .map(|file| !file.path.as_os_str().is_empty())
943 .unwrap_or(false)
944 }
945
946 fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> Vec<u32> {
947 let Some(postings) = self.postings.get(&trigram) else {
948 return Vec::new();
949 };
950
951 let mut matches = Vec::with_capacity(postings.len());
952
953 for posting in postings {
954 if let Some(filter) = filter {
955 if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
958 continue;
959 }
960 }
965 if self.is_active_file(posting.file_id) {
966 matches.push(posting.file_id);
967 }
968 }
969
970 matches
971 }
972}
973
974fn search_candidate_file(
975 file: &FileEntry,
976 matcher: &SearchMatcher,
977 max_results: usize,
978 stop_after: usize,
979 total_matches: &AtomicUsize,
980 files_searched: &AtomicUsize,
981 files_with_matches: &AtomicUsize,
982 truncated: &AtomicBool,
983) -> Vec<SharedGrepMatch> {
984 if should_stop_search(truncated, total_matches, stop_after) {
985 return Vec::new();
986 }
987
988 let content = match read_indexed_file_bytes(&file.path) {
989 Some(content) => content,
990 None => return Vec::new(),
991 };
992 if is_binary_bytes(&content) {
999 return Vec::new();
1000 }
1001 files_searched.fetch_add(1, Ordering::Relaxed);
1002
1003 let shared_path = Arc::new(file.path.clone());
1004 let mut matches = Vec::new();
1005 let mut line_starts = None;
1006 let mut seen_lines = HashSet::new();
1007 let mut matched_this_file = false;
1008
1009 match matcher {
1010 SearchMatcher::Literal(LiteralSearch::CaseSensitive(needle)) => {
1011 let finder = memchr::memmem::Finder::new(needle);
1012 let mut start = 0;
1013
1014 while let Some(position) = finder.find(&content[start..]) {
1015 if should_stop_search(truncated, total_matches, stop_after) {
1016 break;
1017 }
1018
1019 let offset = start + position;
1020 start = offset + 1;
1021
1022 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1023 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1024 if !seen_lines.insert(line) {
1025 continue;
1026 }
1027
1028 matched_this_file = true;
1029 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1030 if match_number > max_results {
1031 truncated.store(true, Ordering::Relaxed);
1032 break;
1033 }
1034
1035 let end = offset + needle.len();
1036 matches.push(SharedGrepMatch {
1037 file: shared_path.clone(),
1038 line,
1039 column,
1040 line_text,
1041 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1042 });
1043 }
1044 }
1045 SearchMatcher::Literal(LiteralSearch::AsciiCaseInsensitive(needle)) => {
1046 let search_content = content.to_ascii_lowercase();
1047 let finder = memchr::memmem::Finder::new(needle);
1048 let mut start = 0;
1049
1050 while let Some(position) = finder.find(&search_content[start..]) {
1051 if should_stop_search(truncated, total_matches, stop_after) {
1052 break;
1053 }
1054
1055 let offset = start + position;
1056 start = offset + 1;
1057
1058 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1059 let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
1060 if !seen_lines.insert(line) {
1061 continue;
1062 }
1063
1064 matched_this_file = true;
1065 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1066 if match_number > max_results {
1067 truncated.store(true, Ordering::Relaxed);
1068 break;
1069 }
1070
1071 let end = offset + needle.len();
1072 matches.push(SharedGrepMatch {
1073 file: shared_path.clone(),
1074 line,
1075 column,
1076 line_text,
1077 match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
1078 });
1079 }
1080 }
1081 SearchMatcher::Regex(regex) => {
1082 for matched in regex.find_iter(&content) {
1083 if should_stop_search(truncated, total_matches, stop_after) {
1084 break;
1085 }
1086
1087 let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
1088 let (line, column, line_text) =
1089 line_details_bytes(&content, line_starts, matched.start());
1090 if !seen_lines.insert(line) {
1091 continue;
1092 }
1093
1094 matched_this_file = true;
1095 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
1096 if match_number > max_results {
1097 truncated.store(true, Ordering::Relaxed);
1098 break;
1099 }
1100
1101 matches.push(SharedGrepMatch {
1102 file: shared_path.clone(),
1103 line,
1104 column,
1105 line_text,
1106 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
1107 });
1108 }
1109 }
1110 }
1111
1112 if matched_this_file {
1113 files_with_matches.fetch_add(1, Ordering::Relaxed);
1114 }
1115
1116 matches
1117}
1118
1119fn should_stop_search(
1120 truncated: &AtomicBool,
1121 total_matches: &AtomicUsize,
1122 stop_after: usize,
1123) -> bool {
1124 truncated.load(Ordering::Relaxed) && total_matches.load(Ordering::Relaxed) >= stop_after
1125}
1126
1127fn intersect_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1128 let mut merged = Vec::with_capacity(left.len().min(right.len()));
1129 let mut left_index = 0;
1130 let mut right_index = 0;
1131
1132 while left_index < left.len() && right_index < right.len() {
1133 match left[left_index].cmp(&right[right_index]) {
1134 std::cmp::Ordering::Less => left_index += 1,
1135 std::cmp::Ordering::Greater => right_index += 1,
1136 std::cmp::Ordering::Equal => {
1137 merged.push(left[left_index]);
1138 left_index += 1;
1139 right_index += 1;
1140 }
1141 }
1142 }
1143
1144 merged
1145}
1146
1147fn union_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
1148 let mut merged = Vec::with_capacity(left.len() + right.len());
1149 let mut left_index = 0;
1150 let mut right_index = 0;
1151
1152 while left_index < left.len() && right_index < right.len() {
1153 match left[left_index].cmp(&right[right_index]) {
1154 std::cmp::Ordering::Less => {
1155 merged.push(left[left_index]);
1156 left_index += 1;
1157 }
1158 std::cmp::Ordering::Greater => {
1159 merged.push(right[right_index]);
1160 right_index += 1;
1161 }
1162 std::cmp::Ordering::Equal => {
1163 merged.push(left[left_index]);
1164 left_index += 1;
1165 right_index += 1;
1166 }
1167 }
1168 }
1169
1170 merged.extend_from_slice(&left[left_index..]);
1171 merged.extend_from_slice(&right[right_index..]);
1172 merged
1173}
1174
1175pub fn decompose_regex(pattern: &str) -> RegexQuery {
1176 let hir = match regex_syntax::parse(pattern) {
1177 Ok(hir) => hir,
1178 Err(_) => return RegexQuery::default(),
1179 };
1180
1181 let build = build_query(&hir);
1182 build.into_query()
1183}
1184
1185pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
1186 ((a as u32) << 16) | ((b as u32) << 8) | c as u32
1187}
1188
1189pub fn normalize_char(c: u8) -> u8 {
1190 c.to_ascii_lowercase()
1191}
1192
1193pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
1194 if content.len() < 3 {
1195 return Vec::new();
1196 }
1197
1198 let mut trigrams = Vec::with_capacity(content.len().saturating_sub(2));
1199 for start in 0..=content.len() - 3 {
1200 let trigram = pack_trigram(
1201 normalize_char(content[start]),
1202 normalize_char(content[start + 1]),
1203 normalize_char(content[start + 2]),
1204 );
1205 let next_char = content.get(start + 3).copied().unwrap_or(EOF_SENTINEL);
1206 trigrams.push((trigram, next_char, start));
1207 }
1208 trigrams
1209}
1210
1211pub fn resolve_cache_dir(project_root: &Path, storage_dir: Option<&Path>) -> PathBuf {
1212 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
1214 return PathBuf::from(override_dir)
1215 .join("index")
1216 .join(project_cache_key(project_root));
1217 }
1218 if let Some(dir) = storage_dir {
1220 return dir.join("index").join(project_cache_key(project_root));
1221 }
1222 let home = std::env::var_os("HOME")
1227 .or_else(|| std::env::var_os("USERPROFILE"))
1228 .map(PathBuf::from)
1229 .unwrap_or_else(std::env::temp_dir);
1230 home.join(".cache")
1231 .join("aft")
1232 .join("index")
1233 .join(project_cache_key(project_root))
1234}
1235
1236pub(crate) fn build_path_filters(
1237 include: &[String],
1238 exclude: &[String],
1239) -> Result<PathFilters, String> {
1240 Ok(PathFilters {
1241 includes: build_globset(include)?,
1242 excludes: build_globset(exclude)?,
1243 })
1244}
1245
1246pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
1247 walk_project_files_from(root, root, filters)
1248}
1249
1250pub(crate) fn walk_project_files_from(
1251 filter_root: &Path,
1252 search_root: &Path,
1253 filters: &PathFilters,
1254) -> Vec<PathBuf> {
1255 let mut builder = WalkBuilder::new(search_root);
1256 builder
1257 .hidden(false)
1258 .git_ignore(true)
1259 .git_global(true)
1260 .git_exclude(true)
1261 .filter_entry(|entry| {
1262 let name = entry.file_name().to_string_lossy();
1263 if entry.file_type().map_or(false, |ft| ft.is_dir()) {
1264 return !matches!(
1265 name.as_ref(),
1266 "node_modules"
1267 | "target"
1268 | "venv"
1269 | ".venv"
1270 | ".git"
1271 | "__pycache__"
1272 | ".tox"
1273 | "dist"
1274 | "build"
1275 );
1276 }
1277 true
1278 });
1279
1280 let mut files = Vec::new();
1281 for entry in builder.build().filter_map(|entry| entry.ok()) {
1282 if !entry
1283 .file_type()
1284 .map_or(false, |file_type| file_type.is_file())
1285 {
1286 continue;
1287 }
1288 let path = entry.into_path();
1289 if filters.matches(filter_root, &path) {
1290 files.push(path);
1291 }
1292 }
1293
1294 sort_paths_by_mtime_desc(&mut files);
1295 files
1296}
1297
1298pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
1299 let bytes = fs::read(path).ok()?;
1300 if is_binary_bytes(&bytes) {
1301 return None;
1302 }
1303 String::from_utf8(bytes).ok()
1304}
1305
1306fn read_indexed_file_bytes(path: &Path) -> Option<Vec<u8>> {
1307 fs::read(path).ok()
1308}
1309
1310pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
1311 path.strip_prefix(root)
1312 .map(PathBuf::from)
1313 .unwrap_or_else(|_| path.to_path_buf())
1314}
1315
1316pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
1329 use std::collections::HashMap;
1330 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(paths.len());
1331 for path in paths.iter() {
1332 mtimes
1333 .entry(path.clone())
1334 .or_insert_with(|| path_modified_time(path));
1335 }
1336 paths.sort_by(|left, right| {
1337 let left_mtime = mtimes.get(left).and_then(|v| *v);
1338 let right_mtime = mtimes.get(right).and_then(|v| *v);
1339 right_mtime.cmp(&left_mtime).then_with(|| left.cmp(right))
1340 });
1341}
1342
1343pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
1346 use std::collections::HashMap;
1347 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::new();
1348 for m in matches.iter() {
1349 mtimes.entry(m.file.clone()).or_insert_with(|| {
1350 let resolved = resolve_match_path(project_root, &m.file);
1351 path_modified_time(&resolved)
1352 });
1353 }
1354 matches.sort_by(|left, right| {
1355 let left_mtime = mtimes.get(&left.file).and_then(|v| *v);
1356 let right_mtime = mtimes.get(&right.file).and_then(|v| *v);
1357 right_mtime
1358 .cmp(&left_mtime)
1359 .then_with(|| left.file.cmp(&right.file))
1360 .then_with(|| left.line.cmp(&right.line))
1361 .then_with(|| left.column.cmp(&right.column))
1362 });
1363}
1364
1365fn sort_shared_grep_matches_by_cached_mtime_desc<F>(
1370 matches: &mut [SharedGrepMatch],
1371 modified_for_path: F,
1372) where
1373 F: Fn(&Path) -> Option<SystemTime>,
1374{
1375 use std::collections::HashMap;
1376 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(matches.len());
1377 for m in matches.iter() {
1378 let path = m.file.as_path().to_path_buf();
1379 mtimes
1380 .entry(path.clone())
1381 .or_insert_with(|| modified_for_path(&path));
1382 }
1383 matches.sort_by(|left, right| {
1384 let left_mtime = mtimes.get(left.file.as_path()).and_then(|v| *v);
1385 let right_mtime = mtimes.get(right.file.as_path()).and_then(|v| *v);
1386 right_mtime
1387 .cmp(&left_mtime)
1388 .then_with(|| left.file.as_path().cmp(right.file.as_path()))
1389 .then_with(|| left.line.cmp(&right.line))
1390 .then_with(|| left.column.cmp(&right.column))
1391 });
1392}
1393
1394pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
1395 let resolved_project_root = canonicalize_or_normalize(project_root);
1396 let root = match path {
1397 Some(path) => {
1398 let path = PathBuf::from(path);
1399 if path.is_absolute() {
1400 canonicalize_or_normalize(&path)
1401 } else {
1402 normalize_path(&resolved_project_root.join(path))
1403 }
1404 }
1405 None => resolved_project_root.clone(),
1406 };
1407
1408 let use_index = is_within_search_root(&resolved_project_root, &root);
1409 SearchScope { root, use_index }
1410}
1411
1412pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
1413 content_inspector::inspect(content).is_binary()
1414}
1415
1416pub(crate) fn current_git_head(root: &Path) -> Option<String> {
1417 run_git(root, &["rev-parse", "HEAD"])
1418}
1419
1420pub fn project_cache_key(project_root: &Path) -> String {
1421 use sha2::{Digest, Sha256};
1422
1423 let mut hasher = Sha256::new();
1424
1425 if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
1426 hasher.update(root_commit.as_bytes());
1429 } else {
1430 let canonical_root = canonicalize_or_normalize(project_root);
1432 hasher.update(canonical_root.to_string_lossy().as_bytes());
1433 }
1434
1435 let digest = format!("{:x}", hasher.finalize());
1436 digest[..16].to_string()
1437}
1438
1439impl PathFilters {
1440 fn matches(&self, root: &Path, path: &Path) -> bool {
1441 let relative = to_glob_path(&relative_to_root(root, path));
1442 if self
1443 .includes
1444 .as_ref()
1445 .is_some_and(|includes| !includes.is_match(&relative))
1446 {
1447 return false;
1448 }
1449 if self
1450 .excludes
1451 .as_ref()
1452 .is_some_and(|excludes| excludes.is_match(&relative))
1453 {
1454 return false;
1455 }
1456 true
1457 }
1458}
1459
1460fn canonicalize_or_normalize(path: &Path) -> PathBuf {
1461 fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
1462}
1463
1464fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
1465 if path.is_absolute() {
1466 path.to_path_buf()
1467 } else {
1468 project_root.join(path)
1469 }
1470}
1471
1472fn path_modified_time(path: &Path) -> Option<SystemTime> {
1473 fs::metadata(path)
1474 .and_then(|metadata| metadata.modified())
1475 .ok()
1476}
1477
1478fn normalize_path(path: &Path) -> PathBuf {
1479 let mut result = PathBuf::new();
1480 for component in path.components() {
1481 match component {
1482 Component::ParentDir => {
1483 if !result.pop() {
1484 result.push(component);
1485 }
1486 }
1487 Component::CurDir => {}
1488 _ => result.push(component),
1489 }
1490 }
1491 result
1492}
1493
1494fn verify_file_mtimes(index: &mut SearchIndex) {
1497 let mut stale_paths = Vec::new();
1499 for entry in &index.files {
1500 if entry.path.as_os_str().is_empty() {
1501 continue; }
1503 match fs::metadata(&entry.path) {
1504 Ok(meta) => {
1505 let current_mtime = meta.modified().unwrap_or(UNIX_EPOCH);
1506 if current_mtime != entry.modified || meta.len() != entry.size {
1507 stale_paths.push(entry.path.clone());
1508 }
1509 }
1510 Err(_) => {
1511 stale_paths.push(entry.path.clone());
1513 }
1514 }
1515 }
1516
1517 for path in &stale_paths {
1519 index.update_file(path);
1520 }
1521
1522 let filters = PathFilters::default();
1524 for path in walk_project_files(&index.project_root, &filters) {
1525 if !index.path_to_id.contains_key(&path) {
1526 index.update_file(&path);
1527 }
1528 }
1529
1530 if !stale_paths.is_empty() {
1531 log::info!(
1532 "search index: refreshed {} stale file(s) from disk cache",
1533 stale_paths.len()
1534 );
1535 }
1536}
1537
1538fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
1539 path.starts_with(search_root)
1540}
1541
1542impl QueryBuild {
1543 fn into_query(self) -> RegexQuery {
1544 let mut query = RegexQuery::default();
1545
1546 for run in self.and_runs {
1547 add_run_to_and_query(&mut query, &run);
1548 }
1549
1550 for group in self.or_groups {
1551 let mut trigrams = BTreeSet::new();
1552 let mut filters = HashMap::new();
1553 for run in group {
1554 for (trigram, filter) in trigram_filters(&run) {
1555 trigrams.insert(trigram);
1556 merge_filter(filters.entry(trigram).or_default(), filter);
1557 }
1558 }
1559 if !trigrams.is_empty() {
1560 query.or_groups.push(trigrams.into_iter().collect());
1561 query.or_filters.push(filters);
1562 }
1563 }
1564
1565 query
1566 }
1567}
1568
1569fn build_query(hir: &Hir) -> QueryBuild {
1570 match hir.kind() {
1571 HirKind::Literal(literal) => {
1572 if literal.0.len() >= 3 {
1573 QueryBuild {
1574 and_runs: vec![literal.0.to_vec()],
1575 or_groups: Vec::new(),
1576 }
1577 } else {
1578 QueryBuild::default()
1579 }
1580 }
1581 HirKind::Capture(capture) => build_query(&capture.sub),
1582 HirKind::Concat(parts) => {
1583 let mut build = QueryBuild::default();
1584 for part in parts {
1585 let part_build = build_query(part);
1586 build.and_runs.extend(part_build.and_runs);
1587 build.or_groups.extend(part_build.or_groups);
1588 }
1589 build
1590 }
1591 HirKind::Alternation(parts) => {
1592 let mut group = Vec::new();
1593 for part in parts {
1594 let Some(mut choices) = guaranteed_run_choices(part) else {
1595 return QueryBuild::default();
1596 };
1597 group.append(&mut choices);
1598 }
1599 if group.is_empty() {
1600 QueryBuild::default()
1601 } else {
1602 QueryBuild {
1603 and_runs: Vec::new(),
1604 or_groups: vec![group],
1605 }
1606 }
1607 }
1608 HirKind::Repetition(repetition) => {
1609 if repetition.min == 0 {
1610 QueryBuild::default()
1611 } else {
1612 build_query(&repetition.sub)
1613 }
1614 }
1615 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
1616 }
1617}
1618
1619fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
1620 match hir.kind() {
1621 HirKind::Literal(literal) => {
1622 if literal.0.len() >= 3 {
1623 Some(vec![literal.0.to_vec()])
1624 } else {
1625 None
1626 }
1627 }
1628 HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
1629 HirKind::Concat(parts) => {
1630 let mut runs = Vec::new();
1631 for part in parts {
1632 if let Some(mut part_runs) = guaranteed_run_choices(part) {
1633 runs.append(&mut part_runs);
1634 }
1635 }
1636 if runs.is_empty() {
1637 None
1638 } else {
1639 Some(runs)
1640 }
1641 }
1642 HirKind::Alternation(parts) => {
1643 let mut runs = Vec::new();
1644 for part in parts {
1645 let Some(mut part_runs) = guaranteed_run_choices(part) else {
1646 return None;
1647 };
1648 runs.append(&mut part_runs);
1649 }
1650 if runs.is_empty() {
1651 None
1652 } else {
1653 Some(runs)
1654 }
1655 }
1656 HirKind::Repetition(repetition) => {
1657 if repetition.min == 0 {
1658 None
1659 } else {
1660 guaranteed_run_choices(&repetition.sub)
1661 }
1662 }
1663 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
1664 }
1665}
1666
1667fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1668 for (trigram, filter) in trigram_filters(run) {
1669 if !query.and_trigrams.contains(&trigram) {
1670 query.and_trigrams.push(trigram);
1671 }
1672 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1673 }
1674}
1675
1676fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1677 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1678 for (trigram, next_char, position) in extract_trigrams(run) {
1679 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1680 if next_char != EOF_SENTINEL {
1681 entry.next_mask |= mask_for_next_char(next_char);
1682 }
1683 entry.loc_mask |= mask_for_position(position);
1684 }
1685 filters.into_iter().collect()
1686}
1687
1688fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
1689 target.next_mask |= filter.next_mask;
1690 target.loc_mask |= filter.loc_mask;
1691}
1692
1693fn mask_for_next_char(next_char: u8) -> u8 {
1694 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1695 1u8 << bit
1696}
1697
1698fn mask_for_position(position: usize) -> u8 {
1699 1u8 << (position % 8)
1700}
1701
1702fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1703 if patterns.is_empty() {
1704 return Ok(None);
1705 }
1706
1707 let mut builder = GlobSetBuilder::new();
1708 for pattern in patterns {
1709 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1710 builder.add(glob);
1711 }
1712 builder.build().map(Some).map_err(|error| error.to_string())
1713}
1714
1715fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
1716 let mut buffer = [0u8; 4];
1717 reader.read_exact(&mut buffer)?;
1718 Ok(u32::from_le_bytes(buffer))
1719}
1720
1721fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
1722 let mut buffer = [0u8; 8];
1723 reader.read_exact(&mut buffer)?;
1724 Ok(u64::from_le_bytes(buffer))
1725}
1726
1727fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
1728 writer.write_all(&value.to_le_bytes())
1729}
1730
1731fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
1732 writer.write_all(&value.to_le_bytes())
1733}
1734
1735fn write_crc32(writer: &mut BufWriter<File>, path: &Path) -> std::io::Result<()> {
1736 writer.flush()?;
1737 let body = std::fs::read(path)?;
1738 let checksum = crc32fast::hash(&body);
1739 writer.write_all(&checksum.to_le_bytes())
1740}
1741
1742fn verify_crc32(path: &Path) -> std::io::Result<()> {
1743 let bytes = std::fs::read(path)?;
1744 let Some((body, stored)) = bytes.split_last_chunk::<4>() else {
1745 return Err(std::io::Error::other("search index checksum missing"));
1746 };
1747 let expected = u32::from_le_bytes(*stored);
1748 let actual = crc32fast::hash(body);
1749 if actual != expected {
1750 return Err(std::io::Error::other("search index checksum mismatch"));
1751 }
1752 Ok(())
1753}
1754
1755fn remaining_bytes<R: Seek>(reader: &mut R, total_len: usize) -> Option<usize> {
1756 let pos = usize::try_from(reader.stream_position().ok()?).ok()?;
1757 total_len.checked_sub(pos)
1758}
1759
1760fn run_git(root: &Path, args: &[&str]) -> Option<String> {
1761 let output = Command::new("git")
1762 .arg("-C")
1763 .arg(root)
1764 .args(args)
1765 .output()
1766 .ok()?;
1767 if !output.status.success() {
1768 return None;
1769 }
1770 let value = String::from_utf8(output.stdout).ok()?;
1771 let value = value.trim().to_string();
1772 if value.is_empty() {
1773 None
1774 } else {
1775 Some(value)
1776 }
1777}
1778
1779fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
1780 let diff_range = format!("{}..{}", from, to);
1781 let output = match Command::new("git")
1782 .arg("-C")
1783 .arg(root)
1784 .args(["diff", "--name-only", &diff_range])
1785 .output()
1786 {
1787 Ok(output) => output,
1788 Err(_) => return false,
1789 };
1790
1791 if !output.status.success() {
1792 return false;
1793 }
1794
1795 let Ok(paths) = String::from_utf8(output.stdout) else {
1796 return false;
1797 };
1798
1799 for relative_path in paths.lines().map(str::trim).filter(|path| !path.is_empty()) {
1800 let path = root.join(relative_path);
1801 if path.exists() {
1802 index.update_file(&path);
1803 } else {
1804 index.remove_file(&path);
1805 }
1806 }
1807
1808 true
1809}
1810
1811fn is_binary_path(path: &Path, size: u64) -> bool {
1812 if size == 0 {
1813 return false;
1814 }
1815
1816 let mut file = match File::open(path) {
1817 Ok(file) => file,
1818 Err(_) => return true,
1819 };
1820
1821 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
1822 match file.read(&mut preview) {
1823 Ok(read) => is_binary_bytes(&preview[..read]),
1824 Err(_) => true,
1825 }
1826}
1827
1828fn line_starts_bytes(content: &[u8]) -> Vec<usize> {
1829 let mut starts = vec![0usize];
1830 for (index, byte) in content.iter().copied().enumerate() {
1831 if byte == b'\n' {
1832 starts.push(index + 1);
1833 }
1834 }
1835 starts
1836}
1837
1838fn line_details_bytes(content: &[u8], line_starts: &[usize], offset: usize) -> (u32, u32, String) {
1839 let line_index = match line_starts.binary_search(&offset) {
1840 Ok(index) => index,
1841 Err(index) => index.saturating_sub(1),
1842 };
1843 let line_start = line_starts.get(line_index).copied().unwrap_or(0);
1844 let line_end = content[line_start..]
1845 .iter()
1846 .position(|byte| *byte == b'\n')
1847 .map(|length| line_start + length)
1848 .unwrap_or(content.len());
1849 let mut line_slice = &content[line_start..line_end];
1850 if line_slice.ends_with(b"\r") {
1851 line_slice = &line_slice[..line_slice.len() - 1];
1852 }
1853 let line_text = String::from_utf8_lossy(line_slice).into_owned();
1854 let column = String::from_utf8_lossy(&content[line_start..offset])
1855 .chars()
1856 .count() as u32
1857 + 1;
1858 (line_index as u32 + 1, column, line_text)
1859}
1860
1861fn to_glob_path(path: &Path) -> String {
1862 path.to_string_lossy().replace('\\', "/")
1863}
1864
1865#[cfg(test)]
1866mod tests {
1867 use std::process::Command;
1868
1869 use super::*;
1870
1871 #[test]
1872 fn extract_trigrams_tracks_next_char_and_position() {
1873 let trigrams = extract_trigrams(b"Rust");
1874 assert_eq!(trigrams.len(), 2);
1875 assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
1876 assert_eq!(
1877 trigrams[1],
1878 (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
1879 );
1880 }
1881
1882 #[test]
1883 fn decompose_regex_extracts_literals_and_alternations() {
1884 let query = decompose_regex("abc(def|ghi)xyz");
1885 assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
1886 assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
1887 assert_eq!(query.or_groups.len(), 1);
1888 assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
1889 assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
1890 }
1891
1892 #[test]
1893 fn candidates_intersect_posting_lists() {
1894 let mut index = SearchIndex::new();
1895 let dir = tempfile::tempdir().expect("create temp dir");
1896 let alpha = dir.path().join("alpha.txt");
1897 let beta = dir.path().join("beta.txt");
1898 fs::write(&alpha, "abcdef").expect("write alpha");
1899 fs::write(&beta, "abcxyz").expect("write beta");
1900 index.project_root = dir.path().to_path_buf();
1901 index.index_file(&alpha, b"abcdef");
1902 index.index_file(&beta, b"abcxyz");
1903
1904 let query = RegexQuery {
1905 and_trigrams: vec![
1906 pack_trigram(b'a', b'b', b'c'),
1907 pack_trigram(b'd', b'e', b'f'),
1908 ],
1909 ..RegexQuery::default()
1910 };
1911
1912 let candidates = index.candidates(&query);
1913 assert_eq!(candidates.len(), 1);
1914 assert_eq!(index.files[candidates[0] as usize].path, alpha);
1915 }
1916
1917 #[test]
1918 fn candidates_apply_bloom_filters() {
1919 let mut index = SearchIndex::new();
1920 let dir = tempfile::tempdir().expect("create temp dir");
1921 let file = dir.path().join("sample.txt");
1922 fs::write(&file, "abcd efgh").expect("write sample");
1923 index.project_root = dir.path().to_path_buf();
1924 index.index_file(&file, b"abcd efgh");
1925
1926 let trigram = pack_trigram(b'a', b'b', b'c');
1927 let matching_filter = PostingFilter {
1928 next_mask: mask_for_next_char(b'd'),
1929 loc_mask: mask_for_position(0),
1930 };
1931 let non_matching_filter = PostingFilter {
1932 next_mask: mask_for_next_char(b'z'),
1933 loc_mask: mask_for_position(0),
1934 };
1935
1936 assert_eq!(
1937 index
1938 .postings_for_trigram(trigram, Some(matching_filter))
1939 .len(),
1940 1
1941 );
1942 assert!(index
1943 .postings_for_trigram(trigram, Some(non_matching_filter))
1944 .is_empty());
1945 }
1946
1947 #[test]
1948 fn disk_round_trip_preserves_postings_and_files() {
1949 let dir = tempfile::tempdir().expect("create temp dir");
1950 let project = dir.path().join("project");
1951 fs::create_dir_all(&project).expect("create project dir");
1952 let file = project.join("src.txt");
1953 fs::write(&file, "abcdef").expect("write source");
1954
1955 let mut index = SearchIndex::build(&project);
1956 index.git_head = Some("deadbeef".to_string());
1957 let cache_dir = dir.path().join("cache");
1958 index.write_to_disk(&cache_dir, index.git_head.as_deref());
1959
1960 let loaded = SearchIndex::read_from_disk(&cache_dir).expect("load index from disk");
1961 assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
1962 assert_eq!(loaded.files.len(), 1);
1963 assert_eq!(
1964 relative_to_root(&loaded.project_root, &loaded.files[0].path),
1965 PathBuf::from("src.txt")
1966 );
1967 assert_eq!(loaded.postings.len(), index.postings.len());
1968 assert!(loaded
1969 .postings
1970 .contains_key(&pack_trigram(b'a', b'b', b'c')));
1971 }
1972
1973 #[test]
1974 fn read_from_disk_rejects_corrupt_postings_checksum() {
1975 let dir = tempfile::tempdir().expect("create temp dir");
1976 let project = dir.path().join("project");
1977 fs::create_dir_all(&project).expect("create project dir");
1978 fs::write(project.join("src.txt"), "abcdef").expect("write source");
1979
1980 let index = SearchIndex::build(&project);
1981 let cache_dir = dir.path().join("cache");
1982 index.write_to_disk(&cache_dir, None);
1983
1984 let postings_path = cache_dir.join("postings.bin");
1985 let mut bytes = fs::read(&postings_path).expect("read postings");
1986 let middle = bytes.len() / 2;
1987 bytes[middle] ^= 0xff;
1988 fs::write(&postings_path, bytes).expect("write corrupted postings");
1989
1990 assert!(SearchIndex::read_from_disk(&cache_dir).is_none());
1991 }
1992
1993 #[test]
1994 fn write_to_disk_uses_temp_files_and_cleans_them_up() {
1995 let dir = tempfile::tempdir().expect("create temp dir");
1996 let project = dir.path().join("project");
1997 fs::create_dir_all(&project).expect("create project dir");
1998 fs::write(project.join("src.txt"), "abcdef").expect("write source");
1999
2000 let index = SearchIndex::build(&project);
2001 let cache_dir = dir.path().join("cache");
2002 index.write_to_disk(&cache_dir, None);
2003
2004 assert!(cache_dir.join("postings.bin").is_file());
2005 assert!(cache_dir.join("lookup.bin").is_file());
2006 assert!(!cache_dir.join("postings.bin.tmp").exists());
2007 assert!(!cache_dir.join("lookup.bin.tmp").exists());
2008 }
2009
2010 #[test]
2011 fn project_cache_key_includes_checkout_path() {
2012 let dir = tempfile::tempdir().expect("create temp dir");
2013 let source = dir.path().join("source");
2014 fs::create_dir_all(&source).expect("create source repo dir");
2015 fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");
2016
2017 assert!(Command::new("git")
2018 .current_dir(&source)
2019 .args(["init"])
2020 .status()
2021 .expect("init git repo")
2022 .success());
2023 assert!(Command::new("git")
2024 .current_dir(&source)
2025 .args(["add", "."])
2026 .status()
2027 .expect("git add")
2028 .success());
2029 assert!(Command::new("git")
2030 .current_dir(&source)
2031 .args([
2032 "-c",
2033 "user.name=AFT Tests",
2034 "-c",
2035 "user.email=aft-tests@example.com",
2036 "commit",
2037 "-m",
2038 "initial",
2039 ])
2040 .status()
2041 .expect("git commit")
2042 .success());
2043
2044 let clone = dir.path().join("clone");
2045 assert!(Command::new("git")
2046 .args(["clone", "--quiet"])
2047 .arg(&source)
2048 .arg(&clone)
2049 .status()
2050 .expect("git clone")
2051 .success());
2052
2053 let source_key = project_cache_key(&source);
2054 let clone_key = project_cache_key(&clone);
2055
2056 assert_eq!(source_key.len(), 16);
2057 assert_eq!(clone_key.len(), 16);
2058 assert_eq!(source_key, clone_key);
2060 }
2061
2062 #[test]
2063 fn resolve_search_scope_disables_index_for_external_path() {
2064 let dir = tempfile::tempdir().expect("create temp dir");
2065 let project = dir.path().join("project");
2066 let outside = dir.path().join("outside");
2067 fs::create_dir_all(&project).expect("create project dir");
2068 fs::create_dir_all(&outside).expect("create outside dir");
2069
2070 let scope = resolve_search_scope(&project, outside.to_str());
2071
2072 assert_eq!(
2073 scope.root,
2074 fs::canonicalize(&outside).expect("canonicalize outside")
2075 );
2076 assert!(!scope.use_index);
2077 }
2078
2079 #[test]
2080 fn grep_filters_matches_to_search_root() {
2081 let dir = tempfile::tempdir().expect("create temp dir");
2082 let project = dir.path().join("project");
2083 let src = project.join("src");
2084 let docs = project.join("docs");
2085 fs::create_dir_all(&src).expect("create src dir");
2086 fs::create_dir_all(&docs).expect("create docs dir");
2087 fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
2088 fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");
2089
2090 let index = SearchIndex::build(&project);
2091 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);
2092
2093 assert_eq!(result.files_searched, 1);
2094 assert_eq!(result.files_with_matches, 1);
2095 assert_eq!(result.matches.len(), 1);
2096 let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
2098 assert_eq!(result.matches[0].file, expected);
2099 }
2100
2101 #[test]
2102 fn grep_deduplicates_multiple_matches_on_same_line() {
2103 let dir = tempfile::tempdir().expect("create temp dir");
2104 let project = dir.path().join("project");
2105 let src = project.join("src");
2106 fs::create_dir_all(&src).expect("create src dir");
2107 fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");
2108
2109 let index = SearchIndex::build(&project);
2110 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);
2111
2112 assert_eq!(result.total_matches, 1);
2113 assert_eq!(result.matches.len(), 1);
2114 }
2115
2116 #[test]
2117 fn grep_reports_total_matches_before_truncation() {
2118 let dir = tempfile::tempdir().expect("create temp dir");
2119 let project = dir.path().join("project");
2120 let src = project.join("src");
2121 fs::create_dir_all(&src).expect("create src dir");
2122 fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");
2123
2124 let index = SearchIndex::build(&project);
2125 let result = index.search_grep("SearchIndex", true, &[], &[], &src, 1);
2126
2127 assert_eq!(result.total_matches, 2);
2128 assert_eq!(result.matches.len(), 1);
2129 assert!(result.truncated);
2130 }
2131
2132 #[test]
2133 fn glob_filters_results_to_search_root() {
2134 let dir = tempfile::tempdir().expect("create temp dir");
2135 let project = dir.path().join("project");
2136 let src = project.join("src");
2137 let scripts = project.join("scripts");
2138 fs::create_dir_all(&src).expect("create src dir");
2139 fs::create_dir_all(&scripts).expect("create scripts dir");
2140 fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
2141 fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");
2142
2143 let index = SearchIndex::build(&project);
2144 let files = index.glob("**/*.rs", &src);
2145
2146 assert_eq!(
2147 files,
2148 vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
2149 );
2150 }
2151
2152 #[test]
2153 fn glob_includes_hidden_and_binary_files() {
2154 let dir = tempfile::tempdir().expect("create temp dir");
2155 let project = dir.path().join("project");
2156 let hidden_dir = project.join(".hidden");
2157 fs::create_dir_all(&hidden_dir).expect("create hidden dir");
2158 let hidden_file = hidden_dir.join("data.bin");
2159 fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");
2160
2161 let index = SearchIndex::build(&project);
2162 let files = index.glob("**/*.bin", &project);
2163
2164 assert_eq!(
2165 files,
2166 vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
2167 );
2168 }
2169
2170 #[test]
2171 fn read_from_disk_rejects_invalid_nanos() {
2172 let dir = tempfile::tempdir().expect("create temp dir");
2173 let cache_dir = dir.path().join("cache");
2174 fs::create_dir_all(&cache_dir).expect("create cache dir");
2175
2176 let mut postings = Vec::new();
2177 postings.extend_from_slice(INDEX_MAGIC);
2178 postings.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2179 postings.extend_from_slice(&0u32.to_le_bytes());
2180 postings.extend_from_slice(&1u32.to_le_bytes());
2181 postings.extend_from_slice(&DEFAULT_MAX_FILE_SIZE.to_le_bytes());
2182 postings.extend_from_slice(&1u32.to_le_bytes());
2183 postings.extend_from_slice(b"/");
2184 postings.push(0u8);
2185 postings.extend_from_slice(&1u32.to_le_bytes());
2186 postings.extend_from_slice(&0u64.to_le_bytes());
2187 postings.extend_from_slice(&0u64.to_le_bytes());
2188 postings.extend_from_slice(&1_000_000_000u32.to_le_bytes());
2189 postings.extend_from_slice(b"a");
2190 postings.extend_from_slice(&0u64.to_le_bytes());
2191
2192 let mut lookup = Vec::new();
2193 lookup.extend_from_slice(LOOKUP_MAGIC);
2194 lookup.extend_from_slice(&INDEX_VERSION.to_le_bytes());
2195 lookup.extend_from_slice(&0u32.to_le_bytes());
2196
2197 fs::write(cache_dir.join("postings.bin"), postings).expect("write postings");
2198 fs::write(cache_dir.join("lookup.bin"), lookup).expect("write lookup");
2199
2200 assert!(SearchIndex::read_from_disk(&cache_dir).is_none());
2201 }
2202
2203 #[test]
2218 fn sort_paths_by_mtime_desc_does_not_panic_on_missing_files() {
2219 let dir = tempfile::tempdir().expect("create tempdir");
2223 let mut paths: Vec<PathBuf> = Vec::new();
2224 for i in 0..30 {
2225 let path = if i % 2 == 0 {
2227 let p = dir.path().join(format!("real-{i}.rs"));
2228 fs::write(&p, format!("// {i}\n")).expect("write");
2229 p
2230 } else {
2231 dir.path().join(format!("missing-{i}.rs"))
2232 };
2233 paths.push(path);
2234 }
2235
2236 for _ in 0..50 {
2239 let mut copy = paths.clone();
2240 sort_paths_by_mtime_desc(&mut copy);
2241 assert_eq!(copy.len(), paths.len());
2242 }
2243 }
2244
2245 #[test]
2249 fn sort_grep_matches_by_mtime_desc_does_not_panic_on_missing_files() {
2250 let dir = tempfile::tempdir().expect("create tempdir");
2251 let mut matches: Vec<GrepMatch> = Vec::new();
2252 for i in 0..30 {
2253 let file = if i % 2 == 0 {
2254 let p = dir.path().join(format!("real-{i}.rs"));
2255 fs::write(&p, format!("// {i}\n")).expect("write");
2256 p
2257 } else {
2258 dir.path().join(format!("missing-{i}.rs"))
2259 };
2260 matches.push(GrepMatch {
2261 file,
2262 line: u32::try_from(i).unwrap_or(0),
2263 column: 0,
2264 line_text: format!("match {i}"),
2265 match_text: format!("match {i}"),
2266 });
2267 }
2268
2269 for _ in 0..50 {
2270 let mut copy = matches.clone();
2271 sort_grep_matches_by_mtime_desc(&mut copy, dir.path());
2272 assert_eq!(copy.len(), matches.len());
2273 }
2274 }
2275}