1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Read, Seek, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::sync::{
7 atomic::{AtomicBool, AtomicUsize, Ordering},
8 Arc,
9};
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use globset::{Glob, GlobSet, GlobSetBuilder};
13use ignore::WalkBuilder;
14use rayon::prelude::*;
15use regex::bytes::{Regex, RegexBuilder};
16use regex_syntax::hir::{Hir, HirKind};
17
18const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
19const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
20const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
21const INDEX_VERSION: u32 = 1;
22const PREVIEW_BYTES: usize = 8 * 1024;
23const EOF_SENTINEL: u8 = 0;
24const MAX_ENTRIES: usize = 10_000_000;
25const MIN_FILE_ENTRY_BYTES: usize = 25;
26const LOOKUP_ENTRY_BYTES: usize = 16;
27const POSTING_BYTES: usize = 6;
28
/// In-memory trigram search index over a project's text files.
#[derive(Clone, Debug)]
pub struct SearchIndex {
    /// trigram -> postings (one per file containing the trigram), kept
    /// sorted by `file_id` so candidate sets merge with sorted-list ops.
    pub postings: HashMap<u32, Vec<Posting>>,
    /// File table indexed by `file_id`; removed files are tombstoned in
    /// place (empty path) rather than deleted, so ids stay stable.
    pub files: Vec<FileEntry>,
    /// Reverse map from path to its `file_id`; only live files appear here.
    pub path_to_id: HashMap<PathBuf, u32>,
    /// True once a build/refresh has completed.
    pub ready: bool,
    // Canonicalized project root; cached paths are stored relative to it.
    project_root: PathBuf,
    // Git HEAD at index time; used to detect staleness on reload.
    git_head: Option<String>,
    // Files larger than this are tracked but not trigram-indexed.
    max_file_size: u64,
    // file_id -> trigrams that file contributed, for fast removal.
    file_trigrams: HashMap<u32, Vec<u32>>,
    // Tracked-but-unindexed (oversized) files; always kept as candidates.
    unindexed_files: HashSet<u32>,
}
41
42impl SearchIndex {
43 pub fn file_count(&self) -> usize {
45 self.files.len()
46 }
47
48 pub fn trigram_count(&self) -> usize {
50 self.postings.len()
51 }
52}
53
/// One occurrence record: a file containing a given trigram, plus bitmask
/// summaries used to prune candidates without reading the file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Posting {
    /// Index into `SearchIndex::files`.
    pub file_id: u32,
    /// OR of `mask_for_next_char` over every byte that follows this trigram.
    pub next_mask: u8,
    /// OR of `mask_for_position` over every offset the trigram occurs at.
    pub loc_mask: u8,
}
60
/// Metadata for one tracked file. A tombstoned entry has an empty `path`,
/// zero `size`, and `UNIX_EPOCH` as `modified`.
#[derive(Clone, Debug)]
pub struct FileEntry {
    /// Absolute path of the file (empty when tombstoned).
    pub path: PathBuf,
    /// Size in bytes at index time.
    pub size: u64,
    /// Modification time at index time; used to sort results newest-first.
    pub modified: SystemTime,
}
67
/// A single grep hit, reported at most once per line per file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GrepMatch {
    /// File the match was found in.
    pub file: PathBuf,
    /// 1-based line number (see `line_details_bytes` — TODO confirm base).
    pub line: u32,
    /// Column of the match within the line.
    pub column: u32,
    /// Full text of the matching line.
    pub line_text: String,
    /// The matched substring (lossily converted from bytes).
    pub match_text: String,
}
76
/// Aggregate result of a grep search.
#[derive(Clone, Debug)]
pub struct GrepResult {
    /// Matches returned to the caller, ordered newest file first.
    pub matches: Vec<GrepMatch>,
    /// Total matches counted, which may exceed `matches.len()` when truncated.
    pub total_matches: usize,
    /// Number of candidate files actually read and scanned.
    pub files_searched: usize,
    /// Number of files that produced at least one match.
    pub files_with_matches: usize,
    /// Whether the index was ready when the search ran.
    pub index_status: IndexStatus,
    /// True when the result cap was hit and matches were dropped.
    pub truncated: bool,
}
86
/// State of the trigram index when a search was served.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IndexStatus {
    /// Index fully built; results are complete.
    Ready,
    /// Index still building; results may be incomplete.
    Building,
    /// Index unavailable; search fell back to an unindexed scan.
    Fallback,
}
93
94impl IndexStatus {
95 pub fn as_str(&self) -> &'static str {
96 match self {
97 IndexStatus::Ready => "Ready",
98 IndexStatus::Building => "Building",
99 IndexStatus::Fallback => "Fallback",
100 }
101 }
102}
103
/// Trigram constraints derived from a regex pattern by `decompose_regex`.
/// An empty query (both lists empty) means "cannot prune; scan everything".
#[derive(Clone, Debug, Default)]
pub struct RegexQuery {
    /// Trigrams every match must contain (candidate sets are intersected).
    pub and_trigrams: Vec<u32>,
    /// Alternation groups: a match needs at least one trigram per group.
    pub or_groups: Vec<Vec<u32>>,
    /// Optional per-trigram posting filters for the AND set.
    pub(crate) and_filters: HashMap<u32, PostingFilter>,
    /// Per-group posting filters, parallel to `or_groups` by index.
    pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
}
111
/// Bitmask constraints applied when scanning a trigram's posting list.
/// A zero mask imposes no constraint (see `postings_for_trigram`).
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct PostingFilter {
    // Acceptable next-character classes (same encoding as `Posting::next_mask`).
    next_mask: u8,
    // Acceptable position classes (same encoding as `Posting::loc_mask`).
    loc_mask: u8,
}
117
/// Intermediate state while decomposing a regex HIR into byte runs.
/// NOTE(review): produced by `build_query` and consumed via `into_query`,
/// both defined elsewhere — confirm semantics against those definitions.
#[derive(Clone, Debug, Default)]
struct QueryBuild {
    // Byte runs that must all appear in any match.
    and_runs: Vec<Vec<u8>>,
    // Alternation branches: one set of runs per branch.
    or_groups: Vec<Vec<Vec<u8>>>,
}
123
/// Compiled include/exclude glob sets; `None` means "no constraint".
#[derive(Clone, Debug, Default)]
pub(crate) struct PathFilters {
    // Paths must match at least one include glob when present.
    includes: Option<GlobSet>,
    // Paths matching any exclude glob are dropped when present.
    excludes: Option<GlobSet>,
}
129
/// Where a search starts, and whether the trigram index should be consulted.
#[derive(Clone, Debug)]
pub(crate) struct SearchScope {
    /// Directory the search is rooted at.
    pub root: PathBuf,
    /// When false, callers should bypass the index and walk the tree directly.
    pub use_index: bool,
}
135
/// Internal match record that shares one `Arc<PathBuf>` per file so
/// multi-match files don't clone the path for every hit.
#[derive(Clone, Debug)]
struct SharedGrepMatch {
    // Shared path; converted to an owned PathBuf when building `GrepMatch`.
    file: Arc<PathBuf>,
    line: u32,
    column: u32,
    line_text: String,
    match_text: String,
}
144
/// Matching strategy chosen by `search_grep`: a fast literal substring
/// scan when the pattern has no regex metacharacters, otherwise a
/// compiled bytes regex.
#[derive(Clone, Debug)]
enum SearchMatcher {
    Literal(LiteralSearch),
    Regex(Regex),
}
150
/// Needle variants for the literal fast path.
#[derive(Clone, Debug)]
enum LiteralSearch {
    /// Match the needle bytes exactly.
    CaseSensitive(Vec<u8>),
    /// Needle is pre-lowercased; the haystack is lowercased before scanning.
    AsciiCaseInsensitive(Vec<u8>),
}
156
157impl SearchIndex {
158 pub fn new() -> Self {
159 SearchIndex {
160 postings: HashMap::new(),
161 files: Vec::new(),
162 path_to_id: HashMap::new(),
163 ready: false,
164 project_root: PathBuf::new(),
165 git_head: None,
166 max_file_size: DEFAULT_MAX_FILE_SIZE,
167 file_trigrams: HashMap::new(),
168 unindexed_files: HashSet::new(),
169 }
170 }
171
172 pub fn build(root: &Path) -> Self {
173 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
174 }
175
176 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
177 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
178 let mut index = SearchIndex {
179 project_root: project_root.clone(),
180 max_file_size,
181 ..SearchIndex::new()
182 };
183
184 let filters = PathFilters::default();
185 for path in walk_project_files(&project_root, &filters) {
186 index.update_file(&path);
187 }
188
189 index.git_head = current_git_head(&project_root);
190 index.ready = true;
191 index
192 }
193
    /// (Re-)indexes `content` for `path`: any previous entry is removed,
    /// a fresh file id is allocated, and every trigram in the content gets
    /// a posting with accumulated next-char / position masks.
    pub fn index_file(&mut self, path: &Path, content: &[u8]) {
        // Drop any stale state for this path first so masks don't accumulate
        // across versions of the file.
        self.remove_file(path);

        let file_id = match self.allocate_file_id(path, content.len() as u64) {
            Some(file_id) => file_id,
            None => return,
        };

        // Collapse duplicate trigrams, OR-ing their masks. BTreeMap keeps the
        // trigrams in sorted order for the insertion loop below.
        let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
        for (trigram, next_char, position) in extract_trigrams(content) {
            let entry = trigram_map.entry(trigram).or_default();
            entry.next_mask |= mask_for_next_char(next_char);
            entry.loc_mask |= mask_for_position(position);
        }

        let mut file_trigrams = Vec::with_capacity(trigram_map.len());
        for (trigram, filter) in trigram_map {
            let postings = self.postings.entry(trigram).or_default();
            postings.push(Posting {
                file_id,
                next_mask: filter.next_mask,
                loc_mask: filter.loc_mask,
            });
            // Posting lists must stay sorted by file_id (the merge helpers
            // rely on it). New ids are usually the largest, so appending is
            // normally enough; re-sort only when order was violated (e.g.
            // after id reuse from a cache reload).
            if postings.len() > 1
                && postings[postings.len() - 2].file_id > postings[postings.len() - 1].file_id
            {
                postings.sort_unstable_by_key(|p| p.file_id);
            }
            file_trigrams.push(trigram);
        }

        // Remember which trigrams this file touched so removal is O(own trigrams).
        self.file_trigrams.insert(file_id, file_trigrams);
        self.unindexed_files.remove(&file_id);
    }
231
232 pub fn remove_file(&mut self, path: &Path) {
233 let Some(file_id) = self.path_to_id.remove(path) else {
234 return;
235 };
236
237 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
238 for trigram in trigrams {
239 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
240 postings.retain(|posting| posting.file_id != file_id);
241 postings.is_empty()
242 } else {
243 false
244 };
245
246 if should_remove {
247 self.postings.remove(&trigram);
248 }
249 }
250 }
251
252 self.unindexed_files.remove(&file_id);
253 if let Some(file) = self.files.get_mut(file_id as usize) {
254 file.path = PathBuf::new();
255 file.size = 0;
256 file.modified = UNIX_EPOCH;
257 }
258 }
259
260 pub fn update_file(&mut self, path: &Path) {
261 self.remove_file(path);
262
263 let metadata = match fs::metadata(path) {
264 Ok(metadata) if metadata.is_file() => metadata,
265 _ => return,
266 };
267
268 if is_binary_path(path, metadata.len()) {
269 return;
270 }
271
272 if metadata.len() > self.max_file_size {
273 self.track_unindexed_file(path, &metadata);
274 return;
275 }
276
277 let content = match fs::read(path) {
278 Ok(content) => content,
279 Err(_) => return,
280 };
281
282 if is_binary_bytes(&content) {
283 return;
284 }
285
286 self.index_file(path, &content);
287 }
288
289 pub fn grep(
290 &self,
291 pattern: &str,
292 case_sensitive: bool,
293 include: &[String],
294 exclude: &[String],
295 search_root: &Path,
296 max_results: usize,
297 ) -> GrepResult {
298 self.search_grep(
299 pattern,
300 case_sensitive,
301 include,
302 exclude,
303 search_root,
304 max_results,
305 )
306 }
307
308 pub fn search_grep(
309 &self,
310 pattern: &str,
311 case_sensitive: bool,
312 include: &[String],
313 exclude: &[String],
314 search_root: &Path,
315 max_results: usize,
316 ) -> GrepResult {
317 let is_literal = !pattern.chars().any(|c| {
320 matches!(
321 c,
322 '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' | '^' | '$' | '\\'
323 )
324 });
325
326 let literal_search = if is_literal {
327 if case_sensitive {
328 Some(LiteralSearch::CaseSensitive(pattern.as_bytes().to_vec()))
329 } else if pattern.is_ascii() {
330 Some(LiteralSearch::AsciiCaseInsensitive(
331 pattern
332 .as_bytes()
333 .iter()
334 .map(|byte| byte.to_ascii_lowercase())
335 .collect(),
336 ))
337 } else {
338 None
339 }
340 } else {
341 None
342 };
343
344 let regex = if literal_search.is_some() {
346 None
347 } else {
348 let regex_pattern = if is_literal {
349 regex::escape(pattern)
350 } else {
351 pattern.to_string()
352 };
353 let mut builder = RegexBuilder::new(®ex_pattern);
354 builder.case_insensitive(!case_sensitive);
355 builder.multi_line(true);
357 match builder.build() {
358 Ok(r) => Some(r),
359 Err(_) => {
360 return GrepResult {
361 matches: Vec::new(),
362 total_matches: 0,
363 files_searched: 0,
364 files_with_matches: 0,
365 index_status: if self.ready {
366 IndexStatus::Ready
367 } else {
368 IndexStatus::Building
369 },
370 truncated: false,
371 };
372 }
373 }
374 };
375
376 let matcher = if let Some(literal_search) = literal_search {
377 SearchMatcher::Literal(literal_search)
378 } else {
379 SearchMatcher::Regex(
380 regex.expect("regex should exist when literal matcher is unavailable"),
381 )
382 };
383
384 let filters = match build_path_filters(include, exclude) {
385 Ok(filters) => filters,
386 Err(_) => PathFilters::default(),
387 };
388 let search_root = canonicalize_or_normalize(search_root);
389
390 let query = decompose_regex(pattern);
391 let candidate_ids = self.candidates(&query);
392
393 let candidate_files: Vec<&FileEntry> = candidate_ids
394 .into_iter()
395 .filter_map(|file_id| self.files.get(file_id as usize))
396 .filter(|file| !file.path.as_os_str().is_empty())
397 .filter(|file| is_within_search_root(&search_root, &file.path))
398 .filter(|file| filters.matches(&self.project_root, &file.path))
399 .collect();
400
401 let total_matches = AtomicUsize::new(0);
402 let files_searched = AtomicUsize::new(0);
403 let files_with_matches = AtomicUsize::new(0);
404 let truncated = AtomicBool::new(false);
405 let stop_after = max_results.saturating_mul(2);
406
407 let mut matches = if candidate_files.len() > 10 {
408 candidate_files
409 .par_iter()
410 .map(|file| {
411 search_candidate_file(
412 file,
413 &matcher,
414 max_results,
415 stop_after,
416 &total_matches,
417 &files_searched,
418 &files_with_matches,
419 &truncated,
420 )
421 })
422 .reduce(Vec::new, |mut left, mut right| {
423 left.append(&mut right);
424 left
425 })
426 } else {
427 let mut matches = Vec::new();
428 for file in candidate_files {
429 matches.extend(search_candidate_file(
430 file,
431 &matcher,
432 max_results,
433 stop_after,
434 &total_matches,
435 &files_searched,
436 &files_with_matches,
437 &truncated,
438 ));
439
440 if should_stop_search(&truncated, &total_matches, stop_after) {
441 break;
442 }
443 }
444 matches
445 };
446
447 sort_shared_grep_matches_by_cached_mtime_desc(&mut matches, |path| {
448 self.path_to_id
449 .get(path)
450 .and_then(|file_id| self.files.get(*file_id as usize))
451 .map(|file| file.modified)
452 });
453
454 let matches = matches
455 .into_iter()
456 .map(|matched| GrepMatch {
457 file: matched.file.as_ref().clone(),
458 line: matched.line,
459 column: matched.column,
460 line_text: matched.line_text,
461 match_text: matched.match_text,
462 })
463 .collect();
464
465 GrepResult {
466 total_matches: total_matches.load(Ordering::Relaxed),
467 matches,
468 files_searched: files_searched.load(Ordering::Relaxed),
469 files_with_matches: files_with_matches.load(Ordering::Relaxed),
470 index_status: if self.ready {
471 IndexStatus::Ready
472 } else {
473 IndexStatus::Building
474 },
475 truncated: truncated.load(Ordering::Relaxed),
476 }
477 }
478
479 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
480 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
481 Ok(filters) => filters,
482 Err(_) => return Vec::new(),
483 };
484 let search_root = canonicalize_or_normalize(search_root);
485 let filter_root = if search_root.starts_with(&self.project_root) {
486 &self.project_root
487 } else {
488 &search_root
489 };
490
491 let mut paths = walk_project_files_from(filter_root, &search_root, &filters);
492 sort_paths_by_mtime_desc(&mut paths);
493 paths
494 }
495
496 pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
497 if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
498 return self.active_file_ids();
499 }
500
501 let mut and_trigrams = query.and_trigrams.clone();
502 and_trigrams.sort_unstable_by_key(|trigram| self.postings.get(trigram).map_or(0, Vec::len));
503
504 let mut current: Option<Vec<u32>> = None;
505
506 for trigram in and_trigrams {
507 let filter = query.and_filters.get(&trigram).copied();
508 let matches = self.postings_for_trigram(trigram, filter);
509 current = Some(match current.take() {
510 Some(existing) => intersect_sorted_ids(&existing, &matches),
511 None => matches,
512 });
513
514 if current.as_ref().is_some_and(|ids| ids.is_empty()) {
515 break;
516 }
517 }
518
519 let mut current = current.unwrap_or_else(|| self.active_file_ids());
520
521 for (index, group) in query.or_groups.iter().enumerate() {
522 let mut group_matches = Vec::new();
523 let filters = query.or_filters.get(index);
524
525 for trigram in group {
526 let filter = filters.and_then(|filters| filters.get(trigram).copied());
527 let matches = self.postings_for_trigram(*trigram, filter);
528 if group_matches.is_empty() {
529 group_matches = matches;
530 } else {
531 group_matches = union_sorted_ids(&group_matches, &matches);
532 }
533 }
534
535 current = intersect_sorted_ids(¤t, &group_matches);
536 if current.is_empty() {
537 break;
538 }
539 }
540
541 let mut unindexed = self
542 .unindexed_files
543 .iter()
544 .copied()
545 .filter(|file_id| self.is_active_file(*file_id))
546 .collect::<Vec<_>>();
547 if !unindexed.is_empty() {
548 unindexed.sort_unstable();
549 current = union_sorted_ids(¤t, &unindexed);
550 }
551
552 current
553 }
554
    /// Atomically persists the index into `cache_dir` as two files:
    /// `postings.bin` (header + file table + postings blob) and
    /// `lookup.bin` (trigram -> offset/count into the blob).
    ///
    /// File ids are compacted into a dense 0..n range (tombstones dropped)
    /// before writing. Failures are silent best-effort: everything is
    /// written to `.tmp` files first and renamed into place only on success.
    pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
        if fs::create_dir_all(cache_dir).is_err() {
            return;
        }

        let postings_path = cache_dir.join("postings.bin");
        let lookup_path = cache_dir.join("lookup.bin");
        let tmp_postings = cache_dir.join("postings.bin.tmp");
        let tmp_lookup = cache_dir.join("lookup.bin.tmp");

        // Map live (possibly sparse) ids onto a dense 0..n range; postings
        // referencing tombstoned files are skipped below via this map.
        let active_ids = self.active_file_ids();
        let mut id_map = HashMap::new();
        for (new_id, old_id) in active_ids.iter().enumerate() {
            let Ok(new_id_u32) = u32::try_from(new_id) else {
                return;
            };
            id_map.insert(*old_id, new_id_u32);
        }

        // All fallible work happens in this closure so cleanup is centralized.
        let write_result = (|| -> std::io::Result<()> {
            let mut postings_writer = BufWriter::new(File::create(&tmp_postings)?);

            postings_writer.write_all(INDEX_MAGIC)?;
            write_u32(&mut postings_writer, INDEX_VERSION)?;

            // Header: lengths first, then the variable-length head/root strings.
            let head = git_head.unwrap_or_default();
            let root = self.project_root.to_string_lossy();
            let head_len = u32::try_from(head.len())
                .map_err(|_| std::io::Error::other("git head too large to cache"))?;
            let root_len = u32::try_from(root.len())
                .map_err(|_| std::io::Error::other("project root too large to cache"))?;
            let file_count = u32::try_from(active_ids.len())
                .map_err(|_| std::io::Error::other("too many files to cache"))?;

            write_u32(&mut postings_writer, head_len)?;
            write_u32(&mut postings_writer, root_len)?;
            write_u64(&mut postings_writer, self.max_file_size)?;
            write_u32(&mut postings_writer, file_count)?;
            postings_writer.write_all(head.as_bytes())?;
            postings_writer.write_all(root.as_bytes())?;

            // File table: one record per active file, in dense-id order.
            for old_id in &active_ids {
                let Some(file) = self.files.get(*old_id as usize) else {
                    return Err(std::io::Error::other("missing file entry for cache write"));
                };
                // Paths are stored relative to the project root so the cache
                // survives the tree being moved.
                let path = relative_to_root(&self.project_root, &file.path);
                let path = path.to_string_lossy();
                let path_len = u32::try_from(path.len())
                    .map_err(|_| std::io::Error::other("cached path too large"))?;
                let modified = file
                    .modified
                    .duration_since(UNIX_EPOCH)
                    .unwrap_or(Duration::ZERO);
                let unindexed = if self.unindexed_files.contains(old_id) {
                    1u8
                } else {
                    0u8
                };

                postings_writer.write_all(&[unindexed])?;
                write_u32(&mut postings_writer, path_len)?;
                write_u64(&mut postings_writer, file.size)?;
                write_u64(&mut postings_writer, modified.as_secs())?;
                write_u32(&mut postings_writer, modified.subsec_nanos())?;
                postings_writer.write_all(path.as_bytes())?;
            }

            // Serialize postings into one blob, remembering each trigram's
            // (offset, count) for the lookup file. Sorting makes output
            // deterministic across runs.
            let mut lookup_entries = Vec::new();
            let mut postings_blob = Vec::new();
            let mut sorted_postings: Vec<_> = self.postings.iter().collect();
            sorted_postings.sort_by_key(|(trigram, _)| **trigram);

            for (trigram, postings) in sorted_postings {
                let offset = u64::try_from(postings_blob.len())
                    .map_err(|_| std::io::Error::other("postings blob too large"))?;
                let mut count = 0u32;

                for posting in postings {
                    // Drop postings that point at tombstoned files.
                    let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
                        continue;
                    };

                    // 6 bytes per posting: le u32 file id + next_mask + loc_mask.
                    postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
                    postings_blob.push(posting.next_mask);
                    postings_blob.push(posting.loc_mask);
                    count = count.saturating_add(1);
                }

                if count > 0 {
                    lookup_entries.push((*trigram, offset, count));
                }
            }

            write_u64(
                &mut postings_writer,
                u64::try_from(postings_blob.len())
                    .map_err(|_| std::io::Error::other("postings blob too large"))?,
            )?;
            postings_writer.write_all(&postings_blob)?;
            postings_writer.flush()?;
            drop(postings_writer);

            let mut lookup_writer = BufWriter::new(File::create(&tmp_lookup)?);
            let entry_count = u32::try_from(lookup_entries.len())
                .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;

            lookup_writer.write_all(LOOKUP_MAGIC)?;
            write_u32(&mut lookup_writer, INDEX_VERSION)?;
            write_u32(&mut lookup_writer, entry_count)?;

            for (trigram, offset, count) in lookup_entries {
                write_u32(&mut lookup_writer, trigram)?;
                write_u64(&mut lookup_writer, offset)?;
                write_u32(&mut lookup_writer, count)?;
            }

            lookup_writer.flush()?;
            drop(lookup_writer);

            // Publish atomically: rename only after both temp files are complete.
            fs::rename(&tmp_postings, &postings_path)?;
            fs::rename(&tmp_lookup, &lookup_path)?;

            Ok(())
        })();

        // On any failure, clean up whatever temp files were left behind.
        if write_result.is_err() {
            let _ = fs::remove_file(&tmp_postings);
            let _ = fs::remove_file(&tmp_lookup);
        }
    }
685
    /// Loads an index previously written by [`SearchIndex::write_to_disk`].
    ///
    /// Returns `None` on any I/O failure, magic/version mismatch, or
    /// implausible length field. Every length/count read from disk is
    /// validated against the actual file sizes *before* allocating, so a
    /// corrupt cache cannot trigger huge allocations.
    pub fn read_from_disk(cache_dir: &Path) -> Option<Self> {
        let postings_path = cache_dir.join("postings.bin");
        let lookup_path = cache_dir.join("lookup.bin");

        let mut postings_reader = BufReader::new(File::open(postings_path).ok()?);
        let mut lookup_reader = BufReader::new(File::open(lookup_path).ok()?);
        // Total on-disk sizes, used to sanity-check every length field below.
        let postings_len_total =
            usize::try_from(postings_reader.get_ref().metadata().ok()?.len()).ok()?;
        let lookup_len_total =
            usize::try_from(lookup_reader.get_ref().metadata().ok()?.len()).ok()?;

        let mut magic = [0u8; 8];
        postings_reader.read_exact(&mut magic).ok()?;
        if &magic != INDEX_MAGIC {
            return None;
        }
        if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
            return None;
        }

        // Fixed header fields (see write_to_disk for the layout).
        let head_len = read_u32(&mut postings_reader).ok()? as usize;
        let root_len = read_u32(&mut postings_reader).ok()? as usize;
        let max_file_size = read_u64(&mut postings_reader).ok()?;
        let file_count = read_u32(&mut postings_reader).ok()? as usize;
        if file_count > MAX_ENTRIES {
            return None;
        }
        // Reject early if the file can't possibly hold `file_count` records.
        let remaining_postings = remaining_bytes(&mut postings_reader, postings_len_total)?;
        let minimum_file_bytes = file_count.checked_mul(MIN_FILE_ENTRY_BYTES)?;
        if minimum_file_bytes > remaining_postings {
            return None;
        }

        if head_len > remaining_bytes(&mut postings_reader, postings_len_total)? {
            return None;
        }
        let mut head_bytes = vec![0u8; head_len];
        postings_reader.read_exact(&mut head_bytes).ok()?;
        // Empty head string means "no git head recorded".
        let git_head = String::from_utf8(head_bytes)
            .ok()
            .filter(|head| !head.is_empty());

        if root_len > remaining_bytes(&mut postings_reader, postings_len_total)? {
            return None;
        }
        let mut root_bytes = vec![0u8; root_len];
        postings_reader.read_exact(&mut root_bytes).ok()?;
        let project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);

        let mut files = Vec::with_capacity(file_count);
        let mut path_to_id = HashMap::new();
        let mut unindexed_files = HashSet::new();

        // File table: ids are dense 0..file_count in write order.
        for file_id in 0..file_count {
            let mut unindexed = [0u8; 1];
            postings_reader.read_exact(&mut unindexed).ok()?;
            let path_len = read_u32(&mut postings_reader).ok()? as usize;
            let size = read_u64(&mut postings_reader).ok()?;
            let secs = read_u64(&mut postings_reader).ok()?;
            let nanos = read_u32(&mut postings_reader).ok()?;
            // Duration::new would panic on nanos >= 1e9; reject instead.
            if nanos >= 1_000_000_000 {
                return None;
            }
            if path_len > remaining_bytes(&mut postings_reader, postings_len_total)? {
                return None;
            }
            let mut path_bytes = vec![0u8; path_len];
            postings_reader.read_exact(&mut path_bytes).ok()?;
            // Cached paths are relative; re-anchor at the stored project root.
            let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
            let full_path = project_root.join(relative_path);
            let file_id_u32 = u32::try_from(file_id).ok()?;

            files.push(FileEntry {
                path: full_path.clone(),
                size,
                modified: UNIX_EPOCH + Duration::new(secs, nanos),
            });
            path_to_id.insert(full_path, file_id_u32);
            if unindexed[0] == 1 {
                unindexed_files.insert(file_id_u32);
            }
        }

        let postings_len = read_u64(&mut postings_reader).ok()? as usize;
        let max_postings_bytes = MAX_ENTRIES.checked_mul(POSTING_BYTES)?;
        if postings_len > max_postings_bytes {
            return None;
        }
        if postings_len > remaining_bytes(&mut postings_reader, postings_len_total)? {
            return None;
        }
        let mut postings_blob = vec![0u8; postings_len];
        postings_reader.read_exact(&mut postings_blob).ok()?;

        let mut lookup_magic = [0u8; 8];
        lookup_reader.read_exact(&mut lookup_magic).ok()?;
        if &lookup_magic != LOOKUP_MAGIC {
            return None;
        }
        if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
            return None;
        }
        let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
        if entry_count > MAX_ENTRIES {
            return None;
        }
        let remaining_lookup = remaining_bytes(&mut lookup_reader, lookup_len_total)?;
        let minimum_lookup_bytes = entry_count.checked_mul(LOOKUP_ENTRY_BYTES)?;
        if minimum_lookup_bytes > remaining_lookup {
            return None;
        }

        let mut postings = HashMap::new();
        let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();

        // Lookup entries: (trigram, blob offset, posting count). All offsets
        // are bounds-checked against the blob before slicing.
        for _ in 0..entry_count {
            let trigram = read_u32(&mut lookup_reader).ok()?;
            let offset = read_u64(&mut lookup_reader).ok()? as usize;
            let count = read_u32(&mut lookup_reader).ok()? as usize;
            if count > MAX_ENTRIES {
                return None;
            }
            let bytes_len = count.checked_mul(POSTING_BYTES)?;
            let end = offset.checked_add(bytes_len)?;
            if end > postings_blob.len() {
                return None;
            }

            let mut trigram_postings = Vec::with_capacity(count);
            // 6 bytes per posting: le u32 file id + next_mask + loc_mask.
            for chunk in postings_blob[offset..end].chunks_exact(6) {
                let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
                let posting = Posting {
                    file_id,
                    next_mask: chunk[4],
                    loc_mask: chunk[5],
                };
                trigram_postings.push(posting.clone());
                // Rebuild the reverse file -> trigrams map as we go.
                file_trigrams.entry(file_id).or_default().push(trigram);
            }
            postings.insert(trigram, trigram_postings);
        }

        Some(SearchIndex {
            postings,
            files,
            path_to_id,
            ready: true,
            project_root,
            git_head,
            max_file_size,
            file_trigrams,
            unindexed_files,
        })
    }
840
841 pub(crate) fn stored_git_head(&self) -> Option<&str> {
842 self.git_head.as_deref()
843 }
844
    /// Marks the index as ready (or not) for serving queries.
    pub(crate) fn set_ready(&mut self, ready: bool) {
        self.ready = ready;
    }
848
849 pub(crate) fn rebuild_or_refresh(
850 root: &Path,
851 max_file_size: u64,
852 current_head: Option<String>,
853 baseline: Option<SearchIndex>,
854 ) -> Self {
855 if current_head.is_none() {
856 return SearchIndex::build_with_limit(root, max_file_size);
857 }
858
859 if let Some(mut baseline) = baseline {
860 baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
861 baseline.max_file_size = max_file_size;
862
863 if baseline.git_head == current_head {
864 verify_file_mtimes(&mut baseline);
868 baseline.ready = true;
869 return baseline;
870 }
871
872 if let (Some(previous), Some(current)) =
873 (baseline.git_head.clone(), current_head.clone())
874 {
875 let project_root = baseline.project_root.clone();
876 if apply_git_diff_updates(&mut baseline, &project_root, &previous, ¤t) {
877 baseline.git_head = Some(current);
878 baseline.ready = true;
879 return baseline;
880 }
881 }
882 }
883
884 SearchIndex::build_with_limit(root, max_file_size)
885 }
886
887 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
888 let file_id = u32::try_from(self.files.len()).ok()?;
889 let metadata = fs::metadata(path).ok();
890 let size = metadata
891 .as_ref()
892 .map_or(size_hint, |metadata| metadata.len());
893 let modified = metadata
894 .and_then(|metadata| metadata.modified().ok())
895 .unwrap_or(UNIX_EPOCH);
896
897 self.files.push(FileEntry {
898 path: path.to_path_buf(),
899 size,
900 modified,
901 });
902 self.path_to_id.insert(path.to_path_buf(), file_id);
903 Some(file_id)
904 }
905
906 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
907 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
908 return;
909 };
910 self.unindexed_files.insert(file_id);
911 self.file_trigrams.insert(file_id, Vec::new());
912 }
913
914 fn active_file_ids(&self) -> Vec<u32> {
915 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
916 ids.sort_unstable();
917 ids
918 }
919
920 fn is_active_file(&self, file_id: u32) -> bool {
921 self.files
922 .get(file_id as usize)
923 .map(|file| !file.path.as_os_str().is_empty())
924 .unwrap_or(false)
925 }
926
    /// Collects ids of active files whose posting for `trigram` passes
    /// `filter`. Returns an empty vec for unknown trigrams.
    ///
    /// NOTE(review): only `next_mask` is consulted here; `loc_mask` is
    /// stored on postings and filters but never checked — confirm whether
    /// that is intentional or an unfinished filter.
    fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> Vec<u32> {
        let Some(postings) = self.postings.get(&trigram) else {
            return Vec::new();
        };

        let mut matches = Vec::with_capacity(postings.len());

        for posting in postings {
            if let Some(filter) = filter {
                // A zero mask means "no constraint"; otherwise the posting
                // must share at least one next-char class with the filter.
                if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
                    continue;
                }
            }
            // Skip postings that point at tombstoned file slots.
            if self.is_active_file(posting.file_id) {
                matches.push(posting.file_id);
            }
        }

        matches
    }
953}
954
/// Scans one candidate file with `matcher` and returns its matches.
///
/// Shared atomics coordinate across (possibly parallel) workers:
/// `total_matches` counts every hit, `truncated` latches once the cap is
/// exceeded, and the combination gates early exit via `should_stop_search`.
/// At most one match is reported per line (`seen_lines`), and line-start
/// offsets are computed lazily only when a first hit occurs.
fn search_candidate_file(
    file: &FileEntry,
    matcher: &SearchMatcher,
    max_results: usize,
    stop_after: usize,
    total_matches: &AtomicUsize,
    files_searched: &AtomicUsize,
    files_with_matches: &AtomicUsize,
    truncated: &AtomicBool,
) -> Vec<SharedGrepMatch> {
    // Another worker may already have filled the result budget.
    if should_stop_search(truncated, total_matches, stop_after) {
        return Vec::new();
    }

    let content = match read_indexed_file_bytes(&file.path) {
        Some(content) => content,
        None => return Vec::new(),
    };
    files_searched.fetch_add(1, Ordering::Relaxed);

    // One Arc per file; every match shares it instead of cloning the path.
    let shared_path = Arc::new(file.path.clone());
    let mut matches = Vec::new();
    // Lazily-built table of line-start offsets (built on first match only).
    let mut line_starts = None;
    let mut seen_lines = HashSet::new();
    let mut matched_this_file = false;

    match matcher {
        SearchMatcher::Literal(LiteralSearch::CaseSensitive(needle)) => {
            let finder = memchr::memmem::Finder::new(needle);
            let mut start = 0;

            // Advance one byte past each hit so overlapping occurrences are found.
            while let Some(position) = finder.find(&content[start..]) {
                if should_stop_search(truncated, total_matches, stop_after) {
                    break;
                }

                let offset = start + position;
                start = offset + 1;

                let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
                let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
                // Report each line at most once.
                if !seen_lines.insert(line) {
                    continue;
                }

                matched_this_file = true;
                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
                if match_number > max_results {
                    truncated.store(true, Ordering::Relaxed);
                    break;
                }

                let end = offset + needle.len();
                matches.push(SharedGrepMatch {
                    file: shared_path.clone(),
                    line,
                    column,
                    line_text,
                    match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
                });
            }
        }
        SearchMatcher::Literal(LiteralSearch::AsciiCaseInsensitive(needle)) => {
            // Needle was pre-lowercased by the caller; lowercase the haystack
            // once, but slice match text from the ORIGINAL content.
            let search_content = content.to_ascii_lowercase();
            let finder = memchr::memmem::Finder::new(needle);
            let mut start = 0;

            while let Some(position) = finder.find(&search_content[start..]) {
                if should_stop_search(truncated, total_matches, stop_after) {
                    break;
                }

                let offset = start + position;
                start = offset + 1;

                let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
                let (line, column, line_text) = line_details_bytes(&content, line_starts, offset);
                if !seen_lines.insert(line) {
                    continue;
                }

                matched_this_file = true;
                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
                if match_number > max_results {
                    truncated.store(true, Ordering::Relaxed);
                    break;
                }

                let end = offset + needle.len();
                matches.push(SharedGrepMatch {
                    file: shared_path.clone(),
                    line,
                    column,
                    line_text,
                    match_text: String::from_utf8_lossy(&content[offset..end]).into_owned(),
                });
            }
        }
        SearchMatcher::Regex(regex) => {
            for matched in regex.find_iter(&content) {
                if should_stop_search(truncated, total_matches, stop_after) {
                    break;
                }

                let line_starts = line_starts.get_or_insert_with(|| line_starts_bytes(&content));
                let (line, column, line_text) =
                    line_details_bytes(&content, line_starts, matched.start());
                if !seen_lines.insert(line) {
                    continue;
                }

                matched_this_file = true;
                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
                if match_number > max_results {
                    truncated.store(true, Ordering::Relaxed);
                    break;
                }

                matches.push(SharedGrepMatch {
                    file: shared_path.clone(),
                    line,
                    column,
                    line_text,
                    match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
                });
            }
        }
    }

    if matched_this_file {
        files_with_matches.fetch_add(1, Ordering::Relaxed);
    }

    matches
}
1090
/// True once the search has been marked truncated AND the global match
/// counter has reached the hard stop threshold.
fn should_stop_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    let reached_cap = total_matches.load(Ordering::Relaxed) >= stop_after;
    reached_cap && truncated.load(Ordering::Relaxed)
}
1098
/// Intersects two ascending-sorted id slices; the result is sorted.
fn intersect_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
    let mut out = Vec::with_capacity(left.len().min(right.len()));
    let (mut i, mut j) = (0usize, 0usize);

    // Classic two-pointer merge: advance the smaller side, emit on equality.
    while let (Some(&a), Some(&b)) = (left.get(i), right.get(j)) {
        if a < b {
            i += 1;
        } else if b < a {
            j += 1;
        } else {
            out.push(a);
            i += 1;
            j += 1;
        }
    }

    out
}
1118
/// Merges two ascending-sorted id slices into one sorted list, emitting a
/// cross-list duplicate only once.
fn union_sorted_ids(left: &[u32], right: &[u32]) -> Vec<u32> {
    let mut out = Vec::with_capacity(left.len() + right.len());
    let (mut i, mut j) = (0usize, 0usize);

    loop {
        match (left.get(i), right.get(j)) {
            (Some(&a), Some(&b)) => {
                if a < b {
                    out.push(a);
                    i += 1;
                } else if b < a {
                    out.push(b);
                    j += 1;
                } else {
                    // Equal across lists: emit once, advance both.
                    out.push(a);
                    i += 1;
                    j += 1;
                }
            }
            // One side exhausted: append the other side's tail and finish.
            (Some(_), None) => {
                out.extend_from_slice(&left[i..]);
                break;
            }
            (None, Some(_)) => {
                out.extend_from_slice(&right[j..]);
                break;
            }
            (None, None) => break,
        }
    }

    out
}
1146
1147pub fn decompose_regex(pattern: &str) -> RegexQuery {
1148 let hir = match regex_syntax::parse(pattern) {
1149 Ok(hir) => hir,
1150 Err(_) => return RegexQuery::default(),
1151 };
1152
1153 let build = build_query(&hir);
1154 build.into_query()
1155}
1156
/// Packs three bytes into the low 24 bits of a `u32` (`a` highest, `c`
/// lowest), the canonical trigram key used throughout the index.
pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
    u32::from_be_bytes([0, a, b, c])
}
1160
/// Case-folds a byte for case-insensitive trigram indexing: ASCII uppercase
/// letters map to lowercase, every other byte passes through unchanged.
pub fn normalize_char(c: u8) -> u8 {
    if c.is_ascii_uppercase() {
        c + (b'a' - b'A')
    } else {
        c
    }
}
1164
1165pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
1166 if content.len() < 3 {
1167 return Vec::new();
1168 }
1169
1170 let mut trigrams = Vec::with_capacity(content.len().saturating_sub(2));
1171 for start in 0..=content.len() - 3 {
1172 let trigram = pack_trigram(
1173 normalize_char(content[start]),
1174 normalize_char(content[start + 1]),
1175 normalize_char(content[start + 2]),
1176 );
1177 let next_char = content.get(start + 3).copied().unwrap_or(EOF_SENTINEL);
1178 trigrams.push((trigram, next_char, start));
1179 }
1180 trigrams
1181}
1182
1183pub fn resolve_cache_dir(project_root: &Path, storage_dir: Option<&Path>) -> PathBuf {
1184 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
1186 return PathBuf::from(override_dir)
1187 .join("index")
1188 .join(project_cache_key(project_root));
1189 }
1190 if let Some(dir) = storage_dir {
1192 return dir.join("index").join(project_cache_key(project_root));
1193 }
1194 let home = std::env::var_os("HOME")
1196 .map(PathBuf::from)
1197 .unwrap_or_else(|| PathBuf::from("."));
1198 home.join(".cache")
1199 .join("aft")
1200 .join("index")
1201 .join(project_cache_key(project_root))
1202}
1203
1204pub(crate) fn build_path_filters(
1205 include: &[String],
1206 exclude: &[String],
1207) -> Result<PathFilters, String> {
1208 Ok(PathFilters {
1209 includes: build_globset(include)?,
1210 excludes: build_globset(exclude)?,
1211 })
1212}
1213
/// Walks every searchable file under `root`, matching `filters` relative to
/// the same `root`. Thin wrapper over `walk_project_files_from` for the
/// common case where filter root and search root coincide.
pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
    walk_project_files_from(root, root, filters)
}
1217
/// Walks `search_root` (respecting gitignore rules, including hidden files)
/// and returns all regular files that pass `filters`, where the filter globs
/// are matched against paths made relative to `filter_root`. Results are
/// sorted newest-first by modification time.
pub(crate) fn walk_project_files_from(
    filter_root: &Path,
    search_root: &Path,
    filters: &PathFilters,
) -> Vec<PathBuf> {
    let mut builder = WalkBuilder::new(search_root);
    builder
        .hidden(false)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .filter_entry(|entry| {
            let name = entry.file_name().to_string_lossy();
            // Prune well-known heavyweight directories by name so the walker
            // never descends into them; non-directory entries always pass
            // this stage and are filtered below instead.
            if entry.file_type().map_or(false, |ft| ft.is_dir()) {
                return !matches!(
                    name.as_ref(),
                    "node_modules"
                        | "target"
                        | "venv"
                        | ".venv"
                        | ".git"
                        | "__pycache__"
                        | ".tox"
                        | "dist"
                        | "build"
                );
            }
            true
        });

    let mut files = Vec::new();
    // Walk errors (permission issues, races) are silently skipped.
    for entry in builder.build().filter_map(|entry| entry.ok()) {
        // Keep regular files only.
        if !entry
            .file_type()
            .map_or(false, |file_type| file_type.is_file())
        {
            continue;
        }
        let path = entry.into_path();
        if filters.matches(filter_root, &path) {
            files.push(path);
        }
    }

    sort_paths_by_mtime_desc(&mut files);
    files
}
1265
1266pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
1267 let bytes = fs::read(path).ok()?;
1268 if is_binary_bytes(&bytes) {
1269 return None;
1270 }
1271 String::from_utf8(bytes).ok()
1272}
1273
/// Reads the raw bytes of a file scheduled for indexing; `None` on any I/O
/// error (missing file, permissions, etc.).
fn read_indexed_file_bytes(path: &Path) -> Option<Vec<u8>> {
    match fs::read(path) {
        Ok(bytes) => Some(bytes),
        Err(_) => None,
    }
}
1277
/// Returns `path` relative to `root`; when `path` is not under `root`, the
/// original path is returned unchanged.
pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
1283
1284pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
1297 use std::collections::HashMap;
1298 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(paths.len());
1299 for path in paths.iter() {
1300 mtimes
1301 .entry(path.clone())
1302 .or_insert_with(|| path_modified_time(path));
1303 }
1304 paths.sort_by(|left, right| {
1305 let left_mtime = mtimes.get(left).and_then(|v| *v);
1306 let right_mtime = mtimes.get(right).and_then(|v| *v);
1307 right_mtime.cmp(&left_mtime).then_with(|| left.cmp(right))
1308 });
1309}
1310
1311pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
1314 use std::collections::HashMap;
1315 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::new();
1316 for m in matches.iter() {
1317 mtimes.entry(m.file.clone()).or_insert_with(|| {
1318 let resolved = resolve_match_path(project_root, &m.file);
1319 path_modified_time(&resolved)
1320 });
1321 }
1322 matches.sort_by(|left, right| {
1323 let left_mtime = mtimes.get(&left.file).and_then(|v| *v);
1324 let right_mtime = mtimes.get(&right.file).and_then(|v| *v);
1325 right_mtime
1326 .cmp(&left_mtime)
1327 .then_with(|| left.file.cmp(&right.file))
1328 .then_with(|| left.line.cmp(&right.line))
1329 .then_with(|| left.column.cmp(&right.column))
1330 });
1331}
1332
1333fn sort_shared_grep_matches_by_cached_mtime_desc<F>(
1338 matches: &mut [SharedGrepMatch],
1339 modified_for_path: F,
1340) where
1341 F: Fn(&Path) -> Option<SystemTime>,
1342{
1343 use std::collections::HashMap;
1344 let mut mtimes: HashMap<PathBuf, Option<SystemTime>> = HashMap::with_capacity(matches.len());
1345 for m in matches.iter() {
1346 let path = m.file.as_path().to_path_buf();
1347 mtimes
1348 .entry(path.clone())
1349 .or_insert_with(|| modified_for_path(&path));
1350 }
1351 matches.sort_by(|left, right| {
1352 let left_mtime = mtimes.get(left.file.as_path()).and_then(|v| *v);
1353 let right_mtime = mtimes.get(right.file.as_path()).and_then(|v| *v);
1354 right_mtime
1355 .cmp(&left_mtime)
1356 .then_with(|| left.file.as_path().cmp(right.file.as_path()))
1357 .then_with(|| left.line.cmp(&right.line))
1358 .then_with(|| left.column.cmp(&right.column))
1359 });
1360}
1361
1362pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
1363 let resolved_project_root = canonicalize_or_normalize(project_root);
1364 let root = match path {
1365 Some(path) => {
1366 let path = PathBuf::from(path);
1367 if path.is_absolute() {
1368 canonicalize_or_normalize(&path)
1369 } else {
1370 normalize_path(&resolved_project_root.join(path))
1371 }
1372 }
1373 None => resolved_project_root.clone(),
1374 };
1375
1376 let use_index = is_within_search_root(&resolved_project_root, &root);
1377 SearchScope { root, use_index }
1378}
1379
/// Heuristically classifies a byte buffer as binary (vs. text) via the
/// `content_inspector` crate's sniffing.
pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
    content_inspector::inspect(content).is_binary()
}
1383
/// Returns the commit hash of `HEAD` for the repository at `root`, or `None`
/// when `root` is not a git checkout or git cannot be run.
pub(crate) fn current_git_head(root: &Path) -> Option<String> {
    run_git(root, &["rev-parse", "HEAD"])
}
1387
1388pub(crate) fn project_cache_key(project_root: &Path) -> String {
1389 use sha2::{Digest, Sha256};
1390
1391 let mut hasher = Sha256::new();
1392
1393 if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
1394 hasher.update(root_commit.as_bytes());
1397 } else {
1398 let canonical_root = canonicalize_or_normalize(project_root);
1400 hasher.update(canonical_root.to_string_lossy().as_bytes());
1401 }
1402
1403 let digest = format!("{:x}", hasher.finalize());
1404 digest[..16].to_string()
1405}
1406
1407impl PathFilters {
1408 fn matches(&self, root: &Path, path: &Path) -> bool {
1409 let relative = to_glob_path(&relative_to_root(root, path));
1410 if self
1411 .includes
1412 .as_ref()
1413 .is_some_and(|includes| !includes.is_match(&relative))
1414 {
1415 return false;
1416 }
1417 if self
1418 .excludes
1419 .as_ref()
1420 .is_some_and(|excludes| excludes.is_match(&relative))
1421 {
1422 return false;
1423 }
1424 true
1425 }
1426}
1427
1428fn canonicalize_or_normalize(path: &Path) -> PathBuf {
1429 fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
1430}
1431
/// Resolves a match's file path to an absolute location: absolute paths are
/// returned as-is, relative paths are joined onto `project_root`.
fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
    if path.is_absolute() {
        return path.to_path_buf();
    }
    project_root.join(path)
}
1439
/// Returns the file's modification time, or `None` when the path cannot be
/// statted or the platform does not report mtimes.
fn path_modified_time(path: &Path) -> Option<SystemTime> {
    let metadata = fs::metadata(path).ok()?;
    metadata.modified().ok()
}
1445
/// Lexically normalizes `path` without touching the filesystem: drops `.`
/// components and resolves each `..` against the preceding normal component.
///
/// Fix over the previous version: unresolvable leading `..` components are
/// preserved instead of cancelling each other (`../../a` used to collapse to
/// `a`, which resolves to the wrong location), and `..` directly under the
/// root is elided (`/..` normalizes to `/`, matching POSIX lexical
/// semantics) instead of producing `/..`.
fn normalize_path(path: &Path) -> PathBuf {
    let mut result = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match result.components().next_back() {
                // Pop only a real directory name; never a kept `..`.
                Some(Component::Normal(_)) => {
                    result.pop();
                }
                // `..` at the root stays at the root.
                Some(Component::RootDir) | Some(Component::Prefix(_)) => {}
                // Empty result or trailing `..`: the `..` is unresolvable,
                // keep it.
                _ => result.push(component),
            },
            _ => result.push(component),
        }
    }
    result
}
1461
/// Revalidates a loaded index against the filesystem: re-indexes entries
/// whose size or mtime changed (or that disappeared), then indexes any files
/// on disk that the index does not know about.
fn verify_file_mtimes(index: &mut SearchIndex) {
    let mut stale_paths = Vec::new();
    for entry in &index.files {
        // Entries with an empty path carry no file to check — presumably
        // placeholders for removed files; TODO confirm against remove_file.
        if entry.path.as_os_str().is_empty() {
            continue; }
        match fs::metadata(&entry.path) {
            Ok(meta) => {
                // Size or mtime drift means the cached postings are stale.
                let current_mtime = meta.modified().unwrap_or(UNIX_EPOCH);
                if current_mtime != entry.modified || meta.len() != entry.size {
                    stale_paths.push(entry.path.clone());
                }
            }
            Err(_) => {
                // Unreadable or deleted: let update_file decide its fate.
                stale_paths.push(entry.path.clone());
            }
        }
    }

    for path in &stale_paths {
        index.update_file(path);
    }

    // Pick up files created since the index was written to disk.
    let filters = PathFilters::default();
    for path in walk_project_files(&index.project_root, &filters) {
        if !index.path_to_id.contains_key(&path) {
            index.update_file(&path);
        }
    }

    if !stale_paths.is_empty() {
        log::info!(
            "[aft] search index: refreshed {} stale file(s) from disk cache",
            stale_paths.len()
        );
    }
}
1505
/// Reports whether `path` lies at or below `search_root` (component-wise,
/// purely lexical — no symlink resolution).
fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
    path.ancestors().any(|ancestor| ancestor == search_root)
}
1509
1510impl QueryBuild {
1511 fn into_query(self) -> RegexQuery {
1512 let mut query = RegexQuery::default();
1513
1514 for run in self.and_runs {
1515 add_run_to_and_query(&mut query, &run);
1516 }
1517
1518 for group in self.or_groups {
1519 let mut trigrams = BTreeSet::new();
1520 let mut filters = HashMap::new();
1521 for run in group {
1522 for (trigram, filter) in trigram_filters(&run) {
1523 trigrams.insert(trigram);
1524 merge_filter(filters.entry(trigram).or_default(), filter);
1525 }
1526 }
1527 if !trigrams.is_empty() {
1528 query.or_groups.push(trigrams.into_iter().collect());
1529 query.or_filters.push(filters);
1530 }
1531 }
1532
1533 query
1534 }
1535}
1536
1537fn build_query(hir: &Hir) -> QueryBuild {
1538 match hir.kind() {
1539 HirKind::Literal(literal) => {
1540 if literal.0.len() >= 3 {
1541 QueryBuild {
1542 and_runs: vec![literal.0.to_vec()],
1543 or_groups: Vec::new(),
1544 }
1545 } else {
1546 QueryBuild::default()
1547 }
1548 }
1549 HirKind::Capture(capture) => build_query(&capture.sub),
1550 HirKind::Concat(parts) => {
1551 let mut build = QueryBuild::default();
1552 for part in parts {
1553 let part_build = build_query(part);
1554 build.and_runs.extend(part_build.and_runs);
1555 build.or_groups.extend(part_build.or_groups);
1556 }
1557 build
1558 }
1559 HirKind::Alternation(parts) => {
1560 let mut group = Vec::new();
1561 for part in parts {
1562 let Some(mut choices) = guaranteed_run_choices(part) else {
1563 return QueryBuild::default();
1564 };
1565 group.append(&mut choices);
1566 }
1567 if group.is_empty() {
1568 QueryBuild::default()
1569 } else {
1570 QueryBuild {
1571 and_runs: Vec::new(),
1572 or_groups: vec![group],
1573 }
1574 }
1575 }
1576 HirKind::Repetition(repetition) => {
1577 if repetition.min == 0 {
1578 QueryBuild::default()
1579 } else {
1580 build_query(&repetition.sub)
1581 }
1582 }
1583 HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
1584 }
1585}
1586
/// Returns the literal byte runs (each long enough for at least one trigram)
/// that are guaranteed to occur in any match of `hir`, or `None` when no
/// such guarantee can be made.
///
/// NOTE(review): `Concat` tolerates parts that yield nothing and keeps runs
/// from the rest, while `Alternation` demands runs from *every* branch —
/// one opaque branch invalidates the whole node. The caller flattens the
/// returned runs into a single OR group.
fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
    match hir.kind() {
        HirKind::Literal(literal) => {
            // Fewer than three bytes yields no trigram, hence no guarantee.
            if literal.0.len() >= 3 {
                Some(vec![literal.0.to_vec()])
            } else {
                None
            }
        }
        HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
        HirKind::Concat(parts) => {
            let mut runs = Vec::new();
            for part in parts {
                // Parts with no guaranteed runs are simply skipped.
                if let Some(mut part_runs) = guaranteed_run_choices(part) {
                    runs.append(&mut part_runs);
                }
            }
            if runs.is_empty() {
                None
            } else {
                Some(runs)
            }
        }
        HirKind::Alternation(parts) => {
            let mut runs = Vec::new();
            for part in parts {
                // Every branch must contribute, or the alternation as a
                // whole guarantees nothing.
                let Some(mut part_runs) = guaranteed_run_choices(part) else {
                    return None;
                };
                runs.append(&mut part_runs);
            }
            if runs.is_empty() {
                None
            } else {
                Some(runs)
            }
        }
        HirKind::Repetition(repetition) => {
            // Zero-minimum repetitions may be entirely absent from a match.
            if repetition.min == 0 {
                None
            } else {
                guaranteed_run_choices(&repetition.sub)
            }
        }
        HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
    }
}
1634
1635fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1636 for (trigram, filter) in trigram_filters(run) {
1637 if !query.and_trigrams.contains(&trigram) {
1638 query.and_trigrams.push(trigram);
1639 }
1640 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1641 }
1642}
1643
1644fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1645 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1646 for (trigram, next_char, position) in extract_trigrams(run) {
1647 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1648 if next_char != EOF_SENTINEL {
1649 entry.next_mask |= mask_for_next_char(next_char);
1650 }
1651 entry.loc_mask |= mask_for_position(position);
1652 }
1653 filters.into_iter().collect()
1654}
1655
/// Unions `filter` into `target`: both bloom-style masks accumulate bits, so
/// the merged filter accepts anything either input would.
fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
    target.next_mask |= filter.next_mask;
    target.loc_mask |= filter.loc_mask;
}
1660
1661fn mask_for_next_char(next_char: u8) -> u8 {
1662 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1663 1u8 << bit
1664}
1665
/// Maps a trigram's byte offset to one bit of an 8-bit position bloom mask
/// (offset modulo 8; `& 7` is the equivalent power-of-two reduction).
fn mask_for_position(position: usize) -> u8 {
    1u8 << (position & 7)
}
1669
1670fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1671 if patterns.is_empty() {
1672 return Ok(None);
1673 }
1674
1675 let mut builder = GlobSetBuilder::new();
1676 for pattern in patterns {
1677 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1678 builder.add(glob);
1679 }
1680 builder.build().map(Some).map_err(|error| error.to_string())
1681}
1682
/// Reads a little-endian `u32` from `reader`.
fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
    let mut bytes = [0u8; std::mem::size_of::<u32>()];
    reader.read_exact(&mut bytes)?;
    Ok(u32::from_le_bytes(bytes))
}
1688
/// Reads a little-endian `u64` from `reader`.
fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
    let mut bytes = [0u8; std::mem::size_of::<u64>()];
    reader.read_exact(&mut bytes)?;
    Ok(u64::from_le_bytes(bytes))
}
1694
/// Writes `value` to `writer` as little-endian bytes.
fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
    let bytes = value.to_le_bytes();
    writer.write_all(&bytes)
}
1698
/// Writes `value` to `writer` as little-endian bytes.
fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
    let bytes = value.to_le_bytes();
    writer.write_all(&bytes)
}
1702
/// Returns how many bytes remain between the reader's current position and
/// `total_len`, or `None` if the position cannot be read, does not fit in
/// `usize`, or already exceeds `total_len`.
fn remaining_bytes<R: Seek>(reader: &mut R, total_len: usize) -> Option<usize> {
    let position = reader.stream_position().ok()?;
    let position = usize::try_from(position).ok()?;
    total_len.checked_sub(position)
}
1707
/// Runs `git -C <root> <args...>` and returns its trimmed stdout.
/// Yields `None` when git cannot be spawned, exits non-zero, emits invalid
/// UTF-8, or prints nothing but whitespace.
fn run_git(root: &Path, args: &[&str]) -> Option<String> {
    let output = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(args)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8(output.stdout).ok()?;
    let trimmed = stdout.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
1726
1727fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
1728 let diff_range = format!("{}..{}", from, to);
1729 let output = match Command::new("git")
1730 .arg("-C")
1731 .arg(root)
1732 .args(["diff", "--name-only", &diff_range])
1733 .output()
1734 {
1735 Ok(output) => output,
1736 Err(_) => return false,
1737 };
1738
1739 if !output.status.success() {
1740 return false;
1741 }
1742
1743 let Ok(paths) = String::from_utf8(output.stdout) else {
1744 return false;
1745 };
1746
1747 for relative_path in paths.lines().map(str::trim).filter(|path| !path.is_empty()) {
1748 let path = root.join(relative_path);
1749 if path.exists() {
1750 index.update_file(&path);
1751 } else {
1752 index.remove_file(&path);
1753 }
1754 }
1755
1756 true
1757}
1758
1759fn is_binary_path(path: &Path, size: u64) -> bool {
1760 if size == 0 {
1761 return false;
1762 }
1763
1764 let mut file = match File::open(path) {
1765 Ok(file) => file,
1766 Err(_) => return true,
1767 };
1768
1769 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
1770 match file.read(&mut preview) {
1771 Ok(read) => is_binary_bytes(&preview[..read]),
1772 Err(_) => true,
1773 }
1774}
1775
/// Returns the byte offset of every line start in `content`: offset 0 plus
/// the position just after each `\n` (so a trailing newline yields a final
/// empty-line start).
fn line_starts_bytes(content: &[u8]) -> Vec<usize> {
    std::iter::once(0)
        .chain(
            content
                .iter()
                .enumerate()
                .filter(|&(_, &byte)| byte == b'\n')
                .map(|(index, _)| index + 1),
        )
        .collect()
}
1785
/// Resolves a byte `offset` into (1-based line number, 1-based character
/// column, line text without the trailing `\r`/`\n`), using the precomputed
/// `line_starts` table for the line lookup.
fn line_details_bytes(content: &[u8], line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    // Index of the last recorded line start at or before `offset`.
    let line_index = line_starts
        .partition_point(|&start| start <= offset)
        .saturating_sub(1);
    let line_start = line_starts.get(line_index).copied().unwrap_or(0);
    let line_end = content[line_start..]
        .iter()
        .position(|&byte| byte == b'\n')
        .map_or(content.len(), |length| line_start + length);
    let mut line_slice = &content[line_start..line_end];
    // Drop a CR so CRLF files report clean line text.
    if let Some(stripped) = line_slice.strip_suffix(b"\r") {
        line_slice = stripped;
    }
    let line_text = String::from_utf8_lossy(line_slice).into_owned();
    // Column counts decoded characters, not bytes, and is 1-based.
    let column = 1 + String::from_utf8_lossy(&content[line_start..offset])
        .chars()
        .count() as u32;
    (line_index as u32 + 1, column, line_text)
}
1808
/// Renders `path` as a string with forward slashes only, so glob matching
/// behaves identically on Windows and Unix separators.
fn to_glob_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1812
#[cfg(test)]
mod tests {
    use std::process::Command;

    use super::*;

    // Each sliding window yields (packed folded trigram, following byte,
    // start offset); the last window's "next byte" is EOF_SENTINEL.
    #[test]
    fn extract_trigrams_tracks_next_char_and_position() {
        let trigrams = extract_trigrams(b"Rust");
        assert_eq!(trigrams.len(), 2);
        assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
        assert_eq!(
            trigrams[1],
            (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
        );
    }

    // Required literals land in and_trigrams; the alternation's branches
    // become a single OR group holding every branch's trigrams.
    #[test]
    fn decompose_regex_extracts_literals_and_alternations() {
        let query = decompose_regex("abc(def|ghi)xyz");
        assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
        assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
        assert_eq!(query.or_groups.len(), 1);
        assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
        assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
    }

    // Only the file containing ALL required trigrams should survive
    // candidate selection.
    #[test]
    fn candidates_intersect_posting_lists() {
        let mut index = SearchIndex::new();
        let dir = tempfile::tempdir().expect("create temp dir");
        let alpha = dir.path().join("alpha.txt");
        let beta = dir.path().join("beta.txt");
        fs::write(&alpha, "abcdef").expect("write alpha");
        fs::write(&beta, "abcxyz").expect("write beta");
        index.project_root = dir.path().to_path_buf();
        index.index_file(&alpha, b"abcdef");
        index.index_file(&beta, b"abcxyz");

        let query = RegexQuery {
            and_trigrams: vec![
                pack_trigram(b'a', b'b', b'c'),
                pack_trigram(b'd', b'e', b'f'),
            ],
            ..RegexQuery::default()
        };

        let candidates = index.candidates(&query);
        assert_eq!(candidates.len(), 1);
        assert_eq!(index.files[candidates[0] as usize].path, alpha);
    }

    // A posting filter whose next-byte bloom bit matches the indexed
    // content keeps the posting; a non-matching bit filters it out.
    #[test]
    fn candidates_apply_bloom_filters() {
        let mut index = SearchIndex::new();
        let dir = tempfile::tempdir().expect("create temp dir");
        let file = dir.path().join("sample.txt");
        fs::write(&file, "abcd efgh").expect("write sample");
        index.project_root = dir.path().to_path_buf();
        index.index_file(&file, b"abcd efgh");

        let trigram = pack_trigram(b'a', b'b', b'c');
        let matching_filter = PostingFilter {
            next_mask: mask_for_next_char(b'd'),
            loc_mask: mask_for_position(0),
        };
        let non_matching_filter = PostingFilter {
            next_mask: mask_for_next_char(b'z'),
            loc_mask: mask_for_position(0),
        };

        assert_eq!(
            index
                .postings_for_trigram(trigram, Some(matching_filter))
                .len(),
            1
        );
        assert!(index
            .postings_for_trigram(trigram, Some(non_matching_filter))
            .is_empty());
    }

    // Writing then reading the cache must preserve the git head, file table
    // (relative to the project root), and posting lists.
    #[test]
    fn disk_round_trip_preserves_postings_and_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        fs::create_dir_all(&project).expect("create project dir");
        let file = project.join("src.txt");
        fs::write(&file, "abcdef").expect("write source");

        let mut index = SearchIndex::build(&project);
        index.git_head = Some("deadbeef".to_string());
        let cache_dir = dir.path().join("cache");
        index.write_to_disk(&cache_dir, index.git_head.as_deref());

        let loaded = SearchIndex::read_from_disk(&cache_dir).expect("load index from disk");
        assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
        assert_eq!(loaded.files.len(), 1);
        assert_eq!(
            relative_to_root(&loaded.project_root, &loaded.files[0].path),
            PathBuf::from("src.txt")
        );
        assert_eq!(loaded.postings.len(), index.postings.len());
        assert!(loaded
            .postings
            .contains_key(&pack_trigram(b'a', b'b', b'c')));
    }

    // The atomic write path must leave only the final files behind — no
    // lingering *.tmp staging files.
    #[test]
    fn write_to_disk_uses_temp_files_and_cleans_them_up() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        fs::create_dir_all(&project).expect("create project dir");
        fs::write(project.join("src.txt"), "abcdef").expect("write source");

        let index = SearchIndex::build(&project);
        let cache_dir = dir.path().join("cache");
        index.write_to_disk(&cache_dir, None);

        assert!(cache_dir.join("postings.bin").is_file());
        assert!(cache_dir.join("lookup.bin").is_file());
        assert!(!cache_dir.join("postings.bin.tmp").exists());
        assert!(!cache_dir.join("lookup.bin.tmp").exists());
    }

    // A repo and its clone share git history, so they must hash to the same
    // cache key even though their checkout paths differ.
    #[test]
    fn project_cache_key_includes_checkout_path() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let source = dir.path().join("source");
        fs::create_dir_all(&source).expect("create source repo dir");
        fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");

        assert!(Command::new("git")
            .current_dir(&source)
            .args(["init"])
            .status()
            .expect("init git repo")
            .success());
        assert!(Command::new("git")
            .current_dir(&source)
            .args(["add", "."])
            .status()
            .expect("git add")
            .success());
        assert!(Command::new("git")
            .current_dir(&source)
            .args([
                "-c",
                "user.name=AFT Tests",
                "-c",
                "user.email=aft-tests@example.com",
                "commit",
                "-m",
                "initial",
            ])
            .status()
            .expect("git commit")
            .success());

        let clone = dir.path().join("clone");
        assert!(Command::new("git")
            .args(["clone", "--quiet"])
            .arg(&source)
            .arg(&clone)
            .status()
            .expect("git clone")
            .success());

        let source_key = project_cache_key(&source);
        let clone_key = project_cache_key(&clone);

        assert_eq!(source_key.len(), 16);
        assert_eq!(clone_key.len(), 16);
        assert_eq!(source_key, clone_key);
    }

    // A search root outside the project must canonicalize correctly and
    // force a non-indexed (walk-based) search.
    #[test]
    fn resolve_search_scope_disables_index_for_external_path() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let outside = dir.path().join("outside");
        fs::create_dir_all(&project).expect("create project dir");
        fs::create_dir_all(&outside).expect("create outside dir");

        let scope = resolve_search_scope(&project, outside.to_str());

        assert_eq!(
            scope.root,
            fs::canonicalize(&outside).expect("canonicalize outside")
        );
        assert!(!scope.use_index);
    }

    // Searching under src/ must not report matches from sibling docs/.
    #[test]
    fn grep_filters_matches_to_search_root() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        let docs = project.join("docs");
        fs::create_dir_all(&src).expect("create src dir");
        fs::create_dir_all(&docs).expect("create docs dir");
        fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
        fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);

        assert_eq!(result.files_searched, 1);
        assert_eq!(result.files_with_matches, 1);
        assert_eq!(result.matches.len(), 1);
        let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
        assert_eq!(result.matches[0].file, expected);
    }

    // Two hits on one line should be reported as a single match.
    #[test]
    fn grep_deduplicates_multiple_matches_on_same_line() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        fs::create_dir_all(&src).expect("create src dir");
        fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);

        assert_eq!(result.total_matches, 1);
        assert_eq!(result.matches.len(), 1);
    }

    // total_matches counts everything found even when max_results truncates
    // the returned list.
    #[test]
    fn grep_reports_total_matches_before_truncation() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        fs::create_dir_all(&src).expect("create src dir");
        fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 1);

        assert_eq!(result.total_matches, 2);
        assert_eq!(result.matches.len(), 1);
        assert!(result.truncated);
    }

    // Glob results must be scoped to the requested subtree, not the whole
    // project.
    #[test]
    fn glob_filters_results_to_search_root() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        let scripts = project.join("scripts");
        fs::create_dir_all(&src).expect("create src dir");
        fs::create_dir_all(&scripts).expect("create scripts dir");
        fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
        fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");

        let index = SearchIndex::build(&project);
        let files = index.glob("**/*.rs", &src);

        assert_eq!(
            files,
            vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
        );
    }

    // Glob matching is name-based: hidden directories and binary content
    // must still be found.
    #[test]
    fn glob_includes_hidden_and_binary_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let hidden_dir = project.join(".hidden");
        fs::create_dir_all(&hidden_dir).expect("create hidden dir");
        let hidden_file = hidden_dir.join("data.bin");
        fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");

        let index = SearchIndex::build(&project);
        let files = index.glob("**/*.bin", &project);

        assert_eq!(
            files,
            vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
        );
    }

    // A hand-built cache file whose mtime nanos field is out of range
    // (>= 1e9) must be rejected rather than loaded.
    #[test]
    fn read_from_disk_rejects_invalid_nanos() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let cache_dir = dir.path().join("cache");
        fs::create_dir_all(&cache_dir).expect("create cache dir");

        // Bytes below mirror the on-disk postings layout: magic, version,
        // counts, limits, then a single file entry with invalid nanos.
        let mut postings = Vec::new();
        postings.extend_from_slice(INDEX_MAGIC);
        postings.extend_from_slice(&INDEX_VERSION.to_le_bytes());
        postings.extend_from_slice(&0u32.to_le_bytes());
        postings.extend_from_slice(&1u32.to_le_bytes());
        postings.extend_from_slice(&DEFAULT_MAX_FILE_SIZE.to_le_bytes());
        postings.extend_from_slice(&1u32.to_le_bytes());
        postings.extend_from_slice(b"/");
        postings.push(0u8);
        postings.extend_from_slice(&1u32.to_le_bytes());
        postings.extend_from_slice(&0u64.to_le_bytes());
        postings.extend_from_slice(&0u64.to_le_bytes());
        postings.extend_from_slice(&1_000_000_000u32.to_le_bytes());
        postings.extend_from_slice(b"a");
        postings.extend_from_slice(&0u64.to_le_bytes());

        let mut lookup = Vec::new();
        lookup.extend_from_slice(LOOKUP_MAGIC);
        lookup.extend_from_slice(&INDEX_VERSION.to_le_bytes());
        lookup.extend_from_slice(&0u32.to_le_bytes());

        fs::write(cache_dir.join("postings.bin"), postings).expect("write postings");
        fs::write(cache_dir.join("lookup.bin"), lookup).expect("write lookup");

        assert!(SearchIndex::read_from_disk(&cache_dir).is_none());
    }

    // Sorting a mix of existing and missing paths must not panic (missing
    // paths stat to None) and must keep every element.
    #[test]
    fn sort_paths_by_mtime_desc_does_not_panic_on_missing_files() {
        let dir = tempfile::tempdir().expect("create tempdir");
        let mut paths: Vec<PathBuf> = Vec::new();
        for i in 0..30 {
            let path = if i % 2 == 0 {
                let p = dir.path().join(format!("real-{i}.rs"));
                fs::write(&p, format!("// {i}\n")).expect("write");
                p
            } else {
                dir.path().join(format!("missing-{i}.rs"))
            };
            paths.push(path);
        }

        // Repeat to exercise comparator stability across runs.
        for _ in 0..50 {
            let mut copy = paths.clone();
            sort_paths_by_mtime_desc(&mut copy);
            assert_eq!(copy.len(), paths.len());
        }
    }

    // Same robustness check for the GrepMatch-sorting variant.
    #[test]
    fn sort_grep_matches_by_mtime_desc_does_not_panic_on_missing_files() {
        let dir = tempfile::tempdir().expect("create tempdir");
        let mut matches: Vec<GrepMatch> = Vec::new();
        for i in 0..30 {
            let file = if i % 2 == 0 {
                let p = dir.path().join(format!("real-{i}.rs"));
                fs::write(&p, format!("// {i}\n")).expect("write");
                p
            } else {
                dir.path().join(format!("missing-{i}.rs"))
            };
            matches.push(GrepMatch {
                file,
                line: u32::try_from(i).unwrap_or(0),
                column: 0,
                line_text: format!("match {i}"),
                match_text: format!("match {i}"),
            });
        }

        for _ in 0..50 {
            let mut copy = matches.clone();
            sort_grep_matches_by_mtime_desc(&mut copy, dir.path());
            assert_eq!(copy.len(), matches.len());
        }
    }
}