1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2use std::fs::{self, File};
3use std::io::{BufReader, BufWriter, Read, Write};
4use std::path::{Component, Path, PathBuf};
5use std::process::Command;
6use std::time::{Duration, SystemTime, UNIX_EPOCH};
7
8use globset::{Glob, GlobSet, GlobSetBuilder};
9use ignore::WalkBuilder;
10use regex::RegexBuilder;
11use regex_syntax::hir::{Hir, HirKind};
12
/// Files with content larger than this (1 MiB) are tracked but not
/// trigram-indexed; see `SearchIndex::update_file` / `unindexed_files`.
const DEFAULT_MAX_FILE_SIZE: u64 = 1_048_576;
/// Magic header identifying the on-disk postings file.
const INDEX_MAGIC: &[u8; 8] = b"AFTIDX01";
/// Magic header identifying the on-disk trigram lookup file.
const LOOKUP_MAGIC: &[u8; 8] = b"AFTLKP01";
/// Bump whenever the cache binary layout changes; mismatches are rejected.
const INDEX_VERSION: u32 = 1;
// Preview byte budget (8 KiB). NOTE(review): not referenced in the code
// visible in this file — confirm it is used elsewhere before removing.
const PREVIEW_BYTES: usize = 8 * 1024;
/// Pseudo "next byte" recorded for the final trigram of a buffer.
const EOF_SENTINEL: u8 = 0;
19
/// In-memory trigram index over a project tree. Postings map trigrams to
/// the files containing them so regex searches can be narrowed to a small
/// candidate set before any file content is scanned.
#[derive(Clone, Debug)]
pub struct SearchIndex {
    /// trigram -> one posting per file containing that trigram.
    pub postings: HashMap<u32, Vec<Posting>>,
    /// File table; a file id indexes into this vec. Removed files keep a
    /// blanked slot (empty path) so other ids remain valid.
    pub files: Vec<FileEntry>,
    /// Absolute path -> file id for currently-present files.
    pub path_to_id: HashMap<PathBuf, u32>,
    /// Reported to callers via `IndexStatus` (Ready vs Building).
    pub ready: bool,
    project_root: PathBuf,
    // Git HEAD the index was built against, if the root is a repository.
    git_head: Option<String>,
    // Files larger than this are tracked but not trigram-indexed.
    max_file_size: u64,
    // file id -> trigrams it contributed; used to undo postings on removal.
    file_trigrams: HashMap<u32, Vec<u32>>,
    // Tracked files with no postings (e.g. oversized); always returned as
    // grep candidates since the index cannot rule them out.
    unindexed_files: HashSet<u32>,
}

/// Per-(trigram, file) occurrence summary stored in a postings list.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Posting {
    pub file_id: u32,
    /// Bit set of hashed bytes observed immediately after this trigram.
    pub next_mask: u8,
    /// Bit set of occurrence byte offsets modulo 8.
    pub loc_mask: u8,
}

/// Metadata snapshot for a tracked file.
#[derive(Clone, Debug)]
pub struct FileEntry {
    pub path: PathBuf,
    pub size: u64,
    pub modified: SystemTime,
}

/// One reported grep hit (only the first match on a given line is kept).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct GrepMatch {
    pub file: PathBuf,
    // NOTE(review): line/column numbering comes from `line_details`, which
    // is not visible in this file — confirm whether it is 0- or 1-based.
    pub line: u32,
    pub column: u32,
    pub line_text: String,
    pub match_text: String,
}

/// Aggregate outcome of a grep run. `total_matches` counts every hit,
/// including ones dropped when `truncated` is set.
#[derive(Clone, Debug)]
pub struct GrepResult {
    pub matches: Vec<GrepMatch>,
    pub total_matches: usize,
    pub files_searched: usize,
    pub files_with_matches: usize,
    pub index_status: IndexStatus,
    pub truncated: bool,
}
65
/// Lifecycle of the index as reported in grep results.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IndexStatus {
    Ready,
    Building,
    Fallback,
}

impl IndexStatus {
    /// Stable, human-readable label for this status.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Ready => "Ready",
            Self::Building => "Building",
            Self::Fallback => "Fallback",
        }
    }
}
82
/// Trigram obligations derived from a regex pattern (see `decompose_regex`).
/// All of `and_trigrams` must appear in a candidate file; each inner vec of
/// `or_groups` requires at least one of its trigrams.
#[derive(Clone, Debug, Default)]
pub struct RegexQuery {
    pub and_trigrams: Vec<u32>,
    pub or_groups: Vec<Vec<u32>>,
    /// Per-trigram mask constraints for the AND set.
    pub(crate) and_filters: HashMap<u32, PostingFilter>,
    /// Parallel to `or_groups`: per-trigram mask constraints per group.
    pub(crate) or_filters: Vec<HashMap<u32, PostingFilter>>,
}

/// Mask constraints a posting must satisfy (a zero mask = unconstrained).
#[derive(Clone, Copy, Debug, Default)]
pub(crate) struct PostingFilter {
    next_mask: u8,
    loc_mask: u8,
}

/// Intermediate result of walking a regex HIR: literal byte runs that must
/// all appear (`and_runs`) and alternation branch runs (`or_groups`).
#[derive(Clone, Debug, Default)]
struct QueryBuild {
    and_runs: Vec<Vec<u8>>,
    or_groups: Vec<Vec<Vec<u8>>>,
}

/// Compiled include/exclude glob sets; `None` means "no constraint".
#[derive(Clone, Debug, Default)]
pub(crate) struct PathFilters {
    includes: Option<GlobSet>,
    excludes: Option<GlobSet>,
}

/// Where a search runs and whether the trigram index may serve it.
#[derive(Clone, Debug)]
pub(crate) struct SearchScope {
    pub root: PathBuf,
    pub use_index: bool,
}
114
115impl SearchIndex {
116 pub fn new() -> Self {
117 SearchIndex {
118 postings: HashMap::new(),
119 files: Vec::new(),
120 path_to_id: HashMap::new(),
121 ready: false,
122 project_root: PathBuf::new(),
123 git_head: None,
124 max_file_size: DEFAULT_MAX_FILE_SIZE,
125 file_trigrams: HashMap::new(),
126 unindexed_files: HashSet::new(),
127 }
128 }
129
130 pub fn build(root: &Path) -> Self {
131 Self::build_with_limit(root, DEFAULT_MAX_FILE_SIZE)
132 }
133
134 pub(crate) fn build_with_limit(root: &Path, max_file_size: u64) -> Self {
135 let project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
136 let mut index = SearchIndex {
137 project_root: project_root.clone(),
138 max_file_size,
139 ..SearchIndex::new()
140 };
141
142 let filters = PathFilters::default();
143 for path in walk_project_files(&project_root, &filters) {
144 index.update_file(&path);
145 }
146
147 index.git_head = current_git_head(&project_root);
148 index.ready = true;
149 index
150 }
151
152 pub fn index_file(&mut self, path: &Path, content: &[u8]) {
153 self.remove_file(path);
154
155 let file_id = match self.allocate_file_id(path, content.len() as u64) {
156 Some(file_id) => file_id,
157 None => return,
158 };
159
160 let mut trigram_map: BTreeMap<u32, PostingFilter> = BTreeMap::new();
161 for (trigram, next_char, position) in extract_trigrams(content) {
162 let entry = trigram_map.entry(trigram).or_default();
163 entry.next_mask |= mask_for_next_char(next_char);
164 entry.loc_mask |= mask_for_position(position);
165 }
166
167 let mut file_trigrams = Vec::with_capacity(trigram_map.len());
168 for (trigram, filter) in trigram_map {
169 self.postings.entry(trigram).or_default().push(Posting {
170 file_id,
171 next_mask: filter.next_mask,
172 loc_mask: filter.loc_mask,
173 });
174 file_trigrams.push(trigram);
175 }
176
177 for postings in self.postings.values_mut() {
178 postings.sort_by_key(|posting| posting.file_id);
179 }
180
181 self.file_trigrams.insert(file_id, file_trigrams);
182 self.unindexed_files.remove(&file_id);
183 }
184
185 pub fn remove_file(&mut self, path: &Path) {
186 let Some(file_id) = self.path_to_id.remove(path) else {
187 return;
188 };
189
190 if let Some(trigrams) = self.file_trigrams.remove(&file_id) {
191 for trigram in trigrams {
192 let should_remove = if let Some(postings) = self.postings.get_mut(&trigram) {
193 postings.retain(|posting| posting.file_id != file_id);
194 postings.is_empty()
195 } else {
196 false
197 };
198
199 if should_remove {
200 self.postings.remove(&trigram);
201 }
202 }
203 }
204
205 self.unindexed_files.remove(&file_id);
206 if let Some(file) = self.files.get_mut(file_id as usize) {
207 file.path = PathBuf::new();
208 file.size = 0;
209 file.modified = UNIX_EPOCH;
210 }
211 }
212
213 pub fn update_file(&mut self, path: &Path) {
214 self.remove_file(path);
215
216 let metadata = match fs::metadata(path) {
217 Ok(metadata) if metadata.is_file() => metadata,
218 _ => return,
219 };
220
221 if is_binary_path(path, metadata.len()) {
222 return;
223 }
224
225 if metadata.len() > self.max_file_size {
226 self.track_unindexed_file(path, &metadata);
227 return;
228 }
229
230 let content = match fs::read(path) {
231 Ok(content) => content,
232 Err(_) => return,
233 };
234
235 if is_binary_bytes(&content) {
236 return;
237 }
238
239 self.index_file(path, &content);
240 }
241
    /// Public grep entry point; forwards unchanged to [`Self::search_grep`].
    pub fn grep(
        &self,
        pattern: &str,
        case_sensitive: bool,
        include: &[String],
        exclude: &[String],
        search_root: &Path,
        max_results: usize,
    ) -> GrepResult {
        self.search_grep(
            pattern,
            case_sensitive,
            include,
            exclude,
            search_root,
            max_results,
        )
    }
260
    /// Runs `pattern` over index candidate files under `search_root`,
    /// honoring include/exclude globs. At most one match is reported (and
    /// counted) per line per file; matches beyond `max_results` are still
    /// counted but set `truncated`. Results are ordered newest-file-first.
    pub fn search_grep(
        &self,
        pattern: &str,
        case_sensitive: bool,
        include: &[String],
        exclude: &[String],
        search_root: &Path,
        max_results: usize,
    ) -> GrepResult {
        let mut regex_builder = RegexBuilder::new(pattern);
        regex_builder.case_insensitive(!case_sensitive);
        let regex = match regex_builder.build() {
            Ok(regex) => regex,
            Err(_) => {
                // Invalid pattern: report an empty result instead of erroring.
                return GrepResult {
                    matches: Vec::new(),
                    total_matches: 0,
                    files_searched: 0,
                    files_with_matches: 0,
                    index_status: if self.ready {
                        IndexStatus::Ready
                    } else {
                        IndexStatus::Building
                    },
                    truncated: false,
                };
            }
        };

        // Bad user globs degrade to "no filtering" rather than failing.
        let filters = match build_path_filters(include, exclude) {
            Ok(filters) => filters,
            Err(_) => PathFilters::default(),
        };
        let search_root = canonicalize_or_normalize(search_root);

        // Trigram pre-filter: candidates are a superset of true matches.
        let query = decompose_regex(pattern);
        let candidate_ids = self.candidates(&query);

        let mut matches = Vec::new();
        let mut total_matches = 0usize;
        let mut files_searched = 0usize;
        let mut files_with_matches = 0usize;
        let mut truncated = false;

        for file_id in candidate_ids {
            let Some(file) = self.files.get(file_id as usize) else {
                continue;
            };
            // Removed files leave a blanked slot behind; skip them.
            if file.path.as_os_str().is_empty() {
                continue;
            }
            if !is_within_search_root(&search_root, &file.path) {
                continue;
            }
            if !filters.matches(&self.project_root, &file.path) {
                continue;
            }

            // Skips binary / non-UTF-8 / unreadable files.
            let content = match read_searchable_text(&file.path) {
                Some(content) => content,
                None => continue,
            };

            files_searched += 1;
            let line_starts = line_starts(&content);
            let mut seen_lines = HashSet::new();
            let mut matched_this_file = false;

            for matched in regex.find_iter(&content) {
                let (line, column, line_text) =
                    line_details(&content, &line_starts, matched.start());
                // Only the first match on each line is reported or counted.
                if !seen_lines.insert(line) {
                    continue;
                }

                total_matches += 1;
                if matches.len() < max_results {
                    matches.push(GrepMatch {
                        file: file.path.clone(),
                        line,
                        column,
                        line_text,
                        match_text: matched.as_str().to_string(),
                    });
                } else {
                    truncated = true;
                }
                matched_this_file = true;
            }

            if matched_this_file {
                files_with_matches += 1;
            }
        }

        sort_grep_matches_by_mtime_desc(&mut matches, &self.project_root);

        GrepResult {
            total_matches,
            matches,
            files_searched,
            files_with_matches,
            index_status: if self.ready {
                IndexStatus::Ready
            } else {
                IndexStatus::Building
            },
            truncated,
        }
    }
371
372 pub fn glob(&self, pattern: &str, search_root: &Path) -> Vec<PathBuf> {
373 let filters = match build_path_filters(&[pattern.to_string()], &[]) {
374 Ok(filters) => filters,
375 Err(_) => return Vec::new(),
376 };
377 let search_root = canonicalize_or_normalize(search_root);
378 let filter_root = if search_root.starts_with(&self.project_root) {
379 &self.project_root
380 } else {
381 &search_root
382 };
383
384 let mut paths = walk_project_files_from(filter_root, &search_root, &filters);
385 sort_paths_by_mtime_desc(&mut paths);
386 paths
387 }
388
    /// Files that could match `query`: the intersection of all AND-trigram
    /// postings, further intersected with each OR group's union. Files
    /// without postings (`unindexed_files`) are always included since the
    /// index cannot rule them out. An empty query means every active file.
    pub fn candidates(&self, query: &RegexQuery) -> Vec<u32> {
        if query.and_trigrams.is_empty() && query.or_groups.is_empty() {
            return self.active_file_ids();
        }

        let mut current: Option<BTreeSet<u32>> = None;

        for trigram in &query.and_trigrams {
            let filter = query.and_filters.get(trigram).copied();
            let matches = self.postings_for_trigram(*trigram, filter);
            current = Some(match current.take() {
                Some(existing) => existing.intersection(&matches).copied().collect(),
                None => matches,
            });

            // Intersections only shrink; stop as soon as the set is empty.
            if current.as_ref().is_some_and(|set| set.is_empty()) {
                break;
            }
        }

        // No AND trigrams: start from all active files before OR filtering.
        let mut current = current.unwrap_or_else(|| self.active_file_ids().into_iter().collect());

        for (index, group) in query.or_groups.iter().enumerate() {
            let mut group_matches = BTreeSet::new();
            let filters = query.or_filters.get(index);

            // Union within a group: any alternation branch may match.
            for trigram in group {
                let filter = filters.and_then(|filters| filters.get(trigram).copied());
                group_matches.extend(self.postings_for_trigram(*trigram, filter));
            }

            current = current.intersection(&group_matches).copied().collect();
            if current.is_empty() {
                break;
            }
        }

        // Oversized/unindexed files have no trigrams; always scan them.
        for file_id in &self.unindexed_files {
            if self.is_active_file(*file_id) {
                current.insert(*file_id);
            }
        }

        current.into_iter().collect()
    }
434
    /// Atomically persists the index into `cache_dir` as `postings.bin`
    /// (header, file table, postings blob) plus `lookup.bin` (trigram ->
    /// blob offset/count). File ids are renumbered densely so blanked
    /// slots from removed files are not written. Data is written to
    /// `.tmp` files and renamed into place; any failure aborts silently
    /// and removes the temporaries.
    pub fn write_to_disk(&self, cache_dir: &Path, git_head: Option<&str>) {
        if fs::create_dir_all(cache_dir).is_err() {
            return;
        }

        let postings_path = cache_dir.join("postings.bin");
        let lookup_path = cache_dir.join("lookup.bin");
        let tmp_postings = cache_dir.join("postings.bin.tmp");
        let tmp_lookup = cache_dir.join("lookup.bin.tmp");

        // Map sparse in-memory ids to the dense 0..n ids used on disk.
        let active_ids = self.active_file_ids();
        let mut id_map = HashMap::new();
        for (new_id, old_id) in active_ids.iter().enumerate() {
            let Ok(new_id_u32) = u32::try_from(new_id) else {
                return;
            };
            id_map.insert(*old_id, new_id_u32);
        }

        let write_result = (|| -> std::io::Result<()> {
            let mut postings_writer = BufWriter::new(File::create(&tmp_postings)?);

            // Header: magic, version, string lengths, limit, file count.
            postings_writer.write_all(INDEX_MAGIC)?;
            write_u32(&mut postings_writer, INDEX_VERSION)?;

            let head = git_head.unwrap_or_default();
            let root = self.project_root.to_string_lossy();
            let head_len = u32::try_from(head.len())
                .map_err(|_| std::io::Error::other("git head too large to cache"))?;
            let root_len = u32::try_from(root.len())
                .map_err(|_| std::io::Error::other("project root too large to cache"))?;
            let file_count = u32::try_from(active_ids.len())
                .map_err(|_| std::io::Error::other("too many files to cache"))?;

            write_u32(&mut postings_writer, head_len)?;
            write_u32(&mut postings_writer, root_len)?;
            write_u64(&mut postings_writer, self.max_file_size)?;
            write_u32(&mut postings_writer, file_count)?;
            postings_writer.write_all(head.as_bytes())?;
            postings_writer.write_all(root.as_bytes())?;

            // File table in dense-id order; paths are stored root-relative.
            for old_id in &active_ids {
                let Some(file) = self.files.get(*old_id as usize) else {
                    return Err(std::io::Error::other("missing file entry for cache write"));
                };
                let path = relative_to_root(&self.project_root, &file.path);
                let path = path.to_string_lossy();
                let path_len = u32::try_from(path.len())
                    .map_err(|_| std::io::Error::other("cached path too large"))?;
                let modified = file
                    .modified
                    .duration_since(UNIX_EPOCH)
                    .unwrap_or(Duration::ZERO);
                let unindexed = if self.unindexed_files.contains(old_id) {
                    1u8
                } else {
                    0u8
                };

                postings_writer.write_all(&[unindexed])?;
                write_u32(&mut postings_writer, path_len)?;
                write_u64(&mut postings_writer, file.size)?;
                write_u64(&mut postings_writer, modified.as_secs())?;
                write_u32(&mut postings_writer, modified.subsec_nanos())?;
                postings_writer.write_all(path.as_bytes())?;
            }

            // Postings blob: 6 bytes per posting (LE u32 id + two masks),
            // grouped by ascending trigram; lookup entries record spans.
            let mut lookup_entries = Vec::new();
            let mut postings_blob = Vec::new();
            let mut sorted_postings: Vec<_> = self.postings.iter().collect();
            sorted_postings.sort_by_key(|(trigram, _)| **trigram);

            for (trigram, postings) in sorted_postings {
                let offset = u64::try_from(postings_blob.len())
                    .map_err(|_| std::io::Error::other("postings blob too large"))?;
                let mut count = 0u32;

                for posting in postings {
                    // Skip postings pointing at removed (unmapped) files.
                    let Some(mapped_file_id) = id_map.get(&posting.file_id).copied() else {
                        continue;
                    };

                    postings_blob.extend_from_slice(&mapped_file_id.to_le_bytes());
                    postings_blob.push(posting.next_mask);
                    postings_blob.push(posting.loc_mask);
                    count = count.saturating_add(1);
                }

                if count > 0 {
                    lookup_entries.push((*trigram, offset, count));
                }
            }

            write_u64(
                &mut postings_writer,
                u64::try_from(postings_blob.len())
                    .map_err(|_| std::io::Error::other("postings blob too large"))?,
            )?;
            postings_writer.write_all(&postings_blob)?;
            postings_writer.flush()?;
            drop(postings_writer);

            let mut lookup_writer = BufWriter::new(File::create(&tmp_lookup)?);
            let entry_count = u32::try_from(lookup_entries.len())
                .map_err(|_| std::io::Error::other("too many lookup entries to cache"))?;

            lookup_writer.write_all(LOOKUP_MAGIC)?;
            write_u32(&mut lookup_writer, INDEX_VERSION)?;
            write_u32(&mut lookup_writer, entry_count)?;

            for (trigram, offset, count) in lookup_entries {
                write_u32(&mut lookup_writer, trigram)?;
                write_u64(&mut lookup_writer, offset)?;
                write_u32(&mut lookup_writer, count)?;
            }

            lookup_writer.flush()?;
            drop(lookup_writer);

            // Publish atomically only once both temp files are complete.
            fs::rename(&tmp_postings, &postings_path)?;
            fs::rename(&tmp_lookup, &lookup_path)?;

            Ok(())
        })();

        if write_result.is_err() {
            let _ = fs::remove_file(&tmp_postings);
            let _ = fs::remove_file(&tmp_lookup);
        }
    }
565
566 pub fn read_from_disk(cache_dir: &Path) -> Option<Self> {
567 let postings_path = cache_dir.join("postings.bin");
568 let lookup_path = cache_dir.join("lookup.bin");
569
570 let mut postings_reader = BufReader::new(File::open(postings_path).ok()?);
571 let mut lookup_reader = BufReader::new(File::open(lookup_path).ok()?);
572
573 let mut magic = [0u8; 8];
574 postings_reader.read_exact(&mut magic).ok()?;
575 if &magic != INDEX_MAGIC {
576 return None;
577 }
578 if read_u32(&mut postings_reader).ok()? != INDEX_VERSION {
579 return None;
580 }
581
582 let head_len = read_u32(&mut postings_reader).ok()? as usize;
583 let root_len = read_u32(&mut postings_reader).ok()? as usize;
584 let max_file_size = read_u64(&mut postings_reader).ok()?;
585 let file_count = read_u32(&mut postings_reader).ok()? as usize;
586
587 let mut head_bytes = vec![0u8; head_len];
588 postings_reader.read_exact(&mut head_bytes).ok()?;
589 let git_head = String::from_utf8(head_bytes)
590 .ok()
591 .filter(|head| !head.is_empty());
592
593 let mut root_bytes = vec![0u8; root_len];
594 postings_reader.read_exact(&mut root_bytes).ok()?;
595 let project_root = PathBuf::from(String::from_utf8(root_bytes).ok()?);
596
597 let mut files = Vec::with_capacity(file_count);
598 let mut path_to_id = HashMap::new();
599 let mut unindexed_files = HashSet::new();
600
601 for file_id in 0..file_count {
602 let mut unindexed = [0u8; 1];
603 postings_reader.read_exact(&mut unindexed).ok()?;
604 let path_len = read_u32(&mut postings_reader).ok()? as usize;
605 let size = read_u64(&mut postings_reader).ok()?;
606 let secs = read_u64(&mut postings_reader).ok()?;
607 let nanos = read_u32(&mut postings_reader).ok()?;
608 let mut path_bytes = vec![0u8; path_len];
609 postings_reader.read_exact(&mut path_bytes).ok()?;
610 let relative_path = PathBuf::from(String::from_utf8(path_bytes).ok()?);
611 let full_path = project_root.join(relative_path);
612 let file_id_u32 = u32::try_from(file_id).ok()?;
613
614 files.push(FileEntry {
615 path: full_path.clone(),
616 size,
617 modified: UNIX_EPOCH + Duration::new(secs, nanos),
618 });
619 path_to_id.insert(full_path, file_id_u32);
620 if unindexed[0] == 1 {
621 unindexed_files.insert(file_id_u32);
622 }
623 }
624
625 let postings_len = read_u64(&mut postings_reader).ok()? as usize;
626 let mut postings_blob = vec![0u8; postings_len];
627 postings_reader.read_exact(&mut postings_blob).ok()?;
628
629 let mut lookup_magic = [0u8; 8];
630 lookup_reader.read_exact(&mut lookup_magic).ok()?;
631 if &lookup_magic != LOOKUP_MAGIC {
632 return None;
633 }
634 if read_u32(&mut lookup_reader).ok()? != INDEX_VERSION {
635 return None;
636 }
637 let entry_count = read_u32(&mut lookup_reader).ok()? as usize;
638
639 let mut postings = HashMap::new();
640 let mut file_trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
641
642 for _ in 0..entry_count {
643 let trigram = read_u32(&mut lookup_reader).ok()?;
644 let offset = read_u64(&mut lookup_reader).ok()? as usize;
645 let count = read_u32(&mut lookup_reader).ok()? as usize;
646 let bytes_len = count.checked_mul(6)?;
647 let end = offset.checked_add(bytes_len)?;
648 if end > postings_blob.len() {
649 return None;
650 }
651
652 let mut trigram_postings = Vec::with_capacity(count);
653 for chunk in postings_blob[offset..end].chunks_exact(6) {
654 let file_id = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
655 let posting = Posting {
656 file_id,
657 next_mask: chunk[4],
658 loc_mask: chunk[5],
659 };
660 trigram_postings.push(posting.clone());
661 file_trigrams.entry(file_id).or_default().push(trigram);
662 }
663 postings.insert(trigram, trigram_postings);
664 }
665
666 Some(SearchIndex {
667 postings,
668 files,
669 path_to_id,
670 ready: true,
671 project_root,
672 git_head,
673 max_file_size,
674 file_trigrams,
675 unindexed_files,
676 })
677 }
678
    /// HEAD commit recorded when this index was built or loaded, if any.
    pub(crate) fn stored_git_head(&self) -> Option<&str> {
        self.git_head.as_deref()
    }

    /// Flips the readiness flag reported through `IndexStatus`.
    pub(crate) fn set_ready(&mut self, ready: bool) {
        self.ready = ready;
    }
686
    /// Produces a ready index for `root`, preferring the cheapest path:
    /// reuse `baseline` untouched when its recorded git HEAD matches
    /// `current_head`; otherwise try updating only the files git reports
    /// as changed between the two commits; fall back to a full rebuild.
    /// With no current HEAD to compare against, always rebuilds.
    pub(crate) fn rebuild_or_refresh(
        root: &Path,
        max_file_size: u64,
        current_head: Option<String>,
        baseline: Option<SearchIndex>,
    ) -> Self {
        if current_head.is_none() {
            return SearchIndex::build_with_limit(root, max_file_size);
        }

        if let Some(mut baseline) = baseline {
            // Re-anchor the cached index to this process's view of the root.
            baseline.project_root = fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
            baseline.max_file_size = max_file_size;

            if baseline.git_head == current_head {
                baseline.ready = true;
                return baseline;
            }

            if let (Some(previous), Some(current)) =
                (baseline.git_head.clone(), current_head.clone())
            {
                let project_root = baseline.project_root.clone();
                if apply_git_diff_updates(&mut baseline, &project_root, &previous, &current) {
                    baseline.git_head = Some(current);
                    baseline.ready = true;
                    return baseline;
                }
            }
        }

        SearchIndex::build_with_limit(root, max_file_size)
    }
720
721 fn allocate_file_id(&mut self, path: &Path, size_hint: u64) -> Option<u32> {
722 let file_id = u32::try_from(self.files.len()).ok()?;
723 let metadata = fs::metadata(path).ok();
724 let size = metadata
725 .as_ref()
726 .map_or(size_hint, |metadata| metadata.len());
727 let modified = metadata
728 .and_then(|metadata| metadata.modified().ok())
729 .unwrap_or(UNIX_EPOCH);
730
731 self.files.push(FileEntry {
732 path: path.to_path_buf(),
733 size,
734 modified,
735 });
736 self.path_to_id.insert(path.to_path_buf(), file_id);
737 Some(file_id)
738 }
739
740 fn track_unindexed_file(&mut self, path: &Path, metadata: &fs::Metadata) {
741 let Some(file_id) = self.allocate_file_id(path, metadata.len()) else {
742 return;
743 };
744 self.unindexed_files.insert(file_id);
745 self.file_trigrams.insert(file_id, Vec::new());
746 }
747
748 fn active_file_ids(&self) -> Vec<u32> {
749 let mut ids: Vec<u32> = self.path_to_id.values().copied().collect();
750 ids.sort_unstable();
751 ids
752 }
753
754 fn is_active_file(&self, file_id: u32) -> bool {
755 self.files
756 .get(file_id as usize)
757 .map(|file| !file.path.as_os_str().is_empty())
758 .unwrap_or(false)
759 }
760
    /// Active file ids containing `trigram`, optionally narrowed by the
    /// query's posting filter. Only `next_mask` participates in the
    /// narrowing here. NOTE(review): `loc_mask` is computed and carried on
    /// both postings and filters but never consulted on this path —
    /// confirm whether a positional check was intended.
    fn postings_for_trigram(&self, trigram: u32, filter: Option<PostingFilter>) -> BTreeSet<u32> {
        let mut matches = BTreeSet::new();
        let Some(postings) = self.postings.get(&trigram) else {
            return matches;
        };

        for posting in postings {
            if let Some(filter) = filter {
                // A zero mask means "no constraint"; otherwise the file must
                // share at least one next-char bucket with the query.
                if filter.next_mask != 0 && posting.next_mask & filter.next_mask == 0 {
                    continue;
                }
            }
            if self.is_active_file(posting.file_id) {
                matches.insert(posting.file_id);
            }
        }

        matches
    }
786}
787
788pub fn decompose_regex(pattern: &str) -> RegexQuery {
789 let hir = match regex_syntax::parse(pattern) {
790 Ok(hir) => hir,
791 Err(_) => return RegexQuery::default(),
792 };
793
794 let build = build_query(&hir);
795 build.into_query()
796}
797
/// Packs three bytes into one `u32` trigram key (big-endian byte order:
/// `a` is the most significant of the three).
pub fn pack_trigram(a: u8, b: u8, c: u8) -> u32 {
    u32::from_be_bytes([0, a, b, c])
}
801
/// Case-folds an ASCII byte so indexing and queries are case-insensitive;
/// bytes outside `A..=Z` pass through unchanged.
pub fn normalize_char(c: u8) -> u8 {
    if c.is_ascii_uppercase() {
        c + (b'a' - b'A')
    } else {
        c
    }
}
805
/// Produces one `(trigram, next_byte, offset)` tuple for every 3-byte
/// window of `content`. Trigram bytes are ASCII-lowercased; the byte that
/// follows each window is reported raw, with 0 standing in for end-of-
/// buffer on the final window (matches `EOF_SENTINEL`).
pub fn extract_trigrams(content: &[u8]) -> Vec<(u32, u8, usize)> {
    if content.len() < 3 {
        return Vec::new();
    }

    content
        .windows(3)
        .enumerate()
        .map(|(offset, window)| {
            // Inline of pack_trigram(normalize_char(..)) per window byte.
            let key = ((window[0].to_ascii_lowercase() as u32) << 16)
                | ((window[1].to_ascii_lowercase() as u32) << 8)
                | (window[2].to_ascii_lowercase() as u32);
            let following = content.get(offset + 3).copied().unwrap_or(0);
            (key, following, offset)
        })
        .collect()
}
823
824pub fn resolve_cache_dir(project_root: &Path) -> PathBuf {
825 if let Some(override_dir) = std::env::var_os("AFT_CACHE_DIR") {
827 return PathBuf::from(override_dir)
828 .join("index")
829 .join(project_cache_key(project_root));
830 }
831 let home = std::env::var_os("HOME")
832 .map(PathBuf::from)
833 .unwrap_or_else(|| PathBuf::from("."));
834 home.join(".cache")
835 .join("aft")
836 .join("index")
837 .join(project_cache_key(project_root))
838}
839
840pub(crate) fn build_path_filters(
841 include: &[String],
842 exclude: &[String],
843) -> Result<PathFilters, String> {
844 Ok(PathFilters {
845 includes: build_globset(include)?,
846 excludes: build_globset(exclude)?,
847 })
848}
849
/// Walks `root` with glob matching evaluated relative to `root` itself.
pub(crate) fn walk_project_files(root: &Path, filters: &PathFilters) -> Vec<PathBuf> {
    walk_project_files_from(root, root, filters)
}
853
854pub(crate) fn walk_project_files_from(
855 filter_root: &Path,
856 search_root: &Path,
857 filters: &PathFilters,
858) -> Vec<PathBuf> {
859 let mut builder = WalkBuilder::new(search_root);
860 builder
861 .hidden(false)
862 .git_ignore(true)
863 .git_global(true)
864 .git_exclude(true)
865 .filter_entry(|entry| {
866 let name = entry.file_name().to_string_lossy();
867 if entry.file_type().map_or(false, |ft| ft.is_dir()) {
868 return !matches!(
869 name.as_ref(),
870 "node_modules"
871 | "target"
872 | "venv"
873 | ".venv"
874 | ".git"
875 | "__pycache__"
876 | ".tox"
877 | "dist"
878 | "build"
879 );
880 }
881 true
882 });
883
884 let mut files = Vec::new();
885 for entry in builder.build().filter_map(|entry| entry.ok()) {
886 if !entry
887 .file_type()
888 .map_or(false, |file_type| file_type.is_file())
889 {
890 continue;
891 }
892 let path = entry.into_path();
893 if filters.matches(filter_root, &path) {
894 files.push(path);
895 }
896 }
897
898 sort_paths_by_mtime_desc(&mut files);
899 files
900}
901
902pub(crate) fn read_searchable_text(path: &Path) -> Option<String> {
903 let bytes = fs::read(path).ok()?;
904 if is_binary_bytes(&bytes) {
905 return None;
906 }
907 String::from_utf8(bytes).ok()
908}
909
/// Strips `root` from `path` when possible; paths outside `root` are
/// returned unchanged so callers can still glob-match them verbatim.
pub(crate) fn relative_to_root(root: &Path, path: &Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
915
916pub(crate) fn sort_paths_by_mtime_desc(paths: &mut [PathBuf]) {
917 paths.sort_by(|left, right| {
918 path_modified_time(right)
919 .cmp(&path_modified_time(left))
920 .then_with(|| left.cmp(right))
921 });
922}
923
924pub(crate) fn sort_grep_matches_by_mtime_desc(matches: &mut [GrepMatch], project_root: &Path) {
925 matches.sort_by(|left, right| {
926 let left_path = resolve_match_path(project_root, &left.file);
927 let right_path = resolve_match_path(project_root, &right.file);
928
929 path_modified_time(&right_path)
930 .cmp(&path_modified_time(&left_path))
931 .then_with(|| left.file.cmp(&right.file))
932 .then_with(|| left.line.cmp(&right.line))
933 .then_with(|| left.column.cmp(&right.column))
934 });
935}
936
937pub(crate) fn resolve_search_scope(project_root: &Path, path: Option<&str>) -> SearchScope {
938 let resolved_project_root = canonicalize_or_normalize(project_root);
939 let root = match path {
940 Some(path) => {
941 let path = PathBuf::from(path);
942 if path.is_absolute() {
943 canonicalize_or_normalize(&path)
944 } else {
945 normalize_path(&resolved_project_root.join(path))
946 }
947 }
948 None => resolved_project_root.clone(),
949 };
950
951 let use_index = is_within_search_root(&resolved_project_root, &root);
952 SearchScope { root, use_index }
953}
954
/// Content-based binary sniffing (via `content_inspector`), used to keep
/// binary files out of the index and out of grep scans.
pub(crate) fn is_binary_bytes(content: &[u8]) -> bool {
    content_inspector::inspect(content).is_binary()
}
958
/// `git rev-parse HEAD` in `root`; `None` outside a repo or on failure.
pub(crate) fn current_git_head(root: &Path) -> Option<String> {
    run_git(root, &["rev-parse", "HEAD"])
}
962
/// Stable 16-hex-char key identifying a project for cache placement.
/// Prefers the repo's root commit hash (stable even if the working copy
/// is moved); falls back to hashing the canonicalized root path.
pub(crate) fn project_cache_key(project_root: &Path) -> String {
    use sha2::{Digest, Sha256};

    let mut hasher = Sha256::new();

    if let Some(root_commit) = run_git(project_root, &["rev-list", "--max-parents=0", "HEAD"]) {
        hasher.update(root_commit.as_bytes());
    } else {
        let canonical_root = canonicalize_or_normalize(project_root);
        hasher.update(canonical_root.to_string_lossy().as_bytes());
    }

    // SHA-256 hex is 64 chars; the first 16 are plenty for uniqueness here.
    let digest = format!("{:x}", hasher.finalize());
    digest[..16].to_string()
}
981
982impl PathFilters {
983 fn matches(&self, root: &Path, path: &Path) -> bool {
984 let relative = to_glob_path(&relative_to_root(root, path));
985 if self
986 .includes
987 .as_ref()
988 .is_some_and(|includes| !includes.is_match(&relative))
989 {
990 return false;
991 }
992 if self
993 .excludes
994 .as_ref()
995 .is_some_and(|excludes| excludes.is_match(&relative))
996 {
997 return false;
998 }
999 true
1000 }
1001}
1002
/// Canonicalizes when the path exists on disk; otherwise falls back to a
/// purely lexical cleanup (see `normalize_path`).
fn canonicalize_or_normalize(path: &Path) -> PathBuf {
    fs::canonicalize(path).unwrap_or_else(|_| normalize_path(path))
}
1006
/// Interprets a grep match path: absolute paths stand alone, relative
/// ones are rooted at the project root.
fn resolve_match_path(project_root: &Path, path: &Path) -> PathBuf {
    match path.is_absolute() {
        true => path.to_path_buf(),
        false => project_root.join(path),
    }
}
1014
/// Best-effort mtime lookup; `None` when the path cannot be stat'ed or
/// the platform does not report modification times.
fn path_modified_time(path: &Path) -> Option<SystemTime> {
    fs::metadata(path).ok()?.modified().ok()
}
1020
/// Lexically normalizes a path: drops `.` components and resolves `..`
/// against the preceding component where possible. No filesystem access,
/// so symlinks are not considered.
fn normalize_path(path: &Path) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut acc, component| {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // `pop` fails at an empty or rooted boundary; keep the
                // `..` so relative escapes like `../x` survive.
                if !acc.pop() {
                    acc.push(component);
                }
            }
            _ => acc.push(component),
        }
        acc
    })
}
1036
/// Component-wise prefix test: is `path` at or below `search_root`?
fn is_within_search_root(search_root: &Path, path: &Path) -> bool {
    path.strip_prefix(search_root).is_ok()
}
1040
impl QueryBuild {
    /// Lowers collected literal runs into a `RegexQuery`: each AND run's
    /// trigrams become required, and each alternation group becomes an OR
    /// group whose per-trigram filters are merged across its branches.
    /// Groups that produce no trigrams are dropped entirely.
    fn into_query(self) -> RegexQuery {
        let mut query = RegexQuery::default();

        for run in self.and_runs {
            add_run_to_and_query(&mut query, &run);
        }

        for group in self.or_groups {
            let mut trigrams = BTreeSet::new();
            let mut filters = HashMap::new();
            for run in group {
                for (trigram, filter) in trigram_filters(&run) {
                    trigrams.insert(trigram);
                    merge_filter(filters.entry(trigram).or_default(), filter);
                }
            }
            if !trigrams.is_empty() {
                query.or_groups.push(trigrams.into_iter().collect());
                query.or_filters.push(filters);
            }
        }

        query
    }
}
1067
/// Recursively extracts trigram obligations from a regex HIR node.
/// Literal runs of at least 3 bytes become required ("AND") runs; an
/// alternation becomes an OR group only if every branch guarantees a run;
/// anything optional (`min == 0` repetitions) or non-literal (classes,
/// lookarounds, empty) contributes nothing — keeping the result a safe
/// over-approximation of what a matching file must contain.
fn build_query(hir: &Hir) -> QueryBuild {
    match hir.kind() {
        HirKind::Literal(literal) => {
            // Runs shorter than 3 bytes cannot yield a trigram.
            if literal.0.len() >= 3 {
                QueryBuild {
                    and_runs: vec![literal.0.to_vec()],
                    or_groups: Vec::new(),
                }
            } else {
                QueryBuild::default()
            }
        }
        HirKind::Capture(capture) => build_query(&capture.sub),
        HirKind::Concat(parts) => {
            // Every part of a concatenation must match: obligations add up.
            let mut build = QueryBuild::default();
            for part in parts {
                let part_build = build_query(part);
                build.and_runs.extend(part_build.and_runs);
                build.or_groups.extend(part_build.or_groups);
            }
            build
        }
        HirKind::Alternation(parts) => {
            // Usable only if every branch guarantees at least one run;
            // otherwise the alternation constrains nothing.
            let mut group = Vec::new();
            for part in parts {
                let Some(mut choices) = guaranteed_run_choices(part) else {
                    return QueryBuild::default();
                };
                group.append(&mut choices);
            }
            if group.is_empty() {
                QueryBuild::default()
            } else {
                QueryBuild {
                    and_runs: Vec::new(),
                    or_groups: vec![group],
                }
            }
        }
        HirKind::Repetition(repetition) => {
            // `x*` / `x?` may match zero times: nothing is guaranteed.
            if repetition.min == 0 {
                QueryBuild::default()
            } else {
                build_query(&repetition.sub)
            }
        }
        HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => QueryBuild::default(),
    }
}
1117
/// Literal runs of which at least one is guaranteed to appear whenever
/// `hir` matches, or `None` when no such guarantee exists. Concatenation
/// collects runs from any guaranteeing part; alternation requires every
/// branch to guarantee runs (their union is returned); optional
/// repetitions and non-literal nodes guarantee nothing.
fn guaranteed_run_choices(hir: &Hir) -> Option<Vec<Vec<u8>>> {
    match hir.kind() {
        HirKind::Literal(literal) => {
            // Too short to yield a trigram: no usable guarantee.
            if literal.0.len() >= 3 {
                Some(vec![literal.0.to_vec()])
            } else {
                None
            }
        }
        HirKind::Capture(capture) => guaranteed_run_choices(&capture.sub),
        HirKind::Concat(parts) => {
            let mut runs = Vec::new();
            for part in parts {
                if let Some(mut part_runs) = guaranteed_run_choices(part) {
                    runs.append(&mut part_runs);
                }
            }
            if runs.is_empty() {
                None
            } else {
                Some(runs)
            }
        }
        HirKind::Alternation(parts) => {
            // One branch without a guaranteed run poisons the whole
            // alternation.
            let mut runs = Vec::new();
            for part in parts {
                let Some(mut part_runs) = guaranteed_run_choices(part) else {
                    return None;
                };
                runs.append(&mut part_runs);
            }
            if runs.is_empty() {
                None
            } else {
                Some(runs)
            }
        }
        HirKind::Repetition(repetition) => {
            // Zero-minimum repetitions may be absent entirely.
            if repetition.min == 0 {
                None
            } else {
                guaranteed_run_choices(&repetition.sub)
            }
        }
        HirKind::Empty | HirKind::Class(_) | HirKind::Look(_) => None,
    }
}
1165
1166fn add_run_to_and_query(query: &mut RegexQuery, run: &[u8]) {
1167 for (trigram, filter) in trigram_filters(run) {
1168 if !query.and_trigrams.contains(&trigram) {
1169 query.and_trigrams.push(trigram);
1170 }
1171 merge_filter(query.and_filters.entry(trigram).or_default(), filter);
1172 }
1173}
1174
1175fn trigram_filters(run: &[u8]) -> Vec<(u32, PostingFilter)> {
1176 let mut filters: BTreeMap<u32, PostingFilter> = BTreeMap::new();
1177 for (trigram, next_char, position) in extract_trigrams(run) {
1178 let entry: &mut PostingFilter = filters.entry(trigram).or_default();
1179 if next_char != EOF_SENTINEL {
1180 entry.next_mask |= mask_for_next_char(next_char);
1181 }
1182 entry.loc_mask |= mask_for_position(position);
1183 }
1184 filters.into_iter().collect()
1185}
1186
1187fn merge_filter(target: &mut PostingFilter, filter: PostingFilter) {
1188 target.next_mask |= filter.next_mask;
1189 target.loc_mask |= filter.loc_mask;
1190}
1191
1192fn mask_for_next_char(next_char: u8) -> u8 {
1193 let bit = (normalize_char(next_char).wrapping_mul(31) & 7) as u32;
1194 1u8 << bit
1195}
1196
/// Maps a trigram's byte offset to a single bit of an 8-bit bloom mask
/// (positions wrap every 8 bytes).
fn mask_for_position(position: usize) -> u8 {
    // `& 7` is equivalent to `% 8` for an unsigned operand.
    1u8 << (position & 7)
}
1200
1201fn build_globset(patterns: &[String]) -> Result<Option<GlobSet>, String> {
1202 if patterns.is_empty() {
1203 return Ok(None);
1204 }
1205
1206 let mut builder = GlobSetBuilder::new();
1207 for pattern in patterns {
1208 let glob = Glob::new(pattern).map_err(|error| error.to_string())?;
1209 builder.add(glob);
1210 }
1211 builder.build().map(Some).map_err(|error| error.to_string())
1212}
1213
/// Reads exactly four bytes from `reader` and decodes them as a
/// little-endian `u32`. Fails if the reader ends early.
fn read_u32<R: Read>(reader: &mut R) -> std::io::Result<u32> {
    let mut bytes = [0u8; 4];
    reader
        .read_exact(&mut bytes)
        .map(|_| u32::from_le_bytes(bytes))
}
1219
/// Reads exactly eight bytes from `reader` and decodes them as a
/// little-endian `u64`. Fails if the reader ends early.
fn read_u64<R: Read>(reader: &mut R) -> std::io::Result<u64> {
    let mut bytes = [0u8; 8];
    reader
        .read_exact(&mut bytes)
        .map(|_| u64::from_le_bytes(bytes))
}
1225
/// Writes `value` to `writer` as four little-endian bytes.
fn write_u32<W: Write>(writer: &mut W, value: u32) -> std::io::Result<()> {
    let bytes = value.to_le_bytes();
    writer.write_all(&bytes)
}
1229
/// Writes `value` to `writer` as eight little-endian bytes.
fn write_u64<W: Write>(writer: &mut W, value: u64) -> std::io::Result<()> {
    let bytes = value.to_le_bytes();
    writer.write_all(&bytes)
}
1233
/// Runs `git -C <root> <args…>` and returns its trimmed stdout.
///
/// Yields `None` when git cannot be spawned, exits unsuccessfully, emits
/// non-UTF-8 output, or prints nothing but whitespace.
fn run_git(root: &Path, args: &[&str]) -> Option<String> {
    let output = Command::new("git")
        .arg("-C")
        .arg(root)
        .args(args)
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }
    String::from_utf8(output.stdout)
        .ok()
        .map(|stdout| stdout.trim().to_string())
        .filter(|stdout| !stdout.is_empty())
}
1252
1253fn apply_git_diff_updates(index: &mut SearchIndex, root: &Path, from: &str, to: &str) -> bool {
1254 let diff_range = format!("{}..{}", from, to);
1255 let output = match Command::new("git")
1256 .arg("-C")
1257 .arg(root)
1258 .args(["diff", "--name-only", &diff_range])
1259 .output()
1260 {
1261 Ok(output) => output,
1262 Err(_) => return false,
1263 };
1264
1265 if !output.status.success() {
1266 return false;
1267 }
1268
1269 let Ok(paths) = String::from_utf8(output.stdout) else {
1270 return false;
1271 };
1272
1273 for relative_path in paths.lines().map(str::trim).filter(|path| !path.is_empty()) {
1274 let path = root.join(relative_path);
1275 if path.exists() {
1276 index.update_file(&path);
1277 } else {
1278 index.remove_file(&path);
1279 }
1280 }
1281
1282 true
1283}
1284
1285fn is_binary_path(path: &Path, size: u64) -> bool {
1286 if size == 0 {
1287 return false;
1288 }
1289
1290 let mut file = match File::open(path) {
1291 Ok(file) => file,
1292 Err(_) => return true,
1293 };
1294
1295 let mut preview = vec![0u8; PREVIEW_BYTES.min(size as usize)];
1296 match file.read(&mut preview) {
1297 Ok(read) => is_binary_bytes(&preview[..read]),
1298 Err(_) => true,
1299 }
1300}
1301
/// Returns the byte offset of the start of every line in `content`.
///
/// Offset 0 is always present; each `\n` contributes the offset just past
/// it, so a trailing newline yields a final (possibly empty) line start.
fn line_starts(content: &str) -> Vec<usize> {
    let after_newlines = content
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(index, _)| index + 1);
    std::iter::once(0).chain(after_newlines).collect()
}
1311
/// Resolves a byte `offset` into a 1-based (line, column, line text) triple.
///
/// `line_starts` must be the output of `line_starts(content)`. The column is
/// counted in characters, not bytes, and the returned line text has any
/// trailing `\r` stripped.
fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    // An exact hit lands on a line start; otherwise the insertion point minus
    // one is the line containing the offset.
    let line_index = line_starts
        .binary_search(&offset)
        .unwrap_or_else(|insertion| insertion.saturating_sub(1));
    let start = line_starts.get(line_index).copied().unwrap_or(0);
    let end = content[start..]
        .find('\n')
        .map_or(content.len(), |length| start + length);
    let text = content[start..end].trim_end_matches('\r').to_string();
    let column = 1 + content[start..offset].chars().count() as u32;
    (line_index as u32 + 1, column, text)
}
1328
/// Renders `path` as a glob-matchable string, normalizing Windows-style
/// backslash separators to forward slashes.
fn to_glob_path(path: &Path) -> String {
    let text = path.to_string_lossy();
    if text.contains('\\') {
        text.replace('\\', "/")
    } else {
        text.into_owned()
    }
}
1332
#[cfg(test)]
mod tests {
    use std::process::Command;

    use super::*;

    // "Rust" yields two overlapping trigrams. Each tuple carries the packed
    // trigram, the byte that follows it (EOF_SENTINEL at end of input), and
    // the trigram's starting position. The lowercase expectations show the
    // input is case-normalized during extraction.
    #[test]
    fn extract_trigrams_tracks_next_char_and_position() {
        let trigrams = extract_trigrams(b"Rust");
        assert_eq!(trigrams.len(), 2);
        assert_eq!(trigrams[0], (pack_trigram(b'r', b'u', b's'), b't', 0));
        assert_eq!(
            trigrams[1],
            (pack_trigram(b'u', b's', b't'), EOF_SENTINEL, 1)
        );
    }

    // Literals surrounding an alternation become required AND trigrams,
    // while the alternation's branches land together in one OR group.
    #[test]
    fn decompose_regex_extracts_literals_and_alternations() {
        let query = decompose_regex("abc(def|ghi)xyz");
        assert!(query.and_trigrams.contains(&pack_trigram(b'a', b'b', b'c')));
        assert!(query.and_trigrams.contains(&pack_trigram(b'x', b'y', b'z')));
        assert_eq!(query.or_groups.len(), 1);
        assert!(query.or_groups[0].contains(&pack_trigram(b'd', b'e', b'f')));
        assert!(query.or_groups[0].contains(&pack_trigram(b'g', b'h', b'i')));
    }

    // Only files containing every AND trigram survive candidate selection:
    // "abc" occurs in both files, but "def" narrows the result to alpha.txt.
    #[test]
    fn candidates_intersect_posting_lists() {
        let mut index = SearchIndex::new();
        let dir = tempfile::tempdir().expect("create temp dir");
        let alpha = dir.path().join("alpha.txt");
        let beta = dir.path().join("beta.txt");
        fs::write(&alpha, "abcdef").expect("write alpha");
        fs::write(&beta, "abcxyz").expect("write beta");
        index.project_root = dir.path().to_path_buf();
        index.index_file(&alpha, b"abcdef");
        index.index_file(&beta, b"abcxyz");

        let query = RegexQuery {
            and_trigrams: vec![
                pack_trigram(b'a', b'b', b'c'),
                pack_trigram(b'd', b'e', b'f'),
            ],
            ..RegexQuery::default()
        };

        let candidates = index.candidates(&query);
        assert_eq!(candidates.len(), 1);
        assert_eq!(index.files[candidates[0] as usize].path, alpha);
    }

    // A posting passes a filter only when its recorded next-char and
    // position masks overlap the query's masks: the filter built from the
    // actual follower 'd' matches, the one built from 'z' does not.
    #[test]
    fn candidates_apply_bloom_filters() {
        let mut index = SearchIndex::new();
        let dir = tempfile::tempdir().expect("create temp dir");
        let file = dir.path().join("sample.txt");
        fs::write(&file, "abcd efgh").expect("write sample");
        index.project_root = dir.path().to_path_buf();
        index.index_file(&file, b"abcd efgh");

        let trigram = pack_trigram(b'a', b'b', b'c');
        let matching_filter = PostingFilter {
            next_mask: mask_for_next_char(b'd'),
            loc_mask: mask_for_position(0),
        };
        let non_matching_filter = PostingFilter {
            next_mask: mask_for_next_char(b'z'),
            loc_mask: mask_for_position(0),
        };

        assert_eq!(
            index
                .postings_for_trigram(trigram, Some(matching_filter))
                .len(),
            1
        );
        assert!(index
            .postings_for_trigram(trigram, Some(non_matching_filter))
            .is_empty());
    }

    // Serializing to the cache directory and loading it back preserves the
    // stored git head, the file table (as root-relative paths), and the
    // posting lists.
    #[test]
    fn disk_round_trip_preserves_postings_and_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        fs::create_dir_all(&project).expect("create project dir");
        let file = project.join("src.txt");
        fs::write(&file, "abcdef").expect("write source");

        let mut index = SearchIndex::build(&project);
        index.git_head = Some("deadbeef".to_string());
        let cache_dir = dir.path().join("cache");
        index.write_to_disk(&cache_dir, index.git_head.as_deref());

        let loaded = SearchIndex::read_from_disk(&cache_dir).expect("load index from disk");
        assert_eq!(loaded.stored_git_head(), Some("deadbeef"));
        assert_eq!(loaded.files.len(), 1);
        assert_eq!(
            relative_to_root(&loaded.project_root, &loaded.files[0].path),
            PathBuf::from("src.txt")
        );
        assert_eq!(loaded.postings.len(), index.postings.len());
        assert!(loaded
            .postings
            .contains_key(&pack_trigram(b'a', b'b', b'c')));
    }

    // The atomic-write scheme must leave the final files in place and no
    // leftover *.tmp staging files after a successful write.
    #[test]
    fn write_to_disk_uses_temp_files_and_cleans_them_up() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        fs::create_dir_all(&project).expect("create project dir");
        fs::write(project.join("src.txt"), "abcdef").expect("write source");

        let index = SearchIndex::build(&project);
        let cache_dir = dir.path().join("cache");
        index.write_to_disk(&cache_dir, None);

        assert!(cache_dir.join("postings.bin").is_file());
        assert!(cache_dir.join("lookup.bin").is_file());
        assert!(!cache_dir.join("postings.bin.tmp").exists());
        assert!(!cache_dir.join("lookup.bin.tmp").exists());
    }

    // Two checkouts of the same repository (a repo and its clone) must hash
    // to the same 16-character cache key, so the index cache is shared.
    // NOTE(review): the test name says "includes checkout path" but the
    // assertion checks the keys are EQUAL across checkouts — presumably the
    // key is derived from repo identity rather than the filesystem path.
    #[test]
    fn project_cache_key_includes_checkout_path() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let source = dir.path().join("source");
        fs::create_dir_all(&source).expect("create source repo dir");
        fs::write(source.join("tracked.txt"), "content\n").expect("write tracked file");

        assert!(Command::new("git")
            .current_dir(&source)
            .args(["init"])
            .status()
            .expect("init git repo")
            .success());
        assert!(Command::new("git")
            .current_dir(&source)
            .args(["add", "."])
            .status()
            .expect("git add")
            .success());
        assert!(Command::new("git")
            .current_dir(&source)
            .args([
                "-c",
                "user.name=AFT Tests",
                "-c",
                "user.email=aft-tests@example.com",
                "commit",
                "-m",
                "initial",
            ])
            .status()
            .expect("git commit")
            .success());

        let clone = dir.path().join("clone");
        assert!(Command::new("git")
            .args(["clone", "--quiet"])
            .arg(&source)
            .arg(&clone)
            .status()
            .expect("git clone")
            .success());

        let source_key = project_cache_key(&source);
        let clone_key = project_cache_key(&clone);

        assert_eq!(source_key.len(), 16);
        assert_eq!(clone_key.len(), 16);
        assert_eq!(source_key, clone_key);
    }

    // Searching a directory outside the project root must canonicalize the
    // scope to that directory and disable index-accelerated search.
    #[test]
    fn resolve_search_scope_disables_index_for_external_path() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let outside = dir.path().join("outside");
        fs::create_dir_all(&project).expect("create project dir");
        fs::create_dir_all(&outside).expect("create outside dir");

        let scope = resolve_search_scope(&project, outside.to_str());

        assert_eq!(
            scope.root,
            fs::canonicalize(&outside).expect("canonicalize outside")
        );
        assert!(!scope.use_index);
    }

    // With the search root set to src/, a hit in docs/ must be excluded from
    // both the match list and the searched/matched file counts.
    #[test]
    fn grep_filters_matches_to_search_root() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        let docs = project.join("docs");
        fs::create_dir_all(&src).expect("create src dir");
        fs::create_dir_all(&docs).expect("create docs dir");
        fs::write(src.join("main.rs"), "pub struct SearchIndex;\n").expect("write src file");
        fs::write(docs.join("guide.md"), "SearchIndex guide\n").expect("write docs file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);

        assert_eq!(result.files_searched, 1);
        assert_eq!(result.files_with_matches, 1);
        assert_eq!(result.matches.len(), 1);
        let expected = fs::canonicalize(src.join("main.rs")).expect("canonicalize");
        assert_eq!(result.matches[0].file, expected);
    }

    // Two occurrences on one line collapse into a single reported match.
    #[test]
    fn grep_deduplicates_multiple_matches_on_same_line() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        fs::create_dir_all(&src).expect("create src dir");
        fs::write(src.join("main.rs"), "SearchIndex SearchIndex\n").expect("write src file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 10);

        assert_eq!(result.total_matches, 1);
        assert_eq!(result.matches.len(), 1);
    }

    // With max_results = 1, total_matches still counts every hit and the
    // truncation flag is raised, while only one match is returned.
    #[test]
    fn grep_reports_total_matches_before_truncation() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        fs::create_dir_all(&src).expect("create src dir");
        fs::write(src.join("main.rs"), "SearchIndex\nSearchIndex\n").expect("write src file");

        let index = SearchIndex::build(&project);
        let result = index.search_grep("SearchIndex", true, &[], &[], &src, 1);

        assert_eq!(result.total_matches, 2);
        assert_eq!(result.matches.len(), 1);
        assert!(result.truncated);
    }

    // Glob results are limited to the provided search root: the matching
    // file under scripts/ is omitted when globbing from src/.
    #[test]
    fn glob_filters_results_to_search_root() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let src = project.join("src");
        let scripts = project.join("scripts");
        fs::create_dir_all(&src).expect("create src dir");
        fs::create_dir_all(&scripts).expect("create scripts dir");
        fs::write(src.join("main.rs"), "pub fn main() {}\n").expect("write src file");
        fs::write(scripts.join("tool.rs"), "pub fn tool() {}\n").expect("write scripts file");

        let index = SearchIndex::build(&project);
        let files = index.glob("**/*.rs", &src);

        assert_eq!(
            files,
            vec![fs::canonicalize(src.join("main.rs")).expect("canonicalize src file")]
        );
    }

    // Unlike grep, glob must surface files in hidden directories and files
    // whose contents are binary (and therefore excluded from indexing).
    #[test]
    fn glob_includes_hidden_and_binary_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let project = dir.path().join("project");
        let hidden_dir = project.join(".hidden");
        fs::create_dir_all(&hidden_dir).expect("create hidden dir");
        let hidden_file = hidden_dir.join("data.bin");
        fs::write(&hidden_file, [0u8, 159, 146, 150]).expect("write binary file");

        let index = SearchIndex::build(&project);
        let files = index.glob("**/*.bin", &project);

        assert_eq!(
            files,
            vec![fs::canonicalize(hidden_file).expect("canonicalize binary file")]
        );
    }
}