1use std::collections::{HashMap, HashSet};
27use std::path::{Path, PathBuf};
28use std::sync::{Arc, Mutex, OnceLock};
29use std::time::{Duration, Instant};
30
31use ignore::WalkBuilder;
32
33use crate::tools::ctx_search::{is_binary_ext, is_generated_file, MAX_FILE_SIZE, MAX_WALK_DEPTH};
34
35const TTL: Duration = Duration::from_secs(15);
38
39const MAX_FILES: usize = 200_000;
41
42const MAX_POSTING_ENTRIES: usize = 12_000_000;
47
48const MAX_TOTAL_ENTRIES: usize = 48_000_000;
52
53const BLOOM_BITS_PER_ITEM: usize = 12;
57const BLOOM_K: usize = 7;
58const BLOOM_MIN_BITS: usize = 64;
60const BLOOM_MAX_BITS: usize = 1 << 20;
61
62fn is_word_byte(b: u8) -> bool {
64 b.is_ascii_alphanumeric() || b == b'_'
65}
66
67fn pack(b0: u8, b1: u8, b2: u8) -> u32 {
68 (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2)
69}
70
71enum Narrowing {
80 Postings(HashMap<u32, Vec<u32>>),
81 Blooms(Vec<FileBloom>),
82}
83
84struct FileBloom {
87 bits: Vec<u64>,
89}
90
91#[inline]
94fn mix64(mut x: u64) -> u64 {
95 x = x.wrapping_add(0x9E37_79B9_7F4A_7C15);
96 x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
97 x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
98 x ^ (x >> 31)
99}
100
101impl FileBloom {
102 fn with_capacity(distinct_trigrams: usize) -> Self {
103 let target = distinct_trigrams
104 .saturating_mul(BLOOM_BITS_PER_ITEM)
105 .next_power_of_two()
106 .clamp(BLOOM_MIN_BITS, BLOOM_MAX_BITS);
107 FileBloom {
108 bits: vec![0u64; target / 64],
109 }
110 }
111
112 #[inline]
113 fn m_bits(&self) -> usize {
114 self.bits.len() * 64
115 }
116
117 #[inline]
119 fn probes(&self, trigram: u32) -> impl Iterator<Item = usize> + '_ {
120 let m = self.m_bits();
121 let mask = m - 1; let h = mix64(u64::from(trigram));
123 let h1 = (h & 0xFFFF_FFFF) as usize;
124 let h2 = ((h >> 32) as usize) | 1; (0..BLOOM_K).map(move |i| h1.wrapping_add(i.wrapping_mul(h2)) & mask)
126 }
127
128 fn insert(&mut self, trigram: u32) {
129 for p in self.probes(trigram).collect::<Vec<_>>() {
130 self.bits[p / 64] |= 1u64 << (p % 64);
131 }
132 }
133
134 fn maybe_contains(&self, trigram: u32) -> bool {
135 self.probes(trigram)
136 .all(|p| self.bits[p / 64] & (1u64 << (p % 64)) != 0)
137 }
138}
139
140pub struct SearchIndex {
142 files: Vec<PathBuf>,
143 narrowing: Narrowing,
145 respect_gitignore: bool,
146 allow_secret_paths: bool,
147 built_at: Instant,
148}
149
150impl SearchIndex {
151 pub fn build(root: &str, respect_gitignore: bool, allow_secret_paths: bool) -> Option<Self> {
154 let root_path = Path::new(root);
155 if !root_path.exists() {
156 return None;
157 }
158
159 let walker = WalkBuilder::new(root_path)
160 .hidden(true)
161 .max_depth(Some(MAX_WALK_DEPTH))
162 .git_ignore(respect_gitignore)
163 .git_global(respect_gitignore)
164 .git_exclude(respect_gitignore)
165 .build();
166
167 let mut files: Vec<PathBuf> = Vec::new();
168 let mut per_file_trigrams: Vec<Vec<u32>> = Vec::new();
172 let mut total_entries: usize = 0;
173 let mut scratch: HashSet<u32> = HashSet::new();
174
175 for entry in walker.filter_map(std::result::Result::ok) {
176 if entry.file_type().is_none_or(|ft| ft.is_dir()) {
177 continue;
178 }
179 if entry.file_type().is_some_and(|ft| ft.is_symlink()) {
180 continue;
181 }
182 let path = entry.path();
183 if is_binary_ext(path) || is_generated_file(path) {
184 continue;
185 }
186 if !allow_secret_paths && crate::core::io_boundary::is_secret_like(path).is_some() {
187 continue;
188 }
189 let state = match std::fs::metadata(path) {
194 Ok(meta) if !meta.file_type().is_file() => continue,
195 Ok(meta) if meta.len() > MAX_FILE_SIZE => continue,
196 Ok(meta) => crate::core::content_cache::FileState::from_metadata(&meta),
197 Err(_) => continue,
198 };
199 let content: std::sync::Arc<str> = if let Some(cached) =
205 state.and_then(|s| crate::core::content_cache::get(path, s))
206 {
207 cached
208 } else {
209 let Ok(text) = std::fs::read_to_string(path) else {
210 continue;
211 };
212 let arc: std::sync::Arc<str> = std::sync::Arc::from(text);
213 if let Some(s) = state {
214 crate::core::content_cache::insert(path, s, std::sync::Arc::clone(&arc));
215 }
216 arc
217 };
218
219 if files.len() >= MAX_FILES {
220 return None; }
222
223 scratch.clear();
224 let bytes = content.as_bytes();
225 if bytes.len() >= 3 {
226 for w in bytes.windows(3) {
227 if is_word_byte(w[0]) && is_word_byte(w[1]) && is_word_byte(w[2]) {
228 scratch.insert(pack(w[0], w[1], w[2]));
229 }
230 }
231 }
232 total_entries += scratch.len();
233 if total_entries > MAX_TOTAL_ENTRIES {
234 return None; }
236 let mut tris: Vec<u32> = scratch.iter().copied().collect();
237 tris.sort_unstable();
238 files.push(path.to_path_buf());
239 per_file_trigrams.push(tris);
240 }
241
242 let narrowing = build_narrowing(&per_file_trigrams, total_entries);
243
244 Some(Self {
245 files,
246 narrowing,
247 respect_gitignore,
248 allow_secret_paths,
249 built_at: Instant::now(),
250 })
251 }
252
253 fn is_fresh(&self) -> bool {
254 self.built_at.elapsed() < TTL
255 }
256
257 fn config_matches(&self, respect_gitignore: bool, allow_secret_paths: bool) -> bool {
258 self.respect_gitignore == respect_gitignore && self.allow_secret_paths == allow_secret_paths
259 }
260
261 pub fn candidate_paths(&self, pattern: &str, ext: Option<&str>) -> CandidateSet {
270 if let Some(ids) = self.literal_candidates(pattern) {
271 let paths = ids
272 .into_iter()
273 .map(|id| self.files[id as usize].clone())
274 .filter(|p| ext_matches(p, ext))
275 .collect();
276 CandidateSet::Narrowed(paths)
277 } else {
278 let paths = self
279 .files
280 .iter()
281 .filter(|p| ext_matches(p, ext))
282 .cloned()
283 .collect();
284 CandidateSet::FullList(paths)
285 }
286 }
287
288 fn literal_candidates(&self, pattern: &str) -> Option<Vec<u32>> {
292 let bytes = pattern.as_bytes();
293 if bytes.len() < 3 || !bytes.iter().all(|&b| is_word_byte(b)) {
294 return None;
295 }
296 let mut tris: Vec<u32> = bytes.windows(3).map(|w| pack(w[0], w[1], w[2])).collect();
298 tris.sort_unstable();
299 tris.dedup();
300
301 match &self.narrowing {
302 Narrowing::Postings(trigrams) => Some(Self::postings_intersect(trigrams, &tris)),
303 Narrowing::Blooms(blooms) => Some(Self::bloom_scan(blooms, &tris)),
304 }
305 }
306
307 fn postings_intersect(trigrams: &HashMap<u32, Vec<u32>>, tris: &[u32]) -> Vec<u32> {
310 let mut lists: Vec<&Vec<u32>> = Vec::with_capacity(tris.len());
311 for &tri in tris {
312 match trigrams.get(&tri) {
313 None => return Vec::new(),
315 Some(list) => lists.push(list),
316 }
317 }
318 lists.sort_by_key(|l| l.len());
319
320 let mut acc: Vec<u32> = lists[0].clone();
321 for list in &lists[1..] {
322 acc = intersect_sorted(&acc, list);
323 if acc.is_empty() {
324 break;
325 }
326 }
327 acc
328 }
329
330 fn bloom_scan(blooms: &[FileBloom], tris: &[u32]) -> Vec<u32> {
334 let mut out = Vec::new();
335 for (fid, bloom) in blooms.iter().enumerate() {
336 if tris.iter().all(|&t| bloom.maybe_contains(t)) {
337 out.push(fid as u32);
338 }
339 }
340 out
341 }
342}
343
344fn build_narrowing(per_file: &[Vec<u32>], total_entries: usize) -> Narrowing {
346 if total_entries <= MAX_POSTING_ENTRIES {
347 let mut trigrams: HashMap<u32, Vec<u32>> = HashMap::new();
348 for (fid, tris) in per_file.iter().enumerate() {
349 for &t in tris {
350 trigrams.entry(t).or_default().push(fid as u32);
352 }
353 }
354 Narrowing::Postings(trigrams)
355 } else {
356 let blooms = per_file
357 .iter()
358 .map(|tris| {
359 let mut b = FileBloom::with_capacity(tris.len());
360 for &t in tris {
361 b.insert(t);
362 }
363 b
364 })
365 .collect();
366 Narrowing::Blooms(blooms)
367 }
368}
369
370pub enum CandidateSet {
372 Narrowed(Vec<PathBuf>),
374 FullList(Vec<PathBuf>),
376}
377
378impl CandidateSet {
379 pub fn into_paths(self) -> Vec<PathBuf> {
380 match self {
381 CandidateSet::Narrowed(p) | CandidateSet::FullList(p) => p,
382 }
383 }
384}
385
386fn ext_matches(path: &Path, ext: Option<&str>) -> bool {
387 match ext {
388 None => true,
389 Some(want) => path.extension().and_then(|e| e.to_str()) == Some(want),
390 }
391}
392
393fn intersect_sorted(a: &[u32], b: &[u32]) -> Vec<u32> {
395 let mut out = Vec::new();
396 let (mut i, mut j) = (0, 0);
397 while i < a.len() && j < b.len() {
398 match a[i].cmp(&b[j]) {
399 std::cmp::Ordering::Less => i += 1,
400 std::cmp::Ordering::Greater => j += 1,
401 std::cmp::Ordering::Equal => {
402 out.push(a[i]);
403 i += 1;
404 j += 1;
405 }
406 }
407 }
408 out
409}
410
411struct CacheEntry {
416 index: Option<Arc<SearchIndex>>,
417 building: bool,
418}
419
420static CACHE: OnceLock<Mutex<HashMap<String, CacheEntry>>> = OnceLock::new();
421
422fn cache() -> &'static Mutex<HashMap<String, CacheEntry>> {
423 CACHE.get_or_init(|| Mutex::new(HashMap::new()))
424}
425
426fn index_disabled() -> bool {
429 std::env::var("LEAN_CTX_DISABLE_SEARCH_INDEX")
430 .is_ok_and(|v| v == "1" || v.eq_ignore_ascii_case("true"))
431}
432
433pub fn get_fresh(
437 root: &str,
438 respect_gitignore: bool,
439 allow_secret_paths: bool,
440) -> Option<Arc<SearchIndex>> {
441 if !respect_gitignore || index_disabled() {
443 return None;
444 }
445
446 let mut needs_build = false;
447 let result = {
448 let mut map = cache()
449 .lock()
450 .unwrap_or_else(std::sync::PoisonError::into_inner);
451 let entry = map.entry(root.to_string()).or_insert(CacheEntry {
452 index: None,
453 building: false,
454 });
455 match &entry.index {
456 Some(idx)
457 if idx.config_matches(respect_gitignore, allow_secret_paths) && idx.is_fresh() =>
458 {
459 Some(Arc::clone(idx))
460 }
461 Some(idx) if idx.config_matches(respect_gitignore, allow_secret_paths) => {
462 needs_build = !entry.building;
464 if needs_build {
465 entry.building = true;
466 }
467 Some(Arc::clone(idx))
468 }
469 _ => {
470 needs_build = !entry.building;
471 if needs_build {
472 entry.building = true;
473 }
474 None
475 }
476 }
477 };
478
479 if needs_build {
480 spawn_build(root.to_string(), respect_gitignore, allow_secret_paths);
481 }
482 result
483}
484
485pub fn ensure_background(root: &str, respect_gitignore: bool, allow_secret_paths: bool) {
488 if !respect_gitignore || index_disabled() {
489 return;
490 }
491 let needs_build = {
492 let mut map = cache()
493 .lock()
494 .unwrap_or_else(std::sync::PoisonError::into_inner);
495 let entry = map.entry(root.to_string()).or_insert(CacheEntry {
496 index: None,
497 building: false,
498 });
499 let fresh = entry.index.as_ref().is_some_and(|idx| {
500 idx.config_matches(respect_gitignore, allow_secret_paths) && idx.is_fresh()
501 });
502 if fresh || entry.building {
503 false
504 } else {
505 entry.building = true;
506 true
507 }
508 };
509 if needs_build {
510 spawn_build(root.to_string(), respect_gitignore, allow_secret_paths);
511 }
512}
513
514pub fn warm_blocking(root: &str, respect_gitignore: bool, allow_secret_paths: bool) -> bool {
518 if !respect_gitignore || index_disabled() {
519 return false;
520 }
521 let Some(idx) = SearchIndex::build(root, respect_gitignore, allow_secret_paths) else {
522 return false;
523 };
524 let mut map = cache()
525 .lock()
526 .unwrap_or_else(std::sync::PoisonError::into_inner);
527 map.insert(
528 root.to_string(),
529 CacheEntry {
530 index: Some(Arc::new(idx)),
531 building: false,
532 },
533 );
534 true
535}
536
537fn spawn_build(root: String, respect_gitignore: bool, allow_secret_paths: bool) {
538 std::thread::spawn(move || {
539 let built = std::panic::catch_unwind(|| {
540 SearchIndex::build(&root, respect_gitignore, allow_secret_paths)
541 })
542 .ok()
543 .flatten();
544
545 let mut map = cache()
546 .lock()
547 .unwrap_or_else(std::sync::PoisonError::into_inner);
548 if let Some(entry) = map.get_mut(&root) {
549 entry.building = false;
550 if let Some(idx) = built {
551 entry.index = Some(Arc::new(idx));
552 }
553 }
554 });
555}
556
557#[cfg(test)]
558mod tests {
559 use super::*;
560
561 fn corpus() -> tempfile::TempDir {
562 let dir = tempfile::tempdir().unwrap();
563 std::fs::write(dir.path().join("a.rs"), "fn handler() {}\nlet x = 1;\n").unwrap();
564 std::fs::write(dir.path().join("b.rs"), "fn other() {}\n// nothing here\n").unwrap();
565 std::fs::write(dir.path().join("c.txt"), "handler appears in text too\n").unwrap();
566 dir
567 }
568
569 #[test]
570 fn narrows_to_files_containing_literal() {
571 let dir = corpus();
572 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
573 let cands = idx.candidate_paths("handler", None);
574 let paths = cands.into_paths();
575 assert!(paths.iter().any(|p| p.ends_with("a.rs")));
577 assert!(paths.iter().any(|p| p.ends_with("c.txt")));
578 assert!(!paths.iter().any(|p| p.ends_with("b.rs")));
579 }
580
581 #[test]
582 fn absent_trigram_yields_empty_candidates() {
583 let dir = corpus();
584 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
585 match idx.candidate_paths("zzzqqq", None) {
586 CandidateSet::Narrowed(p) => assert!(p.is_empty()),
587 CandidateSet::FullList(_) => panic!("pure literal should narrow"),
588 }
589 }
590
591 #[test]
592 fn ext_filter_restricts_candidates() {
593 let dir = corpus();
594 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
595 let paths = idx.candidate_paths("handler", Some("rs")).into_paths();
596 assert!(paths.iter().all(|p| p.extension().unwrap() == "rs"));
597 assert!(paths.iter().any(|p| p.ends_with("a.rs")));
598 }
599
600 #[test]
601 #[cfg(unix)]
602 fn build_skips_named_pipe_without_hanging() {
603 use std::sync::mpsc;
604 use std::time::Duration;
605 let dir = corpus();
609 let fifo = dir.path().join("pipe.fifo");
610 let c = std::ffi::CString::new(fifo.to_string_lossy().as_bytes()).unwrap();
611 assert_eq!(
612 unsafe { libc::mkfifo(c.as_ptr(), 0o644) },
615 0,
616 "mkfifo failed"
617 );
618
619 let root = dir.path().to_str().unwrap().to_string();
620 let (tx, rx) = mpsc::channel();
621 std::thread::spawn(move || {
622 let built = SearchIndex::build(&root, true, false);
623 let _ = tx.send(built.map(|idx| idx.candidate_paths("handler", None).into_paths()));
624 });
625 let paths = rx
626 .recv_timeout(Duration::from_secs(5))
627 .expect("SearchIndex::build hung on a FIFO (#336 regression)")
628 .expect("index should build");
629 assert!(paths.iter().any(|p| p.ends_with("a.rs")));
630 assert!(!paths.iter().any(|p| p.ends_with("pipe.fifo")));
631 }
632
633 #[test]
634 fn regex_query_falls_back_to_full_list() {
635 let dir = corpus();
636 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
637 match idx.candidate_paths("fn .*\\(\\)", None) {
638 CandidateSet::FullList(p) => assert!(!p.is_empty()),
639 CandidateSet::Narrowed(_) => panic!("metachar query must not narrow"),
640 }
641 }
642
643 #[test]
644 fn short_query_falls_back_to_full_list() {
645 let dir = corpus();
646 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
647 assert!(matches!(
648 idx.candidate_paths("fn", None),
649 CandidateSet::FullList(_)
650 ));
651 }
652
653 #[test]
657 fn narrowing_has_identical_recall_to_full_scan() {
658 use regex::Regex;
659 use std::collections::BTreeSet;
660
661 let dir = tempfile::tempdir().unwrap();
662 let samples = [
664 (
665 "auth/login.rs",
666 "fn authenticate(user) {}\nlet token = mint();\n",
667 ),
668 (
669 "auth/session.rs",
670 "struct Session;\n// authenticate again here\n",
671 ),
672 ("db/pool.rs", "fn connect() {}\nlet retries = 3;\n"),
673 (
674 "ui/button.tsx",
675 "export const Button = () => authenticate;\n",
676 ),
677 ("readme.md", "This project uses authenticate flows.\n"),
678 ("unrelated.rs", "fn helper() { let v = 1; }\n"),
679 ];
680 for (rel, content) in samples {
681 let p = dir.path().join(rel);
682 std::fs::create_dir_all(p.parent().unwrap()).unwrap();
683 std::fs::write(p, content).unwrap();
684 }
685 let idx = SearchIndex::build(dir.path().to_str().unwrap(), true, false).unwrap();
686
687 let full_scan = |pat: &str| -> BTreeSet<String> {
688 let re = Regex::new(pat).unwrap();
689 let mut hits = BTreeSet::new();
690 for (rel, content) in samples {
691 for (i, line) in content.lines().enumerate() {
692 if re.is_match(line) {
693 hits.insert(format!("{rel}:{}", i + 1));
694 }
695 }
696 }
697 hits
698 };
699
700 for query in ["authenticate", "Session", "retries", "token"] {
701 let re = Regex::new(query).unwrap();
702 let candidates = idx.candidate_paths(query, None).into_paths();
703 let mut narrowed = BTreeSet::new();
704 for path in &candidates {
705 let content = std::fs::read_to_string(path).unwrap();
706 let rel = path.strip_prefix(dir.path()).unwrap().to_string_lossy();
707 for (i, line) in content.lines().enumerate() {
708 if re.is_match(line) {
709 narrowed.insert(format!("{}:{}", rel.replace('\\', "/"), i + 1));
710 }
711 }
712 }
713 assert_eq!(
714 narrowed,
715 full_scan(query),
716 "recall mismatch for query {query:?}"
717 );
718 }
719 }
720
721 #[test]
722 fn intersect_sorted_basic() {
723 assert_eq!(
724 intersect_sorted(&[1, 2, 3, 5], &[2, 3, 4, 5]),
725 vec![2, 3, 5]
726 );
727 assert_eq!(intersect_sorted(&[1, 2], &[3, 4]), Vec::<u32>::new());
728 }
729
730 fn trigrams_of(s: &str) -> Vec<u32> {
733 let mut set = HashSet::new();
734 let b = s.as_bytes();
735 if b.len() >= 3 {
736 for w in b.windows(3) {
737 if is_word_byte(w[0]) && is_word_byte(w[1]) && is_word_byte(w[2]) {
738 set.insert(pack(w[0], w[1], w[2]));
739 }
740 }
741 }
742 let mut v: Vec<u32> = set.into_iter().collect();
743 v.sort_unstable();
744 v
745 }
746
747 #[test]
748 fn file_bloom_has_no_false_negatives() {
749 let tris = trigrams_of("fn authenticate(user) { let token = mint(); }");
750 let mut bloom = FileBloom::with_capacity(tris.len());
751 for &t in &tris {
752 bloom.insert(t);
753 }
754 assert!(tris.iter().all(|&t| bloom.maybe_contains(t)));
756 }
757
758 #[test]
762 fn bloom_tier_is_superset_of_postings_tier() {
763 let mut seed = 0x1234_5678_9abc_def0u64;
765 let mut rng = || {
766 seed = seed
767 .wrapping_mul(6364136223846793005)
768 .wrapping_add(1442695040888963407);
769 (seed >> 33) as u32
770 };
771 let mut per_file: Vec<Vec<u32>> = Vec::new();
772 for _ in 0..80 {
773 let n = 50 + (rng() % 250) as usize;
774 let mut s = HashSet::new();
775 for _ in 0..n {
776 s.insert(rng() & 0x00FF_FFFF);
777 }
778 let mut v: Vec<u32> = s.into_iter().collect();
779 v.sort_unstable();
780 per_file.push(v);
781 }
782 let total: usize = per_file.iter().map(Vec::len).sum();
783
784 let postings = build_narrowing(&per_file, total); let blooms = build_narrowing(&per_file, MAX_POSTING_ENTRIES + 1); let (Narrowing::Postings(pt), Narrowing::Blooms(bl)) = (&postings, &blooms) else {
787 panic!("unexpected narrowing tiers");
788 };
789
790 for f in &per_file {
793 if f.len() < 3 {
794 continue;
795 }
796 let q = vec![f[0], f[f.len() / 2], f[f.len() - 1]];
797 let exact = SearchIndex::postings_intersect(pt, &q);
798 let bloom: HashSet<u32> = SearchIndex::bloom_scan(bl, &q).into_iter().collect();
799 for id in exact {
800 assert!(
801 bloom.contains(&id),
802 "Bloom tier dropped a true match (false negative) for {q:?}"
803 );
804 }
805 }
806 }
807
808 #[test]
811 fn bloom_tier_end_to_end_recall() {
812 let samples = [
813 (
814 "auth_login.rs",
815 "fn authenticate(user) {}\nlet token = mint();\n",
816 ),
817 (
818 "auth_session.rs",
819 "struct Session;\n// authenticate again here\n",
820 ),
821 ("db_pool.rs", "fn connect() {}\nlet retries = 3;\n"),
822 (
823 "ui_button.tsx",
824 "export const Button = () => authenticate;\n",
825 ),
826 ("readme.md", "This project uses authenticate flows.\n"),
827 ("unrelated.rs", "fn helper() { let v = 1; }\n"),
828 ];
829 let files: Vec<PathBuf> = samples.iter().map(|(rel, _)| PathBuf::from(rel)).collect();
830 let per_file: Vec<Vec<u32>> = samples.iter().map(|(_, c)| trigrams_of(c)).collect();
831
832 let idx = SearchIndex {
833 files,
834 narrowing: build_narrowing(&per_file, MAX_POSTING_ENTRIES + 1),
835 respect_gitignore: true,
836 allow_secret_paths: false,
837 built_at: Instant::now(),
838 };
839 assert!(
840 matches!(idx.narrowing, Narrowing::Blooms(_)),
841 "test must exercise the Bloom tier"
842 );
843
844 for query in ["authenticate", "Session", "retries", "token"] {
845 let cands: HashSet<String> = idx
846 .candidate_paths(query, None)
847 .into_paths()
848 .iter()
849 .map(|p| p.to_string_lossy().to_string())
850 .collect();
851 for (rel, content) in samples {
852 if content.contains(query) {
853 assert!(
854 cands.contains(rel),
855 "Bloom tier dropped real match {rel} for query {query:?}"
856 );
857 }
858 }
859 }
860 }
861}