1#![forbid(unsafe_code)]
2
3use std::cmp::Ordering;
47use std::fmt;
48
49#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
55pub enum MatchType {
56 NoMatch,
58 Fuzzy,
60 Substring,
62 WordStart,
64 Prefix,
66 Exact,
68}
69
70impl MatchType {
71 pub fn prior_odds(self) -> f64 {
78 match self {
79 Self::Exact => 99.0, Self::Prefix => 9.0, Self::WordStart => 4.0, Self::Substring => 2.0, Self::Fuzzy => 0.333, Self::NoMatch => 0.0, }
86 }
87
88 pub fn description(self) -> &'static str {
90 match self {
91 Self::Exact => "exact match",
92 Self::Prefix => "prefix match",
93 Self::WordStart => "word-start match",
94 Self::Substring => "contiguous substring",
95 Self::Fuzzy => "fuzzy match",
96 Self::NoMatch => "no match",
97 }
98 }
99}
100
101fn compare_ranked_match_results(
102 left: &(usize, MatchResult),
103 right: &(usize, MatchResult),
104) -> Ordering {
105 right
106 .1
107 .score
108 .partial_cmp(&left.1.score)
109 .unwrap_or(Ordering::Equal)
110 .then_with(|| right.1.match_type.cmp(&left.1.match_type))
111}
112
113#[derive(Debug, Clone)]
119pub struct EvidenceEntry {
120 pub kind: EvidenceKind,
122 pub bayes_factor: f64,
125 pub description: EvidenceDescription,
127}
128
129#[derive(Debug, Clone, Copy, PartialEq, Eq)]
131pub enum EvidenceKind {
132 MatchType,
134 WordBoundary,
136 Position,
138 GapPenalty,
140 TagMatch,
142 TitleLength,
144}
145
146impl fmt::Display for EvidenceEntry {
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 let direction = if self.bayes_factor > 1.0 {
149 "supports"
150 } else if self.bayes_factor < 1.0 {
151 "opposes"
152 } else {
153 "neutral"
154 };
155 write!(
156 f,
157 "{:?}: BF={:.2} ({}) - {}",
158 self.kind, self.bayes_factor, direction, self.description
159 )
160 }
161}
162
163#[derive(Debug, Clone)]
165pub enum EvidenceDescription {
166 Static(&'static str),
167 TitleLengthChars { len: usize },
168 FirstMatchPos { pos: usize },
169 WordBoundaryCount { count: usize },
170 GapTotal { total: usize },
171 CoveragePercent { percent: f64 },
172}
173
174impl fmt::Display for EvidenceDescription {
175 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176 match self {
177 Self::Static(msg) => write!(f, "{msg}"),
178 Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
179 Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
180 Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
181 Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
182 Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
183 }
184 }
185}
186
187#[derive(Debug, Clone, Default)]
196pub struct EvidenceLedger {
197 entries: Vec<EvidenceEntry>,
198}
199
200impl EvidenceLedger {
201 pub fn new() -> Self {
203 Self::default()
204 }
205
206 pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
208 self.entries.push(EvidenceEntry {
209 kind,
210 bayes_factor,
211 description,
212 });
213 }
214
215 pub fn entries(&self) -> &[EvidenceEntry] {
217 &self.entries
218 }
219
220 pub fn combined_bayes_factor(&self) -> f64 {
222 self.entries.iter().map(|e| e.bayes_factor).product()
223 }
224
225 #[must_use = "use the prior odds (if any) for diagnostics"]
227 pub fn prior_odds(&self) -> Option<f64> {
228 self.entries
229 .iter()
230 .find(|e| e.kind == EvidenceKind::MatchType)
231 .map(|e| e.bayes_factor)
232 }
233
234 pub fn posterior_probability(&self) -> f64 {
239 let prior = self.prior_odds().unwrap_or(1.0);
240 let bf: f64 = self
242 .entries
243 .iter()
244 .filter(|e| e.kind != EvidenceKind::MatchType)
245 .map(|e| e.bayes_factor)
246 .product();
247
248 let posterior_odds = prior * bf;
249 if posterior_odds.is_infinite() {
250 1.0
251 } else {
252 posterior_odds / (1.0 + posterior_odds)
253 }
254 }
255}
256
257impl EvidenceLedger {
258 #[must_use]
260 pub fn to_jsonl(&self) -> String {
261 let entries_json: Vec<String> = self
262 .entries
263 .iter()
264 .map(|e| {
265 format!(
266 r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
267 e.kind, e.bayes_factor, e.description
268 )
269 })
270 .collect();
271 format!(
272 r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
273 entries_json.join(","),
274 self.combined_bayes_factor(),
275 self.posterior_probability(),
276 )
277 }
278}
279
280impl fmt::Display for EvidenceLedger {
281 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282 writeln!(f, "Evidence Ledger:")?;
283 for entry in &self.entries {
284 writeln!(f, " {}", entry)?;
285 }
286 writeln!(f, " Combined BF: {:.3}", self.combined_bayes_factor())?;
287 writeln!(f, " Posterior P: {:.3}", self.posterior_probability())?;
288 Ok(())
289 }
290}
291
292#[derive(Debug, Clone)]
298pub struct MatchResult {
299 pub score: f64,
301 pub match_type: MatchType,
303 pub match_positions: Vec<usize>,
305 pub evidence: EvidenceLedger,
307}
308
309impl MatchResult {
310 pub fn no_match() -> Self {
312 let mut evidence = EvidenceLedger::new();
313 evidence.add(
314 EvidenceKind::MatchType,
315 0.0,
316 EvidenceDescription::Static("no matching characters found"),
317 );
318 Self {
319 score: 0.0,
320 match_type: MatchType::NoMatch,
321 match_positions: Vec::new(),
322 evidence,
323 }
324 }
325}
326
327#[derive(Debug, Clone, Default)]
336pub struct BayesianScorer {
337 pub track_evidence: bool,
339}
340
341impl BayesianScorer {
342 pub fn new() -> Self {
344 Self {
345 track_evidence: true,
346 }
347 }
348
349 pub fn fast() -> Self {
351 Self {
352 track_evidence: false,
353 }
354 }
355
356 pub fn score(&self, query: &str, title: &str) -> MatchResult {
360 if query.len() > title.len() {
362 return MatchResult::no_match();
363 }
364
365 if query.is_empty() {
367 return self.score_empty_query(title);
368 }
369
370 let query_lower = query.to_lowercase();
372 let title_lower = title.to_lowercase();
373
374 let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);
376
377 if match_type == MatchType::NoMatch {
378 return MatchResult::no_match();
379 }
380
381 self.compute_score(match_type, positions, &query_lower, title)
383 }
384
385 pub fn score_with_query_lower(
389 &self,
390 query: &str,
391 query_lower: &str,
392 title: &str,
393 ) -> MatchResult {
394 let title_lower = title.to_lowercase();
395 self.score_with_lowered_title(query, query_lower, title, &title_lower)
396 }
397
398 pub fn score_with_lowered_title(
402 &self,
403 query: &str,
404 query_lower: &str,
405 title: &str,
406 title_lower: &str,
407 ) -> MatchResult {
408 self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
409 }
410
411 pub fn score_with_lowered_title_and_words(
413 &self,
414 query: &str,
415 query_lower: &str,
416 title: &str,
417 title_lower: &str,
418 word_starts: Option<&[usize]>,
419 ) -> MatchResult {
420 if query.len() > title.len() {
422 return MatchResult::no_match();
423 }
424
425 if query.is_empty() {
427 return self.score_empty_query(title);
428 }
429
430 let (match_type, positions) =
432 self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);
433
434 if match_type == MatchType::NoMatch {
435 return MatchResult::no_match();
436 }
437
438 self.compute_score(match_type, positions, query_lower, title)
440 }
441
442 pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
444 let mut result = self.score(query, title);
445
446 let query_lower = query.to_lowercase();
448 let tag_match = tags
449 .iter()
450 .any(|tag| crate::contains_ignore_case(tag, &query_lower));
451
452 if tag_match && result.match_type != MatchType::NoMatch {
453 if self.track_evidence {
455 result.evidence.add(
456 EvidenceKind::TagMatch,
457 3.0, EvidenceDescription::Static("query matches tag"),
459 );
460 result.score = result.evidence.posterior_probability();
461 } else if (0.0..1.0).contains(&result.score) {
462 let odds = result.score / (1.0 - result.score);
463 let boosted = odds * 3.0;
464 result.score = boosted / (1.0 + boosted);
465 }
466 }
467
468 result
469 }
470
471 fn score_empty_query(&self, title: &str) -> MatchResult {
473 let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
475 if self.track_evidence {
476 let mut evidence = EvidenceLedger::new();
477 evidence.add(
478 EvidenceKind::MatchType,
479 1.0, EvidenceDescription::Static("empty query matches all"),
481 );
482 evidence.add(
483 EvidenceKind::TitleLength,
484 length_factor,
485 EvidenceDescription::TitleLengthChars { len: title.len() },
486 );
487 let score = evidence.posterior_probability();
488 MatchResult {
489 score,
490 match_type: MatchType::Fuzzy, match_positions: Vec::new(),
492 evidence,
493 }
494 } else {
495 let odds = length_factor;
496 let score = odds / (1.0 + odds);
497 MatchResult {
498 score,
499 match_type: MatchType::Fuzzy,
500 match_positions: Vec::new(),
501 evidence: EvidenceLedger::new(),
502 }
503 }
504 }
505
506 fn detect_match_type(
508 &self,
509 query_lower: &str,
510 title_lower: &str,
511 title: &str,
512 ) -> (MatchType, Vec<usize>) {
513 self.detect_match_type_with_words(query_lower, title_lower, title, None)
514 }
515
516 fn detect_match_type_with_words(
518 &self,
519 query_lower: &str,
520 title_lower: &str,
521 title: &str,
522 word_starts: Option<&[usize]>,
523 ) -> (MatchType, Vec<usize>) {
524 if query_lower.is_ascii() && title_lower.is_ascii() {
525 return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
526 }
527
528 if query_lower == title_lower {
530 let positions: Vec<usize> = (0..title.chars().count()).collect();
531 return (MatchType::Exact, positions);
532 }
533
534 if title_lower.starts_with(query_lower) {
536 let positions: Vec<usize> = (0..query_lower.chars().count()).collect();
537 return (MatchType::Prefix, positions);
538 }
539
540 if let Some(positions) = self.word_start_match(query_lower, title_lower) {
542 return (MatchType::WordStart, positions);
543 }
544
545 if let Some(byte_start) = title_lower.find(query_lower) {
547 let char_start = title_lower[..byte_start].chars().count();
548 let char_len = query_lower.chars().count();
549 let positions: Vec<usize> = (char_start..char_start + char_len).collect();
550 return (MatchType::Substring, positions);
551 }
552
553 if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
555 return (MatchType::Fuzzy, positions);
556 }
557
558 (MatchType::NoMatch, Vec::new())
559 }
560
561 fn detect_match_type_ascii(
563 &self,
564 query_lower: &str,
565 title_lower: &str,
566 word_starts: Option<&[usize]>,
567 ) -> (MatchType, Vec<usize>) {
568 let query_bytes = query_lower.as_bytes();
569 let title_bytes = title_lower.as_bytes();
570
571 if query_bytes == title_bytes {
572 let positions: Vec<usize> = (0..title_bytes.len()).collect();
573 return (MatchType::Exact, positions);
574 }
575
576 if title_bytes.starts_with(query_bytes) {
577 let positions: Vec<usize> = (0..query_bytes.len()).collect();
578 return (MatchType::Prefix, positions);
579 }
580
581 if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
582 {
583 return (MatchType::WordStart, positions);
584 }
585
586 if let Some(start) = title_lower.find(query_lower) {
587 let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
588 return (MatchType::Substring, positions);
589 }
590
591 if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
592 return (MatchType::Fuzzy, positions);
593 }
594
595 (MatchType::NoMatch, Vec::new())
596 }
597
598 fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
600 let mut positions = Vec::new();
601 let mut query_chars = query.chars().peekable();
602
603 let title_bytes = title.as_bytes();
604 for (char_index, (i, c)) in title.char_indices().enumerate() {
605 let is_word_start = i == 0 || {
607 let prev = title_bytes
608 .get(i.saturating_sub(1))
609 .copied()
610 .unwrap_or(b' ');
611 prev == b' ' || prev == b'-' || prev == b'_'
612 };
613
614 if is_word_start
615 && let Some(&qc) = query_chars.peek()
616 && c == qc
617 {
618 positions.push(char_index);
619 query_chars.next();
620 }
621 }
622
623 if query_chars.peek().is_none() {
624 Some(positions)
625 } else {
626 None
627 }
628 }
629
630 fn word_start_match_ascii(
632 &self,
633 query: &[u8],
634 title: &[u8],
635 word_starts: Option<&[usize]>,
636 ) -> Option<Vec<usize>> {
637 let mut positions = Vec::new();
638 let mut query_idx = 0;
639 if query.is_empty() {
640 return Some(positions);
641 }
642
643 if let Some(starts) = word_starts {
644 for &pos in starts {
645 if pos >= title.len() {
646 continue;
647 }
648 if title[pos] == query[query_idx] {
649 positions.push(pos);
650 query_idx += 1;
651 if query_idx == query.len() {
652 return Some(positions);
653 }
654 }
655 }
656 } else {
657 for (i, &b) in title.iter().enumerate() {
658 let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
659 if is_word_start && b == query[query_idx] {
660 positions.push(i);
661 query_idx += 1;
662 if query_idx == query.len() {
663 return Some(positions);
664 }
665 }
666 }
667 }
668
669 None
670 }
671
672 fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
674 let mut positions = Vec::new();
675 let mut query_chars = query.chars().peekable();
676
677 for (char_index, c) in title.chars().enumerate() {
678 if let Some(&qc) = query_chars.peek()
679 && c == qc
680 {
681 positions.push(char_index);
682 query_chars.next();
683 }
684 }
685
686 if query_chars.peek().is_none() {
687 Some(positions)
688 } else {
689 None
690 }
691 }
692
693 fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
695 let mut positions = Vec::new();
696 let mut query_idx = 0;
697 if query.is_empty() {
698 return Some(positions);
699 }
700
701 for (i, &b) in title.iter().enumerate() {
702 if b == query[query_idx] {
703 positions.push(i);
704 query_idx += 1;
705 if query_idx == query.len() {
706 return Some(positions);
707 }
708 }
709 }
710
711 None
712 }
713
714 fn compute_score(
716 &self,
717 match_type: MatchType,
718 positions: Vec<usize>,
719 query: &str,
720 title: &str,
721 ) -> MatchResult {
722 let positions_ref = positions.as_slice();
723 if !self.track_evidence {
724 let mut combined_bf = match_type.prior_odds();
725
726 if let Some(&first_pos) = positions_ref.first() {
727 let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
728 combined_bf *= position_factor;
729 }
730
731 let word_boundary_count = self.count_word_boundaries(positions_ref, title);
732 if word_boundary_count > 0 {
733 let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
734 combined_bf *= boundary_factor;
735 }
736
737 if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
738 let total_gap = self.total_gap(positions_ref);
739 let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
740 combined_bf *= gap_factor;
741 }
742
743 let title_len = title.len().max(1);
744 let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
745 combined_bf *= length_factor;
746
747 let score = combined_bf / (1.0 + combined_bf);
748 return MatchResult {
749 score,
750 match_type,
751 match_positions: positions,
752 evidence: EvidenceLedger::new(),
753 };
754 }
755
756 let mut evidence = EvidenceLedger::new();
757
758 let prior_odds = match_type.prior_odds();
760 evidence.add(
761 EvidenceKind::MatchType,
762 prior_odds,
763 EvidenceDescription::Static(match_type.description()),
764 );
765
766 if let Some(&first_pos) = positions_ref.first() {
768 let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
769 evidence.add(
770 EvidenceKind::Position,
771 position_factor,
772 EvidenceDescription::FirstMatchPos { pos: first_pos },
773 );
774 }
775
776 let word_boundary_count = self.count_word_boundaries(positions_ref, title);
778 if word_boundary_count > 0 {
779 let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
780 evidence.add(
781 EvidenceKind::WordBoundary,
782 boundary_factor,
783 EvidenceDescription::WordBoundaryCount {
784 count: word_boundary_count,
785 },
786 );
787 }
788
789 if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
791 let total_gap = self.total_gap(positions_ref);
792 let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
793 evidence.add(
794 EvidenceKind::GapPenalty,
795 gap_factor,
796 EvidenceDescription::GapTotal { total: total_gap },
797 );
798 }
799
800 let title_len = title.len().max(1);
802 let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
803 evidence.add(
804 EvidenceKind::TitleLength,
805 length_factor,
806 EvidenceDescription::CoveragePercent {
807 percent: (query.len() as f64 / title_len as f64) * 100.0,
808 },
809 );
810
811 let score = evidence.posterior_probability();
812
813 MatchResult {
814 score,
815 match_type,
816 match_positions: positions,
817 evidence,
818 }
819 }
820
821 fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
823 let title_bytes = title.as_bytes();
824 let mut count = 0;
825 for &pos in positions {
826 let is_boundary = if pos == 0 {
827 true
828 } else {
829 let prev = title_bytes
830 .get(pos.saturating_sub(1))
831 .copied()
832 .unwrap_or(b' ');
833 prev == b' ' || prev == b'-' || prev == b'_'
834 };
835 if is_boundary {
836 count += 1;
837 }
838 }
839 count
840 }
841
842 fn total_gap(&self, positions: &[usize]) -> usize {
844 if positions.len() < 2 {
845 return 0;
846 }
847 let mut total = 0;
848 for window in positions.windows(2) {
849 total += window[1].saturating_sub(window[0]).saturating_sub(1);
850 }
851 total
852 }
853}
854
855#[derive(Debug, Clone)]
871pub struct RankConfidence {
872 pub confidence: f64,
876
877 pub gap_to_next: f64,
879
880 pub stability: RankStability,
882}
883
884#[derive(Debug, Clone, Copy, PartialEq, Eq)]
886pub enum RankStability {
887 Stable,
889 Marginal,
891 Unstable,
893}
894
895impl RankStability {
896 pub fn label(self) -> &'static str {
898 match self {
899 Self::Stable => "stable",
900 Self::Marginal => "marginal",
901 Self::Unstable => "unstable",
902 }
903 }
904}
905
906#[derive(Debug, Clone)]
908pub struct RankedResults {
909 pub items: Vec<RankedItem>,
911
912 pub summary: RankingSummary,
914}
915
916#[derive(Debug, Clone)]
918pub struct RankedItem {
919 pub original_index: usize,
921
922 pub result: MatchResult,
924
925 pub rank_confidence: RankConfidence,
927}
928
929#[derive(Debug, Clone)]
931pub struct RankingSummary {
932 pub count: usize,
934
935 pub stable_count: usize,
937
938 pub tie_group_count: usize,
940
941 pub median_gap: f64,
943}
944
945#[derive(Debug, Clone)]
973pub struct ConformalRanker {
974 pub tie_epsilon: f64,
976
977 pub stable_threshold: f64,
979
980 pub marginal_threshold: f64,
982}
983
984impl Default for ConformalRanker {
985 fn default() -> Self {
986 Self {
987 tie_epsilon: 1e-9,
988 stable_threshold: 0.7,
989 marginal_threshold: 0.3,
990 }
991 }
992}
993
994impl ConformalRanker {
995 pub fn new() -> Self {
997 Self::default()
998 }
999
1000 pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
1005 let count = results.len();
1006
1007 if count == 0 {
1008 return RankedResults {
1009 items: Vec::new(),
1010 summary: RankingSummary {
1011 count: 0,
1012 stable_count: 0,
1013 tie_group_count: 0,
1014 median_gap: 0.0,
1015 },
1016 };
1017 }
1018
1019 let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
1023 indexed.sort_by(|a, b| {
1024 b.1.score
1025 .partial_cmp(&a.1.score)
1026 .unwrap_or(std::cmp::Ordering::Equal)
1027 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1028 });
1029
1030 let gaps: Vec<f64> = if count > 1 {
1032 indexed
1033 .windows(2)
1034 .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
1035 .collect()
1036 } else {
1037 Vec::new()
1038 };
1039
1040 let mut sorted_gaps = gaps.clone();
1042 sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1043
1044 let mut items: Vec<RankedItem> = Vec::with_capacity(count);
1046 let mut stable_count = 0;
1047 let mut tie_group_count = 0;
1048 let mut in_tie_group = false;
1049
1050 for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
1051 let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);
1052
1053 let confidence = if sorted_gaps.is_empty() {
1055 1.0
1057 } else {
1058 let leq_count =
1059 sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
1060 leq_count as f64 / sorted_gaps.len() as f64
1061 };
1062
1063 let is_tie = gap_to_next < self.tie_epsilon;
1064 let stability = if is_tie {
1065 if !in_tie_group {
1066 tie_group_count += 1;
1067 in_tie_group = true;
1068 }
1069 RankStability::Unstable
1070 } else {
1071 in_tie_group = false;
1072 if confidence >= self.stable_threshold {
1073 stable_count += 1;
1074 RankStability::Stable
1075 } else if confidence >= self.marginal_threshold {
1076 RankStability::Marginal
1077 } else {
1078 RankStability::Unstable
1079 }
1080 };
1081
1082 items.push(RankedItem {
1083 original_index: orig_idx,
1084 result,
1085 rank_confidence: RankConfidence {
1086 confidence,
1087 gap_to_next,
1088 stability,
1089 },
1090 });
1091 }
1092
1093 let median_gap = if sorted_gaps.is_empty() {
1094 0.0
1095 } else {
1096 let mid = sorted_gaps.len() / 2;
1097 if sorted_gaps.len().is_multiple_of(2) {
1098 (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
1099 } else {
1100 sorted_gaps[mid]
1101 }
1102 };
1103
1104 RankedResults {
1105 items,
1106 summary: RankingSummary {
1107 count,
1108 stable_count,
1109 tie_group_count,
1110 median_gap,
1111 },
1112 }
1113 }
1114
1115 pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
1120 let mut ranked = self.rank(results);
1121 ranked.items.truncate(k);
1122 ranked.summary.stable_count = ranked
1124 .items
1125 .iter()
1126 .filter(|item| item.rank_confidence.stability == RankStability::Stable)
1127 .count();
1128 ranked
1129 }
1130}
1131
1132impl fmt::Display for RankConfidence {
1133 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1134 write!(
1135 f,
1136 "confidence={:.3} gap={:.4} ({})",
1137 self.confidence,
1138 self.gap_to_next,
1139 self.stability.label()
1140 )
1141 }
1142}
1143
1144impl fmt::Display for RankingSummary {
1145 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1146 write!(
1147 f,
1148 "{} items, {} stable, {} tie groups, median gap {:.4}",
1149 self.count, self.stable_count, self.tie_group_count, self.median_gap
1150 )
1151 }
1152}
1153
1154#[derive(Debug, Clone)]
1160#[allow(dead_code)]
1161struct CachedEntry {
1162 corpus_index: usize,
1164}
1165
1166#[derive(Debug)]
1194pub struct IncrementalScorer {
1195 scorer: BayesianScorer,
1197 prev_query: String,
1199 cache: Vec<CachedEntry>,
1201 corpus_generation: u64,
1203 corpus_len: usize,
1205 stats: IncrementalStats,
1207}
1208
1209#[derive(Debug, Clone, Default)]
1211pub struct IncrementalStats {
1212 pub full_scans: u64,
1214 pub incremental_scans: u64,
1216 pub total_evaluated: u64,
1218 pub total_pruned: u64,
1220}
1221
1222impl IncrementalStats {
1223 pub fn prune_ratio(&self) -> f64 {
1225 let total = self.total_evaluated + self.total_pruned;
1226 if total == 0 {
1227 0.0
1228 } else {
1229 self.total_pruned as f64 / total as f64
1230 }
1231 }
1232}
1233
1234impl IncrementalScorer {
1235 pub fn new() -> Self {
1237 Self {
1238 scorer: BayesianScorer::fast(),
1239 prev_query: String::new(),
1240 cache: Vec::new(),
1241 corpus_generation: 0,
1242 corpus_len: 0,
1243 stats: IncrementalStats::default(),
1244 }
1245 }
1246
1247 pub fn with_scorer(scorer: BayesianScorer) -> Self {
1249 Self {
1250 scorer,
1251 prev_query: String::new(),
1252 cache: Vec::new(),
1253 corpus_generation: 0,
1254 corpus_len: 0,
1255 stats: IncrementalStats::default(),
1256 }
1257 }
1258
1259 pub fn invalidate(&mut self) {
1261 self.prev_query.clear();
1262 self.cache.clear();
1263 self.corpus_generation = self.corpus_generation.wrapping_add(1);
1264 }
1265
1266 pub fn stats(&self) -> &IncrementalStats {
1268 &self.stats
1269 }
1270
1271 pub fn score_corpus(
1283 &mut self,
1284 query: &str,
1285 corpus: &[&str],
1286 generation: Option<u64>,
1287 ) -> Vec<(usize, MatchResult)> {
1288 let generation_val = generation.unwrap_or(self.corpus_generation);
1290 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1291 self.invalidate();
1292 self.corpus_generation = generation_val;
1293 self.corpus_len = corpus.len();
1294 }
1295
1296 let can_prune = !self.prev_query.is_empty()
1298 && query.starts_with(&self.prev_query)
1299 && !self.cache.is_empty();
1300
1301 let query_lower = query.to_lowercase();
1302 let results = if can_prune {
1303 self.score_incremental(query, &query_lower, corpus)
1304 } else {
1305 self.score_full(query, &query_lower, corpus)
1306 };
1307
1308 self.prev_query.clear();
1310 self.prev_query.push_str(query);
1311 self.cache = results
1312 .iter()
1313 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1314 .collect();
1315
1316 results
1317 }
1318
1319 pub fn score_corpus_with_lowered(
1323 &mut self,
1324 query: &str,
1325 corpus: &[&str],
1326 corpus_lower: &[&str],
1327 generation: Option<u64>,
1328 ) -> Vec<(usize, MatchResult)> {
1329 debug_assert_eq!(
1330 corpus.len(),
1331 corpus_lower.len(),
1332 "corpus_lower must match corpus length"
1333 );
1334
1335 let generation_val = generation.unwrap_or(self.corpus_generation);
1337 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1338 self.invalidate();
1339 self.corpus_generation = generation_val;
1340 self.corpus_len = corpus.len();
1341 }
1342
1343 let can_prune = !self.prev_query.is_empty()
1345 && query.starts_with(&self.prev_query)
1346 && !self.cache.is_empty();
1347
1348 let query_lower = query.to_lowercase();
1349 let results = if can_prune {
1350 self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
1351 } else {
1352 self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
1353 };
1354
1355 self.prev_query.clear();
1357 self.prev_query.push_str(query);
1358 self.cache = results
1359 .iter()
1360 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1361 .collect();
1362
1363 results
1364 }
1365
1366 pub fn score_corpus_with_lowered_and_words(
1370 &mut self,
1371 query: &str,
1372 corpus: &[String],
1373 corpus_lower: &[String],
1374 word_starts: &[Vec<usize>],
1375 generation: Option<u64>,
1376 ) -> Vec<(usize, MatchResult)> {
1377 debug_assert_eq!(
1378 corpus.len(),
1379 corpus_lower.len(),
1380 "corpus_lower must match corpus length"
1381 );
1382 debug_assert_eq!(
1383 corpus.len(),
1384 word_starts.len(),
1385 "word_starts must match corpus length"
1386 );
1387
1388 let generation_val = generation.unwrap_or(self.corpus_generation);
1390 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1391 self.invalidate();
1392 self.corpus_generation = generation_val;
1393 self.corpus_len = corpus.len();
1394 }
1395
1396 let can_prune = !self.prev_query.is_empty()
1398 && query.starts_with(&self.prev_query)
1399 && !self.cache.is_empty();
1400
1401 let query_lower = query.to_lowercase();
1402 let results = if can_prune {
1403 self.score_incremental_lowered_with_words(
1404 query,
1405 &query_lower,
1406 corpus,
1407 corpus_lower,
1408 word_starts,
1409 )
1410 } else {
1411 self.score_full_lowered_with_words(
1412 query,
1413 &query_lower,
1414 corpus,
1415 corpus_lower,
1416 word_starts,
1417 )
1418 };
1419
1420 self.prev_query.clear();
1422 self.prev_query.push_str(query);
1423 self.cache = results
1424 .iter()
1425 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1426 .collect();
1427
1428 results
1429 }
1430
1431 pub fn score_candidate_indices_with_lowered_and_words(
1439 &mut self,
1440 query: &str,
1441 corpus: &[String],
1442 corpus_lower: &[String],
1443 word_starts: &[Vec<usize>],
1444 candidate_indices: &[usize],
1445 generation: Option<u64>,
1446 ) -> Vec<(usize, MatchResult)> {
1447 debug_assert_eq!(
1448 corpus.len(),
1449 corpus_lower.len(),
1450 "corpus_lower must match corpus length"
1451 );
1452 debug_assert_eq!(
1453 corpus.len(),
1454 word_starts.len(),
1455 "word_starts must match corpus length"
1456 );
1457
1458 let generation_val = generation.unwrap_or(self.corpus_generation);
1459 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1460 self.invalidate();
1461 self.corpus_generation = generation_val;
1462 self.corpus_len = corpus.len();
1463 }
1464
1465 let candidate_count = candidate_indices
1466 .iter()
1467 .filter(|idx| **idx < corpus.len())
1468 .count();
1469 self.stats.incremental_scans += 1;
1470 self.stats.total_evaluated += candidate_count as u64;
1471 self.stats.total_pruned += corpus.len().saturating_sub(candidate_count) as u64;
1472
1473 let query_lower = query.to_lowercase();
1474 let mut results = Vec::with_capacity(candidate_count);
1475 for &idx in candidate_indices {
1476 let (Some(title), Some(title_lower), Some(starts)) =
1477 (corpus.get(idx), corpus_lower.get(idx), word_starts.get(idx))
1478 else {
1479 continue;
1480 };
1481
1482 let result = self.scorer.score_with_lowered_title_and_words(
1483 query,
1484 &query_lower,
1485 title,
1486 title_lower,
1487 Some(starts),
1488 );
1489 if result.score > 0.0 {
1490 results.push((idx, result));
1491 }
1492 }
1493
1494 results.sort_unstable_by(compare_ranked_match_results);
1495
1496 results
1497 }
1498
1499 fn score_full(
1501 &mut self,
1502 query: &str,
1503 query_lower: &str,
1504 corpus: &[&str],
1505 ) -> Vec<(usize, MatchResult)> {
1506 self.stats.full_scans += 1;
1507 self.stats.total_evaluated += corpus.len() as u64;
1508
1509 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1510 for (i, title) in corpus.iter().enumerate() {
1511 let result = self
1512 .scorer
1513 .score_with_query_lower(query, query_lower, title);
1514 if result.score > 0.0 {
1515 results.push((i, result));
1516 }
1517 }
1518
1519 results.sort_unstable_by(compare_ranked_match_results);
1520
1521 results
1522 }
1523
1524 fn score_full_lowered(
1526 &mut self,
1527 query: &str,
1528 query_lower: &str,
1529 corpus: &[&str],
1530 corpus_lower: &[&str],
1531 ) -> Vec<(usize, MatchResult)> {
1532 self.stats.full_scans += 1;
1533 self.stats.total_evaluated += corpus.len() as u64;
1534
1535 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1536 for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
1537 let result =
1538 self.scorer
1539 .score_with_lowered_title(query, query_lower, title, title_lower);
1540 if result.score > 0.0 {
1541 results.push((i, result));
1542 }
1543 }
1544
1545 results.sort_unstable_by(compare_ranked_match_results);
1546
1547 results
1548 }
1549
1550 fn score_full_lowered_with_words(
1552 &mut self,
1553 query: &str,
1554 query_lower: &str,
1555 corpus: &[String],
1556 corpus_lower: &[String],
1557 word_starts: &[Vec<usize>],
1558 ) -> Vec<(usize, MatchResult)> {
1559 self.stats.full_scans += 1;
1560 self.stats.total_evaluated += corpus.len() as u64;
1561
1562 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1563 for (i, ((title, title_lower), starts)) in corpus
1564 .iter()
1565 .zip(corpus_lower.iter())
1566 .zip(word_starts.iter())
1567 .enumerate()
1568 {
1569 let result = self.scorer.score_with_lowered_title_and_words(
1570 query,
1571 query_lower,
1572 title,
1573 title_lower,
1574 Some(starts),
1575 );
1576 if result.score > 0.0 {
1577 results.push((i, result));
1578 }
1579 }
1580
1581 results.sort_unstable_by(compare_ranked_match_results);
1582
1583 results
1584 }
1585 fn score_incremental(
1587 &mut self,
1588 query: &str,
1589 query_lower: &str,
1590 corpus: &[&str],
1591 ) -> Vec<(usize, MatchResult)> {
1592 self.stats.incremental_scans += 1;
1593
1594 let prev_match_count = self.cache.len();
1595 let pruned = corpus.len().saturating_sub(prev_match_count);
1596 self.stats.total_pruned += pruned as u64;
1597 self.stats.total_evaluated += prev_match_count as u64;
1598
1599 let mut results: Vec<(usize, MatchResult)> =
1600 Vec::with_capacity(self.cache.len().min(corpus.len()));
1601 for entry in &self.cache {
1602 if entry.corpus_index < corpus.len() {
1603 let result = self.scorer.score_with_query_lower(
1604 query,
1605 query_lower,
1606 corpus[entry.corpus_index],
1607 );
1608 if result.score > 0.0 {
1609 results.push((entry.corpus_index, result));
1610 }
1611 }
1612 }
1613
1614 results.sort_unstable_by(compare_ranked_match_results);
1615
1616 results
1617 }
1618
1619 fn score_incremental_lowered(
1621 &mut self,
1622 query: &str,
1623 query_lower: &str,
1624 corpus: &[&str],
1625 corpus_lower: &[&str],
1626 ) -> Vec<(usize, MatchResult)> {
1627 self.stats.incremental_scans += 1;
1628
1629 let prev_match_count = self.cache.len();
1630 let pruned = corpus.len().saturating_sub(prev_match_count);
1631 self.stats.total_pruned += pruned as u64;
1632 self.stats.total_evaluated += prev_match_count as u64;
1633
1634 let mut results: Vec<(usize, MatchResult)> =
1635 Vec::with_capacity(self.cache.len().min(corpus.len()));
1636 for entry in &self.cache {
1637 if entry.corpus_index < corpus.len() {
1638 let title = corpus[entry.corpus_index];
1639 let title_lower = corpus_lower[entry.corpus_index];
1640 let result =
1641 self.scorer
1642 .score_with_lowered_title(query, query_lower, title, title_lower);
1643 if result.score > 0.0 {
1644 results.push((entry.corpus_index, result));
1645 }
1646 }
1647 }
1648
1649 results.sort_unstable_by(compare_ranked_match_results);
1650
1651 results
1652 }
1653
1654 fn score_incremental_lowered_with_words(
1656 &mut self,
1657 query: &str,
1658 query_lower: &str,
1659 corpus: &[String],
1660 corpus_lower: &[String],
1661 word_starts: &[Vec<usize>],
1662 ) -> Vec<(usize, MatchResult)> {
1663 self.stats.incremental_scans += 1;
1664
1665 let prev_match_count = self.cache.len();
1666 let pruned = corpus.len().saturating_sub(prev_match_count);
1667 self.stats.total_pruned += pruned as u64;
1668 self.stats.total_evaluated += prev_match_count as u64;
1669
1670 let mut results: Vec<(usize, MatchResult)> =
1671 Vec::with_capacity(self.cache.len().min(corpus.len()));
1672 for entry in &self.cache {
1673 if entry.corpus_index < corpus.len() {
1674 let title = &corpus[entry.corpus_index];
1675 let title_lower = &corpus_lower[entry.corpus_index];
1676 let starts = &word_starts[entry.corpus_index];
1677 let result = self.scorer.score_with_lowered_title_and_words(
1678 query,
1679 query_lower,
1680 title,
1681 title_lower,
1682 Some(starts),
1683 );
1684 if result.score > 0.0 {
1685 results.push((entry.corpus_index, result));
1686 }
1687 }
1688 }
1689
1690 results.sort_by(|a, b| {
1691 b.1.score
1692 .partial_cmp(&a.1.score)
1693 .unwrap_or(std::cmp::Ordering::Equal)
1694 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1695 });
1696
1697 results
1698 }
1699}
1700
1701impl Default for IncrementalScorer {
1702 fn default() -> Self {
1703 Self::new()
1704 }
1705}
1706
1707#[cfg(test)]
1712mod tests {
1713 use super::*;
1714
1715 #[test]
1718 fn exact_match_highest_score() {
1719 let scorer = BayesianScorer::new();
1720 let result = scorer.score("Settings", "Settings");
1721 assert_eq!(result.match_type, MatchType::Exact);
1722 assert!(result.score > 0.95, "Exact match should score > 0.95");
1723 }
1724
1725 #[test]
1726 fn prefix_match_high_score() {
1727 let scorer = BayesianScorer::new();
1728 let result = scorer.score("set", "Settings");
1729 assert_eq!(result.match_type, MatchType::Prefix);
1730 assert!(result.score > 0.85, "Prefix match should score > 0.85");
1731 }
1732
1733 #[test]
1734 fn word_start_match_score() {
1735 let scorer = BayesianScorer::new();
1736 let result = scorer.score("gd", "Go Dashboard");
1737 assert_eq!(result.match_type, MatchType::WordStart);
1738 assert!(result.score > 0.75, "Word-start should score > 0.75");
1739 }
1740
1741 #[test]
1742 fn substring_match_moderate_score() {
1743 let scorer = BayesianScorer::new();
1744 let result = scorer.score("set", "Asset Manager");
1745 assert_eq!(result.match_type, MatchType::Substring);
1746 assert!(result.score > 0.5, "Substring should score > 0.5");
1747 }
1748
1749 #[test]
1750 fn fuzzy_match_low_score() {
1751 let scorer = BayesianScorer::new();
1752 let result = scorer.score("stg", "Settings");
1753 assert_eq!(result.match_type, MatchType::Fuzzy);
1754 assert!(result.score > 0.2, "Fuzzy should score > 0.2");
1755 assert!(result.score < 0.7, "Fuzzy should score < 0.7");
1756 }
1757
1758 #[test]
1759 fn no_match_returns_zero() {
1760 let scorer = BayesianScorer::new();
1761 let result = scorer.score("xyz", "Settings");
1762 assert_eq!(result.match_type, MatchType::NoMatch);
1763 assert_eq!(result.score, 0.0);
1764 }
1765
1766 #[test]
1769 fn word_start_match_ascii_empty_query_matches() {
1770 let scorer = BayesianScorer::new();
1771 let positions = scorer
1772 .word_start_match_ascii(b"", b"go dashboard", None)
1773 .expect("empty query should match");
1774 assert!(positions.is_empty());
1775 }
1776
1777 #[test]
1778 fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
1779 let scorer = BayesianScorer::new();
1780 let starts = [999, 0, 3];
1781 let positions = scorer
1782 .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
1783 .expect("should still match despite out-of-bounds precomputed starts");
1784 assert_eq!(positions, vec![0, 3]);
1785 }
1786
1787 #[test]
1788 fn fuzzy_match_ascii_empty_query_matches() {
1789 let scorer = BayesianScorer::new();
1790 let positions = scorer
1791 .fuzzy_match_ascii(b"", b"settings")
1792 .expect("empty query should match");
1793 assert!(positions.is_empty());
1794 }
1795
1796 #[test]
1799 fn score_bounded() {
1800 let scorer = BayesianScorer::new();
1801 let test_cases = [
1802 ("a", "abcdefghijklmnop"),
1803 ("full", "full"),
1804 ("", "anything"),
1805 ("xyz", "abc"),
1806 ("stg", "Settings"),
1807 ];
1808
1809 for (query, title) in test_cases {
1810 let result = scorer.score(query, title);
1811 assert!(
1812 result.score >= 0.0 && result.score <= 1.0,
1813 "Score for ({}, {}) = {} not in [0, 1]",
1814 query,
1815 title,
1816 result.score
1817 );
1818 }
1819 }
1820
1821 #[test]
1822 fn score_deterministic() {
1823 let scorer = BayesianScorer::new();
1824 let result1 = scorer.score("nav", "Navigation");
1825 let result2 = scorer.score("nav", "Navigation");
1826 assert!(
1827 (result1.score - result2.score).abs() < f64::EPSILON,
1828 "Same input should produce identical scores"
1829 );
1830 }
1831
1832 #[test]
1833 fn tiebreak_shorter_first() {
1834 let scorer = BayesianScorer::new();
1835 let short = scorer.score("set", "Set");
1836 let long = scorer.score("set", "Settings");
1837 assert!(
1838 short.score >= long.score,
1839 "Shorter title should score >= longer: {} vs {}",
1840 short.score,
1841 long.score
1842 );
1843 }
1844
1845 #[test]
1848 fn case_insensitive() {
1849 let scorer = BayesianScorer::new();
1850 let lower = scorer.score("set", "Settings");
1851 let upper = scorer.score("SET", "Settings");
1852 assert!(
1853 (lower.score - upper.score).abs() < f64::EPSILON,
1854 "Case should not affect score"
1855 );
1856 }
1857
1858 #[test]
1861 fn match_positions_correct() {
1862 let scorer = BayesianScorer::new();
1863 let result = scorer.score("gd", "Go Dashboard");
1864 assert_eq!(result.match_positions, vec![0, 3]);
1865 }
1866
1867 #[test]
1868 fn fuzzy_match_positions() {
1869 let scorer = BayesianScorer::new();
1870 let result = scorer.score("stg", "Settings");
1871 assert_eq!(result.match_positions.len(), 3);
1873 assert_eq!(result.match_positions[0], 0); }
1875
1876 #[test]
1879 fn empty_query_returns_all() {
1880 let scorer = BayesianScorer::new();
1881 let result = scorer.score("", "Any Command");
1882 assert!(result.score > 0.0, "Empty query should have positive score");
1883 assert!(result.score < 1.0, "Empty query should not be max score");
1884 }
1885
1886 #[test]
1887 fn empty_title_is_safe() {
1888 let scorer = BayesianScorer::new();
1889 let result = scorer.score("x", "");
1890 assert_eq!(result.match_type, MatchType::NoMatch);
1891 assert!(result.score.is_finite(), "Score should remain finite");
1892 }
1893
1894 #[test]
1895 fn empty_query_empty_title_is_finite() {
1896 let scorer = BayesianScorer::new();
1897 let result = scorer.score("", "");
1898 assert!(result.score.is_finite(), "Score should remain finite");
1899 assert_eq!(result.match_type, MatchType::Fuzzy);
1900 }
1901
1902 #[test]
1905 fn query_longer_than_title() {
1906 let scorer = BayesianScorer::new();
1907 let result = scorer.score("verylongquery", "short");
1908 assert_eq!(result.match_type, MatchType::NoMatch);
1909 assert_eq!(result.score, 0.0);
1910 }
1911
1912 #[test]
1913 fn long_query_exact_match_is_handled() {
1914 let scorer = BayesianScorer::new();
1915 let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
1916 let result = scorer.score(query, query);
1917 assert_eq!(result.match_type, MatchType::Exact);
1918 assert!(result.score.is_finite());
1919 assert!(result.score > 0.9, "Exact long query should score high");
1920 }
1921
1922 #[test]
1923 fn gap_penalty_prefers_tight_fuzzy_matches() {
1924 let scorer = BayesianScorer::new();
1925 let tight = scorer.score("ace", "abcde");
1926 let gappy = scorer.score("ace", "a...c...e");
1927 assert_eq!(tight.match_type, MatchType::Fuzzy);
1928 assert_eq!(gappy.match_type, MatchType::Fuzzy);
1929 assert!(
1930 tight.score > gappy.score,
1931 "Tight fuzzy match should score higher than gappy: {} vs {}",
1932 tight.score,
1933 gappy.score
1934 );
1935 }
1936
1937 #[test]
1940 fn tag_match_boosts_score() {
1941 let scorer = BayesianScorer::new();
1942 let without_tag = scorer.score("set", "Settings");
1944 let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
1945 assert!(
1947 with_tag.score > without_tag.score,
1948 "Tag match should boost score: {} > {}",
1949 with_tag.score,
1950 without_tag.score
1951 );
1952 }
1953
1954 #[test]
1957 fn evidence_ledger_tracks_factors() {
1958 let scorer = BayesianScorer::new();
1959 let result = scorer.score("set", "Settings");
1960
1961 assert!(!result.evidence.entries().is_empty());
1962
1963 assert!(
1965 result
1966 .evidence
1967 .entries()
1968 .iter()
1969 .any(|e| e.kind == EvidenceKind::MatchType)
1970 );
1971 }
1972
1973 #[test]
1974 fn evidence_ledger_display() {
1975 let scorer = BayesianScorer::new();
1976 let result = scorer.score("gd", "Go Dashboard");
1977 let display = format!("{}", result.evidence);
1978 assert!(display.contains("Evidence Ledger"));
1979 assert!(display.contains("Posterior P:"));
1980 }
1981
1982 #[test]
1985 fn property_ordering_total() {
1986 let scorer = BayesianScorer::new();
1987 let titles = ["Settings", "Set Theme", "Reset View", "Asset"];
1988
1989 let mut scores: Vec<(f64, &str)> = titles
1990 .iter()
1991 .map(|t| (scorer.score("set", t).score, *t))
1992 .collect();
1993
1994 scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
1996
1997 for (score, _) in &scores {
1999 assert!(score.is_finite());
2000 }
2001 }
2002
2003 #[test]
2004 fn property_prefix_monotonic() {
2005 let scorer = BayesianScorer::new();
2006 let one_char = scorer.score("s", "Settings");
2008 let three_char = scorer.score("set", "Settings");
2009 assert!(
2011 three_char.score >= one_char.score,
2012 "Longer prefix should score >= shorter"
2013 );
2014 }
2015
2016 #[test]
2019 fn match_type_prior_ordering() {
2020 assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
2021 assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
2022 assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
2023 assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
2024 assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
2025 }
2026
2027 #[test]
2032 fn conformal_tie_breaks_by_match_type() {
2033 let mut exact = MatchResult::no_match();
2034 exact.score = 0.5;
2035 exact.match_type = MatchType::Exact;
2036
2037 let mut prefix = MatchResult::no_match();
2038 prefix.score = 0.5;
2039 prefix.match_type = MatchType::Prefix;
2040
2041 let mut word_start = MatchResult::no_match();
2042 word_start.score = 0.5;
2043 word_start.match_type = MatchType::WordStart;
2044
2045 let mut substring = MatchResult::no_match();
2046 substring.score = 0.5;
2047 substring.match_type = MatchType::Substring;
2048
2049 let ranker = ConformalRanker::new();
2050 let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
2051 let order: Vec<MatchType> = ranked
2052 .items
2053 .iter()
2054 .map(|item| item.result.match_type)
2055 .collect();
2056
2057 assert_eq!(
2058 order,
2059 vec![
2060 MatchType::Exact,
2061 MatchType::Prefix,
2062 MatchType::WordStart,
2063 MatchType::Substring
2064 ]
2065 );
2066 }
2067
2068 #[test]
2069 fn conformal_empty_input() {
2070 let ranker = ConformalRanker::new();
2071 let ranked = ranker.rank(Vec::new());
2072 assert_eq!(ranked.items.len(), 0);
2073 assert_eq!(ranked.summary.count, 0);
2074 assert_eq!(ranked.summary.stable_count, 0);
2075 assert_eq!(ranked.summary.tie_group_count, 0);
2076 assert_eq!(ranked.summary.median_gap, 0.0);
2077 }
2078
2079 #[test]
2080 fn conformal_single_item() {
2081 let scorer = BayesianScorer::new();
2082 let results = vec![scorer.score("set", "Settings")];
2083 let ranker = ConformalRanker::new();
2084 let ranked = ranker.rank(results);
2085
2086 assert_eq!(ranked.items.len(), 1);
2087 assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
2088 assert_eq!(ranked.summary.count, 1);
2089 }
2090
2091 #[test]
2092 fn conformal_sorted_descending() {
2093 let scorer = BayesianScorer::new();
2094 let results = vec![
2095 scorer.score("set", "Settings"),
2096 scorer.score("set", "Asset Manager"),
2097 scorer.score("set", "Reset Configuration Panel"),
2098 ];
2099 let ranker = ConformalRanker::new();
2100 let ranked = ranker.rank(results);
2101
2102 for w in ranked.items.windows(2) {
2104 let a = w[0].result.score;
2105 let b = w[1].result.score;
2106 assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
2107 }
2108 }
2109
2110 #[test]
2111 fn conformal_confidence_bounded() {
2112 let scorer = BayesianScorer::new();
2113 let titles = [
2114 "Settings",
2115 "Set Theme",
2116 "Asset Manager",
2117 "Reset View",
2118 "Offset Tool",
2119 "Test Suite",
2120 ];
2121 let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2122 let ranker = ConformalRanker::new();
2123 let ranked = ranker.rank(results);
2124
2125 for item in &ranked.items {
2126 assert!(
2127 item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
2128 "Confidence must be in [0, 1], got {}",
2129 item.rank_confidence.confidence
2130 );
2131 assert!(
2132 item.rank_confidence.gap_to_next >= 0.0,
2133 "Gap must be non-negative"
2134 );
2135 }
2136 }
2137
2138 #[test]
2139 fn conformal_deterministic() {
2140 let scorer = BayesianScorer::new();
2141 let titles = ["Settings", "Set Theme", "Asset", "Reset"];
2142
2143 let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2144 let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2145
2146 let ranker = ConformalRanker::new();
2147 let ranked1 = ranker.rank(results1);
2148 let ranked2 = ranker.rank(results2);
2149
2150 for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
2151 assert!(
2152 (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
2153 "Confidence should be deterministic"
2154 );
2155 assert_eq!(
2156 a.original_index, b.original_index,
2157 "Rank order should be deterministic"
2158 );
2159 }
2160 }
2161
2162 #[test]
2163 fn conformal_ties_detected() {
2164 let mut r1 = MatchResult::no_match();
2166 r1.score = 0.8;
2167 r1.match_type = MatchType::Prefix;
2168 let mut r2 = MatchResult::no_match();
2169 r2.score = 0.8;
2170 r2.match_type = MatchType::Prefix;
2171 let mut r3 = MatchResult::no_match();
2172 r3.score = 0.5;
2173 r3.match_type = MatchType::Substring;
2174
2175 let ranker = ConformalRanker::new();
2176 let ranked = ranker.rank(vec![r1, r2, r3]);
2177
2178 assert_eq!(
2180 ranked.items[0].rank_confidence.stability,
2181 RankStability::Unstable,
2182 "Tied items should be Unstable"
2183 );
2184 assert!(
2185 ranked.summary.tie_group_count >= 1,
2186 "Should detect at least one tie group"
2187 );
2188 }
2189
2190 #[test]
2191 fn conformal_all_identical_scores() {
2192 let mut results = Vec::new();
2193 for _ in 0..5 {
2194 let mut r = MatchResult::no_match();
2195 r.score = 0.5;
2196 r.match_type = MatchType::Fuzzy;
2197 results.push(r);
2198 }
2199
2200 let ranker = ConformalRanker::new();
2201 let ranked = ranker.rank(results);
2202
2203 for item in &ranked.items {
2205 assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
2206 }
2207 }
2208
2209 #[test]
2210 fn conformal_well_separated_scores_are_stable() {
2211 let mut results = Vec::new();
2212 for &s in &[0.9, 0.6, 0.3, 0.1] {
2214 let mut r = MatchResult::no_match();
2215 r.score = s;
2216 r.match_type = MatchType::Prefix;
2217 results.push(r);
2218 }
2219
2220 let ranker = ConformalRanker::new();
2221 let ranked = ranker.rank(results);
2222
2223 assert!(
2225 ranked.summary.stable_count >= 2,
2226 "Well-separated scores should yield stable positions, got {}",
2227 ranked.summary.stable_count
2228 );
2229 }
2230
2231 #[test]
2232 fn conformal_top_k_truncates() {
2233 let scorer = BayesianScorer::new();
2234 let titles = [
2235 "Settings",
2236 "Set Theme",
2237 "Asset Manager",
2238 "Reset View",
2239 "Offset Tool",
2240 ];
2241 let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2242
2243 let ranker = ConformalRanker::new();
2244 let ranked = ranker.rank_top_k(results, 2);
2245
2246 assert_eq!(ranked.items.len(), 2);
2247 assert!(ranked.items[0].rank_confidence.confidence > 0.0);
2249 }
2250
2251 #[test]
2252 fn conformal_original_indices_preserved() {
2253 let scorer = BayesianScorer::new();
2254 let titles = ["Zebra Tool", "Settings", "Apple"];
2255 let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2256
2257 let ranker = ConformalRanker::new();
2258 let ranked = ranker.rank(results);
2259
2260 assert_eq!(
2262 ranked.items[0].original_index, 1,
2263 "Settings should be first; original_index should be 1"
2264 );
2265 }
2266
2267 #[test]
2268 fn conformal_summary_display() {
2269 let scorer = BayesianScorer::new();
2270 let results = vec![
2271 scorer.score("set", "Settings"),
2272 scorer.score("set", "Set Theme"),
2273 ];
2274 let ranker = ConformalRanker::new();
2275 let ranked = ranker.rank(results);
2276
2277 let display = format!("{}", ranked.summary);
2278 assert!(display.contains("2 items"));
2279 }
2280
2281 #[test]
2282 fn conformal_rank_confidence_display() {
2283 let rc = RankConfidence {
2284 confidence: 0.85,
2285 gap_to_next: 0.1234,
2286 stability: RankStability::Stable,
2287 };
2288 let display = format!("{}", rc);
2289 assert!(display.contains("0.850"));
2290 assert!(display.contains("stable"));
2291 }
2292
2293 #[test]
2294 fn conformal_stability_labels() {
2295 assert_eq!(RankStability::Stable.label(), "stable");
2296 assert_eq!(RankStability::Marginal.label(), "marginal");
2297 assert_eq!(RankStability::Unstable.label(), "unstable");
2298 }
2299
2300 #[test]
2303 fn conformal_last_item_gap_zero() {
2304 let scorer = BayesianScorer::new();
2305 let results = vec![
2306 scorer.score("set", "Settings"),
2307 scorer.score("set", "Asset"),
2308 scorer.score("set", "Reset"),
2309 ];
2310 let ranker = ConformalRanker::new();
2311 let ranked = ranker.rank(results);
2312
2313 let last = ranked.items.last().unwrap();
2314 assert_eq!(
2315 last.rank_confidence.gap_to_next, 0.0,
2316 "Last item gap should be 0"
2317 );
2318 }
2319
2320 #[test]
2323 fn conformal_median_gap_non_negative() {
2324 let scorer = BayesianScorer::new();
2325 let titles = [
2326 "Settings",
2327 "Set Theme",
2328 "Asset Manager",
2329 "Reset View",
2330 "Offset Tool",
2331 "Test Suite",
2332 "System Settings",
2333 "Reset Defaults",
2334 ];
2335 let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2336 let ranker = ConformalRanker::new();
2337 let ranked = ranker.rank(results);
2338
2339 assert!(
2340 ranked.summary.median_gap >= 0.0,
2341 "Median gap must be non-negative"
2342 );
2343 }
2344
2345 fn test_corpus() -> Vec<&'static str> {
2350 vec![
2351 "Open File",
2352 "Save File",
2353 "Close Tab",
2354 "Git: Commit",
2355 "Git: Push",
2356 "Git: Pull",
2357 "Go to Line",
2358 "Find in Files",
2359 "Toggle Terminal",
2360 "Format Document",
2361 ]
2362 }
2363
2364 #[test]
2365 fn incremental_full_scan_on_first_call() {
2366 let mut scorer = IncrementalScorer::new();
2367 let corpus = test_corpus();
2368 let results = scorer.score_corpus("git", &corpus, None);
2369
2370 assert!(!results.is_empty(), "Should find matches for 'git'");
2371 assert_eq!(scorer.stats().full_scans, 1);
2372 assert_eq!(scorer.stats().incremental_scans, 0);
2373 }
2374
2375 #[test]
2376 fn incremental_prunes_on_query_extension() {
2377 let mut scorer = IncrementalScorer::new();
2378 let corpus = test_corpus();
2379
2380 let r1 = scorer.score_corpus("g", &corpus, None);
2382 assert_eq!(scorer.stats().full_scans, 1);
2383
2384 let r2 = scorer.score_corpus("gi", &corpus, None);
2386 assert_eq!(scorer.stats().incremental_scans, 1);
2387 assert!(r2.len() <= r1.len(), "Extended query should match <= items");
2388
2389 let r3 = scorer.score_corpus("git", &corpus, None);
2391 assert_eq!(scorer.stats().incremental_scans, 2);
2392 assert!(r3.len() <= r2.len());
2393 }
2394
2395 #[test]
2396 fn incremental_correctness_matches_full_scan() {
2397 let corpus = test_corpus();
2398
2399 let mut inc = IncrementalScorer::new();
2401 inc.score_corpus("g", &corpus, None);
2402 let inc_results = inc.score_corpus("git", &corpus, None);
2403
2404 let mut full = IncrementalScorer::new();
2406 let full_results = full.score_corpus("git", &corpus, None);
2407
2408 assert_eq!(
2410 inc_results.len(),
2411 full_results.len(),
2412 "Incremental and full scan should return same count"
2413 );
2414
2415 for (a, b) in inc_results.iter().zip(full_results.iter()) {
2416 assert_eq!(a.0, b.0, "Same corpus indices");
2417 assert!(
2418 (a.1.score - b.1.score).abs() < f64::EPSILON,
2419 "Same scores: {} vs {}",
2420 a.1.score,
2421 b.1.score
2422 );
2423 }
2424 }
2425
2426 #[test]
2427 fn incremental_falls_back_on_non_extension() {
2428 let mut scorer = IncrementalScorer::new();
2429 let corpus = test_corpus();
2430
2431 scorer.score_corpus("git", &corpus, None);
2432 assert_eq!(scorer.stats().full_scans, 1);
2433
2434 scorer.score_corpus("fi", &corpus, None);
2436 assert_eq!(scorer.stats().full_scans, 2);
2437 }
2438
2439 #[test]
2440 fn incremental_invalidate_forces_full_scan() {
2441 let mut scorer = IncrementalScorer::new();
2442 let corpus = test_corpus();
2443
2444 scorer.score_corpus("g", &corpus, None);
2445 scorer.invalidate();
2446
2447 scorer.score_corpus("gi", &corpus, None);
2449 assert_eq!(scorer.stats().full_scans, 2);
2450 assert_eq!(scorer.stats().incremental_scans, 0);
2451 }
2452
2453 #[test]
2454 fn incremental_generation_change_invalidates() {
2455 let mut scorer = IncrementalScorer::new();
2456 let corpus = test_corpus();
2457
2458 scorer.score_corpus("g", &corpus, Some(1));
2459
2460 scorer.score_corpus("gi", &corpus, Some(2));
2462 assert_eq!(scorer.stats().full_scans, 2);
2463 }
2464
2465 #[test]
2466 fn incremental_corpus_size_change_invalidates() {
2467 let mut scorer = IncrementalScorer::new();
2468 let corpus1 = test_corpus();
2469 let corpus2 = &corpus1[..5];
2470
2471 scorer.score_corpus("g", &corpus1, None);
2472 scorer.score_corpus("gi", corpus2, None);
2473 assert_eq!(scorer.stats().full_scans, 2);
2475 }
2476
2477 #[test]
2478 fn incremental_skips_out_of_bounds_cache_entries() {
2479 let mut scorer = IncrementalScorer::new();
2480
2481 scorer.cache = vec![
2484 CachedEntry { corpus_index: 0 },
2485 CachedEntry { corpus_index: 999 },
2486 ];
2487
2488 let corpus = ["a"];
2489 let results = scorer.score_incremental("a", "a", &corpus);
2490 assert_eq!(results.len(), 1);
2491 assert_eq!(results[0].0, 0);
2492
2493 let corpus_lower = ["a"];
2494 let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
2495 assert_eq!(results.len(), 1);
2496 assert_eq!(results[0].0, 0);
2497
2498 let corpus_s = vec!["a".to_string()];
2499 let corpus_lower_s = vec!["a".to_string()];
2500 let word_starts = vec![vec![0]];
2501 let results = scorer.score_incremental_lowered_with_words(
2502 "a",
2503 "a",
2504 &corpus_s,
2505 &corpus_lower_s,
2506 &word_starts,
2507 );
2508 assert_eq!(results.len(), 1);
2509 assert_eq!(results[0].0, 0);
2510 }
2511
2512 #[test]
2513 fn incremental_empty_query() {
2514 let mut scorer = IncrementalScorer::new();
2515 let corpus = test_corpus();
2516
2517 let results = scorer.score_corpus("", &corpus, None);
2518 assert_eq!(results.len(), corpus.len());
2520 }
2521
2522 #[test]
2523 fn incremental_no_match_query() {
2524 let mut scorer = IncrementalScorer::new();
2525 let corpus = test_corpus();
2526
2527 let results = scorer.score_corpus("zzz", &corpus, None);
2528 assert!(results.is_empty());
2529 }
2530
2531 #[test]
2532 fn incremental_stats_prune_ratio() {
2533 let mut scorer = IncrementalScorer::new();
2534 let corpus = test_corpus();
2535
2536 scorer.score_corpus("g", &corpus, None);
2537 scorer.score_corpus("gi", &corpus, None);
2538 scorer.score_corpus("git", &corpus, None);
2539
2540 let stats = scorer.stats();
2541 assert!(
2542 stats.prune_ratio() > 0.0,
2543 "Prune ratio should be > 0 after incremental scans"
2544 );
2545 assert!(stats.total_pruned > 0, "Should have pruned some items");
2546 }
2547
2548 #[test]
2549 fn incremental_results_sorted_descending() {
2550 let mut scorer = IncrementalScorer::new();
2551 let corpus = test_corpus();
2552
2553 let results = scorer.score_corpus("o", &corpus, None);
2554 for w in results.windows(2) {
2555 let a = w[0].1.score;
2556 let b = w[1].1.score;
2557 assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
2558 }
2559 }
2560
2561 #[test]
2562 fn incremental_lowered_matches_full() {
2563 let corpus = vec![
2564 "Open File".to_string(),
2565 "Save File".to_string(),
2566 "Close".to_string(),
2567 "Launch 🚀".to_string(),
2568 ];
2569 let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
2570 let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
2571 let word_starts: Vec<Vec<usize>> = lower
2572 .iter()
2573 .map(|title_lower| {
2574 let bytes = title_lower.as_bytes();
2575 title_lower
2576 .char_indices()
2577 .filter_map(|(i, _)| {
2578 let is_word_start = i == 0 || {
2579 let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
2580 prev == b' ' || prev == b'-' || prev == b'_'
2581 };
2582 is_word_start.then_some(i)
2583 })
2584 .collect()
2585 })
2586 .collect();
2587
2588 let mut full = IncrementalScorer::new();
2589 let mut lowered = IncrementalScorer::new();
2590
2591 let a = full.score_corpus("fi", &corpus_refs, None);
2592 let b =
2593 lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
2594
2595 assert_eq!(a.len(), b.len());
2596 for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
2597 assert_eq!(ia, ib);
2598 assert_eq!(ra.match_type, rb.match_type);
2599 assert_eq!(ra.match_positions, rb.match_positions);
2600 assert!(
2601 (ra.score - rb.score).abs() < 1e-12,
2602 "score mismatch: {} vs {}",
2603 ra.score,
2604 rb.score
2605 );
2606 }
2607 }
2608
2609 #[test]
2610 fn incremental_deterministic() {
2611 let corpus = test_corpus();
2612
2613 let mut s1 = IncrementalScorer::new();
2614 let r1 = s1.score_corpus("git", &corpus, None);
2615
2616 let mut s2 = IncrementalScorer::new();
2617 let r2 = s2.score_corpus("git", &corpus, None);
2618
2619 assert_eq!(r1.len(), r2.len());
2620 for (a, b) in r1.iter().zip(r2.iter()) {
2621 assert_eq!(a.0, b.0);
2622 assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
2623 }
2624 }
2625
2626 #[test]
2631 fn match_type_descriptions() {
2632 assert_eq!(MatchType::Exact.description(), "exact match");
2633 assert_eq!(MatchType::Prefix.description(), "prefix match");
2634 assert_eq!(MatchType::WordStart.description(), "word-start match");
2635 assert_eq!(MatchType::Substring.description(), "contiguous substring");
2636 assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
2637 assert_eq!(MatchType::NoMatch.description(), "no match");
2638 }
2639
2640 #[test]
2641 fn match_type_no_match_prior_is_zero() {
2642 assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
2643 }
2644
2645 #[test]
2650 fn evidence_description_static_display() {
2651 let d = EvidenceDescription::Static("test message");
2652 assert_eq!(format!("{d}"), "test message");
2653 }
2654
2655 #[test]
2656 fn evidence_description_title_length_display() {
2657 let d = EvidenceDescription::TitleLengthChars { len: 42 };
2658 assert_eq!(format!("{d}"), "title length 42 chars");
2659 }
2660
2661 #[test]
2662 fn evidence_description_first_match_pos_display() {
2663 let d = EvidenceDescription::FirstMatchPos { pos: 5 };
2664 assert_eq!(format!("{d}"), "first match at position 5");
2665 }
2666
2667 #[test]
2668 fn evidence_description_word_boundary_count_display() {
2669 let d = EvidenceDescription::WordBoundaryCount { count: 3 };
2670 assert_eq!(format!("{d}"), "3 word boundary matches");
2671 }
2672
2673 #[test]
2674 fn evidence_description_gap_total_display() {
2675 let d = EvidenceDescription::GapTotal { total: 7 };
2676 assert_eq!(format!("{d}"), "total gap of 7 characters");
2677 }
2678
2679 #[test]
2680 fn evidence_description_coverage_percent_display() {
2681 let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
2682 let s = format!("{d}");
2683 assert!(s.contains("84%"), "Should round: {s}");
2684 }
2685
2686 #[test]
2691 fn evidence_entry_display_supports() {
2692 let entry = EvidenceEntry {
2693 kind: EvidenceKind::Position,
2694 bayes_factor: 1.5,
2695 description: EvidenceDescription::FirstMatchPos { pos: 0 },
2696 };
2697 let s = format!("{entry}");
2698 assert!(s.contains("supports"));
2699 assert!(s.contains("Position"));
2700 }
2701
2702 #[test]
2703 fn evidence_entry_display_opposes() {
2704 let entry = EvidenceEntry {
2705 kind: EvidenceKind::GapPenalty,
2706 bayes_factor: 0.5,
2707 description: EvidenceDescription::GapTotal { total: 10 },
2708 };
2709 let s = format!("{entry}");
2710 assert!(s.contains("opposes"));
2711 }
2712
2713 #[test]
2714 fn evidence_entry_display_neutral() {
2715 let entry = EvidenceEntry {
2716 kind: EvidenceKind::TitleLength,
2717 bayes_factor: 1.0,
2718 description: EvidenceDescription::Static("neutral factor"),
2719 };
2720 let s = format!("{entry}");
2721 assert!(s.contains("neutral"));
2722 }
2723
2724 #[test]
2729 fn ledger_combined_bayes_factor() {
2730 let mut ledger = EvidenceLedger::new();
2731 ledger.add(
2732 EvidenceKind::Position,
2733 2.0,
2734 EvidenceDescription::Static("a"),
2735 );
2736 ledger.add(
2737 EvidenceKind::WordBoundary,
2738 3.0,
2739 EvidenceDescription::Static("b"),
2740 );
2741 assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
2742 }
2743
2744 #[test]
2745 fn ledger_combined_bayes_factor_empty() {
2746 let ledger = EvidenceLedger::new();
2747 assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
2748 }
2749
2750 #[test]
2751 fn ledger_prior_odds_present() {
2752 let mut ledger = EvidenceLedger::new();
2753 ledger.add(
2754 EvidenceKind::MatchType,
2755 9.0,
2756 EvidenceDescription::Static("prefix"),
2757 );
2758 assert_eq!(ledger.prior_odds(), Some(9.0));
2759 }
2760
2761 #[test]
2762 fn ledger_prior_odds_absent() {
2763 let ledger = EvidenceLedger::new();
2764 assert_eq!(ledger.prior_odds(), None);
2765 }
2766
2767 #[test]
2768 fn ledger_posterior_probability_no_prior() {
2769 let mut ledger = EvidenceLedger::new();
2770 ledger.add(
2771 EvidenceKind::Position,
2772 2.0,
2773 EvidenceDescription::Static("pos"),
2774 );
2775 let prob = ledger.posterior_probability();
2778 assert!((prob - 2.0 / 3.0).abs() < 1e-9);
2779 }
2780
2781 #[test]
2782 fn ledger_posterior_probability_infinite_odds() {
2783 let mut ledger = EvidenceLedger::new();
2784 ledger.add(
2785 EvidenceKind::MatchType,
2786 f64::INFINITY,
2787 EvidenceDescription::Static("inf"),
2788 );
2789 assert_eq!(ledger.posterior_probability(), 1.0);
2790 }
2791
2792 #[test]
2797 fn fast_scorer_no_evidence() {
2798 let scorer = BayesianScorer::fast();
2799 let result = scorer.score("set", "Settings");
2800 assert!(result.score > 0.0);
2801 assert!(result.evidence.entries().is_empty());
2802 }
2803
2804 #[test]
2805 fn fast_scorer_score_matches_probability_range() {
2806 let scorer = BayesianScorer::fast();
2807 let result = scorer.score("set", "Settings");
2808 assert!(result.score >= 0.0 && result.score <= 1.0);
2809 }
2810
2811 #[test]
2812 fn fast_scorer_empty_query() {
2813 let scorer = BayesianScorer::fast();
2814 let result = scorer.score("", "Test");
2815 assert!(result.score > 0.0);
2816 assert!(result.evidence.entries().is_empty());
2817 }
2818
2819 #[test]
2820 fn fast_scorer_fuzzy_with_gap_penalty() {
2821 let scorer = BayesianScorer::fast();
2822 let result = scorer.score("ace", "a...b...c...d...e");
2823 assert_eq!(result.match_type, MatchType::Fuzzy);
2824 assert!(result.score > 0.0);
2825 }
2826
2827 #[test]
2828 fn fast_scorer_tag_boost() {
2829 let scorer = BayesianScorer::fast();
2830 let without = scorer.score("set", "Settings");
2831 let with = scorer.score_with_tags("set", "Settings", &["setup"]);
2832 assert!(with.score > without.score);
2833 }
2834
2835 #[test]
2836 fn fast_scorer_tag_no_boost_at_score_one() {
2837 let scorer = BayesianScorer::fast();
2838 let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
2840 assert!(result.score <= 1.0);
2841 }
2842
2843 #[test]
2848 fn unicode_exact_match() {
2849 let scorer = BayesianScorer::new();
2850 let result = scorer.score("日本語", "日本語");
2851 assert_eq!(result.match_type, MatchType::Exact);
2852 assert!(result.score > 0.9);
2853 }
2854
2855 #[test]
2856 fn unicode_prefix_match() {
2857 let scorer = BayesianScorer::new();
2858 let result = scorer.score("日本", "日本語テスト");
2859 assert_eq!(result.match_type, MatchType::Prefix);
2860 }
2861
2862 #[test]
2863 fn unicode_substring_match() {
2864 let scorer = BayesianScorer::new();
2865 let result = scorer.score("本語", "日本語テスト");
2866 assert_eq!(result.match_type, MatchType::Substring);
2867 }
2868
2869 #[test]
2870 fn unicode_fuzzy_match() {
2871 let scorer = BayesianScorer::new();
2872 let result = scorer.score("日テ", "日本語テスト");
2873 assert_eq!(result.match_type, MatchType::Fuzzy);
2874 }
2875
2876 #[test]
2877 fn unicode_no_match() {
2878 let scorer = BayesianScorer::new();
2879 let result = scorer.score("ωψξ", "αβγ");
2880 assert_eq!(result.match_type, MatchType::NoMatch);
2881 }
2882
2883 #[test]
2884 fn unicode_word_start_match() {
2885 let scorer = BayesianScorer::new();
2886 let result = scorer.score("gd", "go dashboard");
2888 assert_eq!(result.match_type, MatchType::WordStart);
2889 }
2890
2891 #[test]
2896 fn score_with_query_lower_matches_score() {
2897 let scorer = BayesianScorer::new();
2898 let direct = scorer.score("Set", "Settings");
2899 let pre = scorer.score_with_query_lower("Set", "set", "Settings");
2900 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2901 assert_eq!(direct.match_type, pre.match_type);
2902 }
2903
2904 #[test]
2905 fn score_with_lowered_title_matches_score() {
2906 let scorer = BayesianScorer::new();
2907 let direct = scorer.score("set", "Settings");
2908 let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
2909 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2910 }
2911
2912 #[test]
2913 fn score_with_lowered_title_and_words_matches() {
2914 let scorer = BayesianScorer::new();
2915 let direct = scorer.score("gd", "Go Dashboard");
2916 let pre = scorer.score_with_lowered_title_and_words(
2917 "gd",
2918 "gd",
2919 "Go Dashboard",
2920 "go dashboard",
2921 Some(&[0, 3]),
2922 );
2923 assert_eq!(direct.match_type, pre.match_type);
2924 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2925 }
2926
2927 #[test]
2932 fn tag_match_no_title_match_returns_nomatch() {
2933 let scorer = BayesianScorer::new();
2934 let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
2935 assert_eq!(result.match_type, MatchType::NoMatch);
2937 assert_eq!(result.score, 0.0);
2938 }
2939
2940 #[test]
2941 fn tag_match_no_matching_tag() {
2942 let scorer = BayesianScorer::new();
2943 let without = scorer.score("set", "Settings");
2944 let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
2945 assert!((without.score - with.score).abs() < f64::EPSILON);
2947 }
2948
2949 #[test]
2950 fn tag_match_case_insensitive() {
2951 let scorer = BayesianScorer::new();
2952 let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
2953 let without = scorer.score("set", "Settings");
2954 assert!(result.score > without.score);
2955 }
2956
2957 #[test]
2962 fn no_match_result_has_evidence() {
2963 let r = MatchResult::no_match();
2964 assert_eq!(r.score, 0.0);
2965 assert_eq!(r.match_type, MatchType::NoMatch);
2966 assert!(r.match_positions.is_empty());
2967 assert_eq!(r.evidence.entries().len(), 1);
2968 assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
2969 assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
2970 }
2971
2972 #[test]
2977 fn count_word_boundaries_at_start() {
2978 let scorer = BayesianScorer::new();
2979 let count = scorer.count_word_boundaries(&[0], "hello");
2981 assert_eq!(count, 1);
2982 }
2983
2984 #[test]
2985 fn count_word_boundaries_after_separators() {
2986 let scorer = BayesianScorer::new();
2987 let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2989 assert_eq!(count, 4);
2990 }
2991
2992 #[test]
2993 fn count_word_boundaries_mid_word() {
2994 let scorer = BayesianScorer::new();
2995 let count = scorer.count_word_boundaries(&[2], "hello");
2997 assert_eq!(count, 0);
2998 }
2999
3000 #[test]
3001 fn total_gap_empty() {
3002 let scorer = BayesianScorer::new();
3003 assert_eq!(scorer.total_gap(&[]), 0);
3004 }
3005
3006 #[test]
3007 fn total_gap_single() {
3008 let scorer = BayesianScorer::new();
3009 assert_eq!(scorer.total_gap(&[5]), 0);
3010 }
3011
3012 #[test]
3013 fn total_gap_contiguous() {
3014 let scorer = BayesianScorer::new();
3015 assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
3017 }
3018
3019 #[test]
3020 fn total_gap_with_gaps() {
3021 let scorer = BayesianScorer::new();
3022 assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
3024 }
3025
3026 #[test]
3031 fn conformal_custom_thresholds() {
3032 let ranker = ConformalRanker {
3033 tie_epsilon: 0.1,
3034 stable_threshold: 0.9,
3035 marginal_threshold: 0.5,
3036 };
3037 let mut r1 = MatchResult::no_match();
3039 r1.score = 0.5;
3040 r1.match_type = MatchType::Prefix;
3041 let mut r2 = MatchResult::no_match();
3042 r2.score = 0.45;
3043 r2.match_type = MatchType::Prefix;
3044
3045 let ranked = ranker.rank(vec![r1, r2]);
3046 assert_eq!(
3047 ranked.items[0].rank_confidence.stability,
3048 RankStability::Unstable,
3049 "Gap 0.05 < epsilon 0.1 → tie"
3050 );
3051 }
3052
3053 #[test]
3054 fn conformal_rank_top_k_larger_than_count() {
3055 let scorer = BayesianScorer::new();
3056 let results = vec![scorer.score("set", "Settings")];
3057 let ranker = ConformalRanker::new();
3058 let ranked = ranker.rank_top_k(results, 100);
3059 assert_eq!(ranked.items.len(), 1);
3060 }
3061
3062 #[test]
3063 fn conformal_rank_top_k_zero() {
3064 let scorer = BayesianScorer::new();
3065 let results = vec![
3066 scorer.score("set", "Settings"),
3067 scorer.score("set", "Asset"),
3068 ];
3069 let ranker = ConformalRanker::new();
3070 let ranked = ranker.rank_top_k(results, 0);
3071 assert!(ranked.items.is_empty());
3072 assert_eq!(ranked.summary.stable_count, 0);
3073 }
3074
3075 #[test]
3076 fn conformal_rank_confidence_display_marginal() {
3077 let rc = RankConfidence {
3078 confidence: 0.5,
3079 gap_to_next: 0.05,
3080 stability: RankStability::Marginal,
3081 };
3082 let s = format!("{rc}");
3083 assert!(s.contains("marginal"));
3084 }
3085
3086 #[test]
3087 fn conformal_rank_confidence_display_unstable() {
3088 let rc = RankConfidence {
3089 confidence: 0.1,
3090 gap_to_next: 0.001,
3091 stability: RankStability::Unstable,
3092 };
3093 let s = format!("{rc}");
3094 assert!(s.contains("unstable"));
3095 }
3096
3097 #[test]
3098 fn conformal_median_gap_even_count() {
3099 let mut results = Vec::new();
3101 for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
3102 let mut r = MatchResult::no_match();
3103 r.score = s;
3104 r.match_type = MatchType::Prefix;
3105 results.push(r);
3106 }
3107 let ranker = ConformalRanker::new();
3108 let ranked = ranker.rank(results);
3109 assert!(
3111 (ranked.summary.median_gap - 0.2).abs() < 0.01,
3112 "median_gap={}, expected ~0.2",
3113 ranked.summary.median_gap
3114 );
3115 }
3116
3117 #[test]
3122 fn incremental_with_scorer_uses_provided_scorer() {
3123 let scorer = BayesianScorer::new(); let mut inc = IncrementalScorer::with_scorer(scorer);
3125 let corpus = test_corpus();
3126 let results = inc.score_corpus("git", &corpus, None);
3127 assert!(!results.is_empty());
3128 assert!(!results[0].1.evidence.entries().is_empty());
3130 }
3131
3132 #[test]
3133 fn incremental_default_trait() {
3134 let inc = IncrementalScorer::default();
3135 assert_eq!(inc.stats().full_scans, 0);
3136 assert_eq!(inc.stats().incremental_scans, 0);
3137 }
3138
3139 #[test]
3140 fn incremental_stats_prune_ratio_zero_when_empty() {
3141 let stats = IncrementalStats::default();
3142 assert_eq!(stats.prune_ratio(), 0.0);
3143 }
3144
3145 #[test]
3146 fn incremental_score_corpus_with_lowered() {
3147 let corpus = vec!["Open File", "Save File", "Close Tab"];
3148 let lower = vec!["open file", "save file", "close tab"];
3149 let mut inc = IncrementalScorer::new();
3150 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3151 assert!(!results.is_empty());
3152 for (idx, _) in &results {
3154 assert!(*idx < corpus.len());
3155 }
3156 }
3157
3158 #[test]
3159 fn incremental_score_corpus_with_lowered_incremental_path() {
3160 let corpus = vec!["Open File", "Save File", "Close Tab"];
3161 let lower = vec!["open file", "save file", "close tab"];
3162 let mut inc = IncrementalScorer::new();
3163 inc.score_corpus_with_lowered("f", &corpus, &lower, None);
3164 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3165 assert_eq!(inc.stats().incremental_scans, 1);
3166 assert!(!results.is_empty());
3167 }
3168
3169 #[test]
3170 fn incremental_score_corpus_with_lowered_and_words() {
3171 let corpus = vec![
3172 "Open File".to_string(),
3173 "Save File".to_string(),
3174 "Close Tab".to_string(),
3175 ];
3176 let lower = vec![
3177 "open file".to_string(),
3178 "save file".to_string(),
3179 "close tab".to_string(),
3180 ];
3181 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3182
3183 let mut inc = IncrementalScorer::new();
3184 let results =
3185 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3186 assert!(!results.is_empty());
3187 }
3188
3189 #[test]
3190 fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
3191 let corpus = vec![
3192 "Open File".to_string(),
3193 "Save File".to_string(),
3194 "Close Tab".to_string(),
3195 ];
3196 let lower = vec![
3197 "open file".to_string(),
3198 "save file".to_string(),
3199 "close tab".to_string(),
3200 ];
3201 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3202
3203 let mut inc = IncrementalScorer::new();
3204 inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
3205 let results =
3206 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3207 assert_eq!(inc.stats().incremental_scans, 1);
3208 assert!(!results.is_empty());
3209 }
3210
3211 #[test]
3216 fn bayesian_scorer_default_has_evidence_off() {
3217 let scorer = BayesianScorer::default();
3218 assert!(!scorer.track_evidence);
3219 }
3220
3221 #[test]
3226 fn word_start_match_with_hyphens() {
3227 let scorer = BayesianScorer::new();
3228 let result = scorer.score("fc", "file-commander");
3229 assert_eq!(result.match_type, MatchType::WordStart);
3230 assert_eq!(result.match_positions, vec![0, 5]);
3231 }
3232
3233 #[test]
3234 fn word_start_match_with_underscores() {
3235 let scorer = BayesianScorer::new();
3236 let result = scorer.score("fc", "file_commander");
3237 assert_eq!(result.match_type, MatchType::WordStart);
3238 assert_eq!(result.match_positions, vec![0, 5]);
3239 }
3240
3241 #[test]
3246 fn match_type_ord() {
3247 let mut types = vec![
3248 MatchType::Exact,
3249 MatchType::NoMatch,
3250 MatchType::Fuzzy,
3251 MatchType::Prefix,
3252 MatchType::Substring,
3253 MatchType::WordStart,
3254 ];
3255 types.sort();
3256 assert_eq!(
3257 types,
3258 vec![
3259 MatchType::NoMatch,
3260 MatchType::Fuzzy,
3261 MatchType::Substring,
3262 MatchType::WordStart,
3263 MatchType::Prefix,
3264 MatchType::Exact,
3265 ]
3266 );
3267 }
3268
3269 #[test]
3274 fn evidence_kind_equality() {
3275 assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
3276 assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
3277 }
3278
3279 #[test]
3284 fn earlier_substring_scores_higher() {
3285 let scorer = BayesianScorer::new();
3286 let early = scorer.score("set", "set in stone");
3287 let late = scorer.score("set", "the asset");
3288 assert!(
3290 early.score > late.score,
3291 "Earlier match should score higher: {} vs {}",
3292 early.score,
3293 late.score
3294 );
3295 }
3296}
3297
3298#[cfg(test)]
3303mod perf_tests {
3304 use super::*;
3305 use std::time::Instant;
3306
3307 #[derive(Debug, Clone, Copy)]
3308 struct PerfStats {
3309 p50_us: u64,
3310 p95_us: u64,
3311 p99_us: u64,
3312 mean_us: f64,
3313 variance_us: f64,
3314 samples: usize,
3315 }
3316
3317 const SINGLE_SCORE_BUDGET_US: u64 = 10; const CORPUS_100_BUDGET_US: u64 = 500; const CORPUS_1000_BUDGET_US: u64 = 5_000; const CORPUS_5000_BUDGET_US: u64 = 25_000; const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 5_000; const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000; const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;
3327
3328 fn is_coverage_run() -> bool {
3329 std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
3330 }
3331
3332 fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
3333 if coverage_run {
3337 base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
3338 } else {
3339 base
3340 }
3341 }
3342
3343 fn coverage_budget_us(base: u64) -> u64 {
3344 coverage_budget_us_with_mode(base, is_coverage_run())
3345 }
3346
3347 #[test]
3348 fn coverage_budget_us_with_mode_respects_flag() {
3349 assert_eq!(coverage_budget_us_with_mode(123, false), 123);
3350 assert_eq!(
3351 coverage_budget_us_with_mode(123, true),
3352 123 * COVERAGE_BUDGET_MULTIPLIER
3353 );
3354 }
3355
3356 fn make_corpus(size: usize) -> Vec<String> {
3358 let base = [
3359 "Open File",
3360 "Save File",
3361 "Close Tab",
3362 "Split Editor Right",
3363 "Split Editor Down",
3364 "Toggle Terminal",
3365 "Go to Line",
3366 "Find in Files",
3367 "Replace in Files",
3368 "Git: Commit",
3369 "Git: Push",
3370 "Git: Pull",
3371 "Debug: Start",
3372 "Debug: Stop",
3373 "Debug: Step Over",
3374 "Format Document",
3375 "Rename Symbol",
3376 "Go to Definition",
3377 "Find All References",
3378 "Toggle Sidebar",
3379 ];
3380 base.iter()
3381 .cycle()
3382 .take(size)
3383 .enumerate()
3384 .map(|(i, cmd)| {
3385 if i < base.len() {
3386 (*cmd).to_string()
3387 } else {
3388 format!("{} ({})", cmd, i)
3389 }
3390 })
3391 .collect()
3392 }
3393
3394 fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
3395 let mut times = Vec::with_capacity(iterations);
3396 for _ in 0..3 {
3398 f();
3399 }
3400 for _ in 0..iterations {
3401 let start = Instant::now();
3402 f();
3403 times.push(start.elapsed().as_micros() as u64);
3404 }
3405 times.sort_unstable();
3406 let len = times.len();
3407 let p50 = times[len / 2];
3408 let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
3409 let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
3410 let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
3411 let variance = times
3412 .iter()
3413 .map(|value| {
3414 let delta = *value as f64 - mean;
3415 delta * delta
3416 })
3417 .sum::<f64>()
3418 / len as f64;
3419
3420 PerfStats {
3421 p50_us: p50,
3422 p95_us: p95,
3423 p99_us: p99,
3424 mean_us: mean,
3425 variance_us: variance,
3426 samples: len,
3427 }
3428 }
3429
3430 #[test]
3431 fn perf_single_score_under_budget() {
3432 let scorer = BayesianScorer::fast();
3433 let stats = measure_stats_us(200, || {
3434 std::hint::black_box(scorer.score("git co", "Git: Commit"));
3435 });
3436 eprintln!(
3437 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3438 stats.samples,
3439 stats.p50_us,
3440 stats.p95_us,
3441 stats.p99_us,
3442 stats.mean_us,
3443 stats.variance_us
3444 );
3445 let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
3446 assert!(
3447 stats.p50_us <= budget,
3448 "Single score p50 = {}µs exceeds budget {}µs",
3449 stats.p50_us,
3450 budget,
3451 );
3452 }
3453
3454 #[test]
3455 fn perf_corpus_100_under_budget() {
3456 let scorer = BayesianScorer::fast();
3457 let corpus = make_corpus(100);
3458 let stats = measure_stats_us(50, || {
3459 let mut results: Vec<_> = corpus
3460 .iter()
3461 .map(|t| scorer.score("git co", t))
3462 .filter(|r| r.score > 0.0)
3463 .collect();
3464 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3465 std::hint::black_box(&results);
3466 });
3467 eprintln!(
3468 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3469 stats.samples,
3470 stats.p50_us,
3471 stats.p95_us,
3472 stats.p99_us,
3473 stats.mean_us,
3474 stats.variance_us
3475 );
3476 let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
3477 assert!(
3478 stats.p95_us <= budget,
3479 "100-item corpus p95 = {}µs exceeds budget {}µs",
3480 stats.p95_us,
3481 budget,
3482 );
3483 }
3484
3485 #[test]
3486 fn perf_corpus_1000_under_budget() {
3487 let scorer = BayesianScorer::fast();
3488 let corpus = make_corpus(1_000);
3489 let stats = measure_stats_us(20, || {
3490 let mut results: Vec<_> = corpus
3491 .iter()
3492 .map(|t| scorer.score("git co", t))
3493 .filter(|r| r.score > 0.0)
3494 .collect();
3495 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3496 std::hint::black_box(&results);
3497 });
3498 eprintln!(
3499 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3500 stats.samples,
3501 stats.p50_us,
3502 stats.p95_us,
3503 stats.p99_us,
3504 stats.mean_us,
3505 stats.variance_us
3506 );
3507 let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
3508 assert!(
3509 stats.p95_us <= budget,
3510 "1000-item corpus p95 = {}µs exceeds budget {}µs",
3511 stats.p95_us,
3512 budget,
3513 );
3514 }
3515
3516 #[test]
3517 fn perf_corpus_5000_under_budget() {
3518 let scorer = BayesianScorer::fast();
3519 let corpus = make_corpus(5_000);
3520 let stats = measure_stats_us(10, || {
3521 let mut results: Vec<_> = corpus
3522 .iter()
3523 .map(|t| scorer.score("git co", t))
3524 .filter(|r| r.score > 0.0)
3525 .collect();
3526 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3527 std::hint::black_box(&results);
3528 });
3529 eprintln!(
3530 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3531 stats.samples,
3532 stats.p50_us,
3533 stats.p95_us,
3534 stats.p99_us,
3535 stats.mean_us,
3536 stats.variance_us
3537 );
3538 let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
3539 assert!(
3540 stats.p95_us <= budget,
3541 "5000-item corpus p95 = {}µs exceeds budget {}µs",
3542 stats.p95_us,
3543 budget,
3544 );
3545 }
3546
3547 #[test]
3548 fn perf_incremental_7key_100_under_budget() {
3549 let corpus = make_corpus(100);
3550 let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3551 let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3552
3553 let stats = measure_stats_us(30, || {
3554 let mut inc = IncrementalScorer::new();
3555 for query in &queries {
3556 let results = inc.score_corpus(query, &corpus_refs, None);
3557 std::hint::black_box(&results);
3558 }
3559 });
3560 eprintln!(
3561 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3562 stats.samples,
3563 stats.p50_us,
3564 stats.p95_us,
3565 stats.p99_us,
3566 stats.mean_us,
3567 stats.variance_us
3568 );
3569 let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
3570 assert!(
3571 stats.p95_us <= budget,
3572 "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
3573 stats.p95_us,
3574 budget,
3575 );
3576 }
3577
3578 #[test]
3579 fn perf_incremental_7key_1000_under_budget() {
3580 let corpus = make_corpus(1_000);
3581 let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3582 let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3583
3584 let stats = measure_stats_us(10, || {
3585 let mut inc = IncrementalScorer::new();
3586 for query in &queries {
3587 let results = inc.score_corpus(query, &corpus_refs, None);
3588 std::hint::black_box(&results);
3589 }
3590 });
3591 eprintln!(
3592 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3593 stats.samples,
3594 stats.p50_us,
3595 stats.p95_us,
3596 stats.p99_us,
3597 stats.mean_us,
3598 stats.variance_us
3599 );
3600 let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
3601 assert!(
3602 stats.p95_us <= budget,
3603 "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
3604 stats.p95_us,
3605 budget,
3606 );
3607 }
3608
3609 #[test]
3610 fn perf_incremental_faster_than_naive() {
3611 let corpus = make_corpus(100);
3612 let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3613 let scorer = BayesianScorer::fast();
3614 let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3615
3616 let naive_stats = measure_stats_us(30, || {
3617 for query in &queries {
3618 let mut results: Vec<_> = corpus
3619 .iter()
3620 .map(|t| scorer.score(query, t))
3621 .filter(|r| r.score > 0.0)
3622 .collect();
3623 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3624 std::hint::black_box(&results);
3625 }
3626 });
3627
3628 let inc_stats = measure_stats_us(30, || {
3629 let mut inc = IncrementalScorer::new();
3630 for query in &queries {
3631 let results = inc.score_corpus(query, &corpus_refs, None);
3632 std::hint::black_box(&results);
3633 }
3634 });
3635 eprintln!(
3636 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
3637 naive_stats.samples,
3638 naive_stats.p50_us,
3639 naive_stats.p95_us,
3640 inc_stats.p50_us,
3641 inc_stats.p95_us
3642 );
3643
3644 assert!(
3647 inc_stats.p50_us <= naive_stats.p50_us * 2 + 50, "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
3649 inc_stats.p50_us,
3650 naive_stats.p50_us,
3651 );
3652 }
3653
3654 fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
3655 if p50_small_us > 0 {
3656 p50_large_us as f64 / p50_small_us as f64
3657 } else {
3658 0.0
3659 }
3660 }
3661
3662 #[test]
3663 fn scaling_ratio_handles_zero_divisor() {
3664 assert_eq!(scaling_ratio_us(0, 123), 0.0);
3665 assert_eq!(scaling_ratio_us(10, 25), 2.5);
3666 }
3667
3668 #[test]
3669 fn perf_scaling_sublinear() {
3670 let scorer = BayesianScorer::fast();
3671 let corpus_100 = make_corpus(100);
3672 let corpus_1000 = make_corpus(1_000);
3673
3674 let stats_100 = measure_stats_us(30, || {
3675 let mut results: Vec<_> = corpus_100
3676 .iter()
3677 .map(|t| scorer.score("git", t))
3678 .filter(|r| r.score > 0.0)
3679 .collect();
3680 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3681 std::hint::black_box(&results);
3682 });
3683
3684 let stats_1000 = measure_stats_us(20, || {
3685 let mut results: Vec<_> = corpus_1000
3686 .iter()
3687 .map(|t| scorer.score("git", t))
3688 .filter(|r| r.score > 0.0)
3689 .collect();
3690 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3691 std::hint::black_box(&results);
3692 });
3693 eprintln!(
3694 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
3695 stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
3696 );
3697
3698 let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
3701 assert!(
3702 ratio < 25.0,
3703 "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
3704 ratio,
3705 stats_100.p50_us,
3706 stats_1000.p50_us,
3707 );
3708 }
3709}