1#![forbid(unsafe_code)]
2
3use std::cmp::Ordering;
47use std::fmt;
48
/// How a query matched a candidate title, ordered weakest-to-strongest so
/// the derived `Ord` can break score ties (variant order is load-bearing).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    NoMatch,
    Fuzzy,
    Substring,
    WordStart,
    Prefix,
    Exact,
}

impl MatchType {
    /// Prior odds that an item with this match type is the intended one,
    /// used as the first Bayes factor when scoring.
    pub fn prior_odds(self) -> f64 {
        match self {
            Self::NoMatch => 0.0,
            Self::Fuzzy => 0.333,
            Self::Substring => 2.0,
            Self::WordStart => 4.0,
            Self::Prefix => 9.0,
            Self::Exact => 99.0,
        }
    }

    /// Human-readable label for this match type.
    pub fn description(self) -> &'static str {
        match self {
            Self::NoMatch => "no match",
            Self::Fuzzy => "fuzzy match",
            Self::Substring => "contiguous substring",
            Self::WordStart => "word-start match",
            Self::Prefix => "prefix match",
            Self::Exact => "exact match",
        }
    }
}
100
101fn compare_ranked_match_results(
102 left: &(usize, MatchResult),
103 right: &(usize, MatchResult),
104) -> Ordering {
105 right
106 .1
107 .score
108 .partial_cmp(&left.1.score)
109 .unwrap_or(Ordering::Equal)
110 .then_with(|| right.1.match_type.cmp(&left.1.match_type))
111}
112
/// A single piece of evidence recorded while scoring a match.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    /// Which scoring signal produced this entry.
    pub kind: EvidenceKind,
    /// Multiplicative Bayes factor contributed by this signal
    /// (>1 supports the match, <1 opposes it, 1 is neutral).
    pub bayes_factor: f64,
    /// Structured, human-readable explanation of the factor.
    pub description: EvidenceDescription,
}

/// The scoring signal that produced an [`EvidenceEntry`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    /// Prior odds from the detected [`MatchType`].
    MatchType,
    /// Bonus for matched characters landing on word boundaries.
    WordBoundary,
    /// Bonus for matches starting early in the title.
    Position,
    /// Penalty for gaps between fuzzy-match positions.
    GapPenalty,
    /// Bonus applied when the query matches an item tag.
    TagMatch,
    /// Factor derived from query/title length coverage.
    TitleLength,
}
145
146impl fmt::Display for EvidenceEntry {
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 let direction = if self.bayes_factor > 1.0 {
149 "supports"
150 } else if self.bayes_factor < 1.0 {
151 "opposes"
152 } else {
153 "neutral"
154 };
155 write!(
156 f,
157 "{:?}: BF={:.2} ({}) - {}",
158 self.kind, self.bayes_factor, direction, self.description
159 )
160 }
161}
162
/// Structured description payload for an [`EvidenceEntry`]; rendered to a
/// short phrase by its `Display` impl.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    /// Fixed explanatory text.
    Static(&'static str),
    /// Title length as reported by `str::len` — bytes, despite the name.
    TitleLengthChars { len: usize },
    /// Index of the first matched position.
    FirstMatchPos { pos: usize },
    /// Number of matched positions that fell on word boundaries.
    WordBoundaryCount { count: usize },
    /// Total positions skipped between fuzzy-match hits.
    GapTotal { total: usize },
    /// Query length as a percentage of title length.
    CoveragePercent { percent: f64 },
}
173
impl fmt::Display for EvidenceDescription {
    /// Renders the description as the short phrase used in ledger dumps
    /// and in the JSON produced by `EvidenceLedger::to_jsonl`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Static(msg) => write!(f, "{msg}"),
            Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
            Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
            Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
            Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
            Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
        }
    }
}
186
/// Ordered log of the Bayes factors that went into a match score.
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    // Entries in the order they were recorded.
    entries: Vec<EvidenceEntry>,
}
199
impl EvidenceLedger {
    /// Creates an empty ledger.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one piece of evidence.
    pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
        self.entries.push(EvidenceEntry {
            kind,
            bayes_factor,
            description,
        });
    }

    /// All recorded entries, in insertion order.
    pub fn entries(&self) -> &[EvidenceEntry] {
        &self.entries
    }

    /// Product of every entry's Bayes factor, including the match-type prior.
    pub fn combined_bayes_factor(&self) -> f64 {
        self.entries.iter().map(|e| e.bayes_factor).product()
    }

    /// The prior odds: the Bayes factor of the first `MatchType` entry, if
    /// one was recorded.
    #[must_use = "use the prior odds (if any) for diagnostics"]
    pub fn prior_odds(&self) -> Option<f64> {
        self.entries
            .iter()
            .find(|e| e.kind == EvidenceKind::MatchType)
            .map(|e| e.bayes_factor)
    }

    /// Posterior probability of a match: prior odds times the product of all
    /// non-prior Bayes factors, converted from odds to probability.
    /// Saturates at 1.0 if the posterior odds overflow to infinity.
    pub fn posterior_probability(&self) -> f64 {
        // Default to even prior odds when no MatchType entry was recorded.
        let prior = self.prior_odds().unwrap_or(1.0);
        let bf: f64 = self
            .entries
            .iter()
            .filter(|e| e.kind != EvidenceKind::MatchType)
            .map(|e| e.bayes_factor)
            .product();

        let posterior_odds = prior * bf;
        if posterior_odds.is_infinite() {
            1.0
        } else {
            // odds -> probability: p = o / (1 + o)
            posterior_odds / (1.0 + posterior_odds)
        }
    }
}
256
257impl EvidenceLedger {
258 #[must_use]
260 pub fn to_jsonl(&self) -> String {
261 let entries_json: Vec<String> = self
262 .entries
263 .iter()
264 .map(|e| {
265 format!(
266 r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
267 e.kind, e.bayes_factor, e.description
268 )
269 })
270 .collect();
271 format!(
272 r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
273 entries_json.join(","),
274 self.combined_bayes_factor(),
275 self.posterior_probability(),
276 )
277 }
278}
279
impl fmt::Display for EvidenceLedger {
    /// Multi-line human-readable dump: one line per entry, then the
    /// combined Bayes factor and posterior probability.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Evidence Ledger:")?;
        for entry in &self.entries {
            writeln!(f, " {}", entry)?;
        }
        writeln!(f, " Combined BF: {:.3}", self.combined_bayes_factor())?;
        writeln!(f, " Posterior P: {:.3}", self.posterior_probability())?;
        Ok(())
    }
}
291
/// Outcome of scoring one query against one title.
#[derive(Debug, Clone)]
pub struct MatchResult {
    /// Posterior probability in [0, 1] that this item is the intended match.
    pub score: f64,
    /// The strongest match category that applied.
    pub match_type: MatchType,
    /// Indices of matched characters in the title (byte indices on the
    /// all-ASCII path, char indices otherwise).
    pub match_positions: Vec<usize>,
    /// Ledger of the factors behind `score` (empty when tracking is off).
    pub evidence: EvidenceLedger,
}
308
309impl MatchResult {
310 pub fn no_match() -> Self {
312 let mut evidence = EvidenceLedger::new();
313 evidence.add(
314 EvidenceKind::MatchType,
315 0.0,
316 EvidenceDescription::Static("no matching characters found"),
317 );
318 Self {
319 score: 0.0,
320 match_type: MatchType::NoMatch,
321 match_positions: Vec::new(),
322 evidence,
323 }
324 }
325}
326
/// Scores queries against titles by combining Bayes factors.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    /// When true, every result carries a full [`EvidenceLedger`]; when
    /// false, factors are multiplied inline and the ledger is left empty.
    pub track_evidence: bool,
}
340
341impl BayesianScorer {
    /// Scorer with evidence tracking enabled (full ledger on every result).
    pub fn new() -> Self {
        Self {
            track_evidence: true,
        }
    }

    /// Scorer without evidence tracking, for hot paths.
    pub fn fast() -> Self {
        Self {
            track_evidence: false,
        }
    }
355
356 pub fn score(&self, query: &str, title: &str) -> MatchResult {
360 if query.len() > title.len() {
362 return MatchResult::no_match();
363 }
364
365 if query.is_empty() {
367 return self.score_empty_query(title);
368 }
369
370 let query_lower = query.to_lowercase();
372 let title_lower = title.to_lowercase();
373
374 let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);
376
377 if match_type == MatchType::NoMatch {
378 return MatchResult::no_match();
379 }
380
381 self.compute_score(match_type, positions, &query_lower, title)
383 }
384
    /// Like [`Self::score`], but takes a pre-lowercased query so callers
    /// scoring many titles lowercase the query only once.
    pub fn score_with_query_lower(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
    ) -> MatchResult {
        let title_lower = title.to_lowercase();
        self.score_with_lowered_title(query, query_lower, title, &title_lower)
    }

    /// Like [`Self::score_with_query_lower`], but also takes a
    /// pre-lowercased title.
    pub fn score_with_lowered_title(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
    ) -> MatchResult {
        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
    }

    /// Full-control entry point: pre-lowercased query and title, plus
    /// optional precomputed word-start offsets (only consulted on the
    /// all-ASCII fast path).
    pub fn score_with_lowered_title_and_words(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> MatchResult {
        // Byte-length pruning: a longer query cannot match a shorter title.
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        if query.is_empty() {
            return self.score_empty_query(title);
        }

        let (match_type, positions) =
            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        self.compute_score(match_type, positions, query_lower, title)
    }
441
    /// Scores the title, then boosts the result (Bayes factor 3.0) when the
    /// query is contained in any tag. No-match results are never boosted.
    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
        let mut result = self.score(query, title);

        let query_lower = query.to_lowercase();
        let tag_match = tags
            .iter()
            .any(|tag| crate::contains_ignore_case(tag, &query_lower));

        if tag_match && result.match_type != MatchType::NoMatch {
            if self.track_evidence {
                result.evidence.add(
                    EvidenceKind::TagMatch,
                    3.0,
                    EvidenceDescription::Static("query matches tag"),
                );
                // Re-derive the score so it reflects the new ledger entry.
                result.score = result.evidence.posterior_probability();
            } else if (0.0..1.0).contains(&result.score) {
                // No ledger: apply the same 3.0 factor directly in odds
                // space (skipped for score == 1.0, which has infinite odds).
                let odds = result.score / (1.0 - result.score);
                let boosted = odds * 3.0;
                result.score = boosted / (1.0 + boosted);
            }
        }

        result
    }
470
    /// Scores an empty query: everything matches weakly, with a slight
    /// preference for shorter titles.
    fn score_empty_query(&self, title: &str) -> MatchResult {
        // Shorter titles get a marginally larger factor (at most +10%).
        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
        if self.track_evidence {
            let mut evidence = EvidenceLedger::new();
            evidence.add(
                EvidenceKind::MatchType,
                1.0,
                EvidenceDescription::Static("empty query matches all"),
            );
            evidence.add(
                EvidenceKind::TitleLength,
                length_factor,
                EvidenceDescription::TitleLengthChars { len: title.len() },
            );
            let score = evidence.posterior_probability();
            MatchResult {
                score,
                match_type: MatchType::Fuzzy,
                match_positions: Vec::new(),
                evidence,
            }
        } else {
            // Same arithmetic as the ledger path, without the bookkeeping.
            let odds = length_factor;
            let score = odds / (1.0 + odds);
            MatchResult {
                score,
                match_type: MatchType::Fuzzy,
                match_positions: Vec::new(),
                evidence: EvidenceLedger::new(),
            }
        }
    }
505
    /// Detects the strongest match type without precomputed word starts.
    fn detect_match_type(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
    ) -> (MatchType, Vec<usize>) {
        self.detect_match_type_with_words(query_lower, title_lower, title, None)
    }

    /// Detects the strongest match type, trying Exact, Prefix, WordStart,
    /// Substring, then Fuzzy, in that order.
    ///
    /// All-ASCII inputs take a byte-level fast path (where `word_starts`,
    /// if given, are byte offsets); otherwise positions are char indices
    /// and `word_starts` is ignored.
    fn detect_match_type_with_words(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        if query_lower.is_ascii() && title_lower.is_ascii() {
            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
        }

        if query_lower == title_lower {
            let positions: Vec<usize> = (0..title.chars().count()).collect();
            return (MatchType::Exact, positions);
        }

        if title_lower.starts_with(query_lower) {
            let positions: Vec<usize> = (0..query_lower.chars().count()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
            return (MatchType::WordStart, positions);
        }

        if let Some(byte_start) = title_lower.find(query_lower) {
            // Convert the byte offset of the hit into a char index.
            let char_start = title_lower[..byte_start].chars().count();
            let char_len = query_lower.chars().count();
            let positions: Vec<usize> = (char_start..char_start + char_len).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }
560
    /// Byte-level match-type detection for all-ASCII query and title; all
    /// returned positions are byte offsets (== char indices for ASCII).
    fn detect_match_type_ascii(
        &self,
        query_lower: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        let query_bytes = query_lower.as_bytes();
        let title_bytes = title_lower.as_bytes();

        if query_bytes == title_bytes {
            let positions: Vec<usize> = (0..title_bytes.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_bytes.starts_with(query_bytes) {
            let positions: Vec<usize> = (0..query_bytes.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
        {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }
597
    /// Greedy word-start subsequence match for non-ASCII input. Returns the
    /// char indices of the matched characters, or `None` if the query could
    /// not be fully consumed at word starts.
    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        let title_bytes = title.as_bytes();
        for (char_index, (i, c)) in title.char_indices().enumerate() {
            // A char is a word start at offset 0 or right after ' ', '-',
            // or '_'. The previous *byte* is checked: for a multi-byte
            // previous char it is a UTF-8 continuation byte and correctly
            // fails the comparison.
            let is_word_start = i == 0 || {
                let prev = title_bytes
                    .get(i.saturating_sub(1))
                    .copied()
                    .unwrap_or(b' ');
                prev == b' ' || prev == b'-' || prev == b'_'
            };

            if is_word_start
                && let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(char_index);
                query_chars.next();
            }
        }

        // It only counts as a match if every query char was consumed.
        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }
629
630 fn word_start_match_ascii(
632 &self,
633 query: &[u8],
634 title: &[u8],
635 word_starts: Option<&[usize]>,
636 ) -> Option<Vec<usize>> {
637 let mut positions = Vec::new();
638 let mut query_idx = 0;
639 if query.is_empty() {
640 return Some(positions);
641 }
642
643 if let Some(starts) = word_starts {
644 for &pos in starts {
645 if pos >= title.len() {
646 continue;
647 }
648 if title[pos] == query[query_idx] {
649 positions.push(pos);
650 query_idx += 1;
651 if query_idx == query.len() {
652 return Some(positions);
653 }
654 }
655 }
656 } else {
657 for (i, &b) in title.iter().enumerate() {
658 let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
659 if is_word_start && b == query[query_idx] {
660 positions.push(i);
661 query_idx += 1;
662 if query_idx == query.len() {
663 return Some(positions);
664 }
665 }
666 }
667 }
668
669 None
670 }
671
672 fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
674 let mut positions = Vec::new();
675 let mut query_chars = query.chars().peekable();
676
677 for (char_index, c) in title.chars().enumerate() {
678 if let Some(&qc) = query_chars.peek()
679 && c == qc
680 {
681 positions.push(char_index);
682 query_chars.next();
683 }
684 }
685
686 if query_chars.peek().is_none() {
687 Some(positions)
688 } else {
689 None
690 }
691 }
692
693 fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
695 let mut positions = Vec::new();
696 let mut query_idx = 0;
697 if query.is_empty() {
698 return Some(positions);
699 }
700
701 for (i, &b) in title.iter().enumerate() {
702 if b == query[query_idx] {
703 positions.push(i);
704 query_idx += 1;
705 if query_idx == query.len() {
706 return Some(positions);
707 }
708 }
709 }
710
711 None
712 }
713
714 fn compute_score(
716 &self,
717 match_type: MatchType,
718 positions: Vec<usize>,
719 query: &str,
720 title: &str,
721 ) -> MatchResult {
722 let positions_ref = positions.as_slice();
723 if !self.track_evidence {
724 let mut combined_bf = match_type.prior_odds();
725
726 if let Some(&first_pos) = positions_ref.first() {
727 let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
728 combined_bf *= position_factor;
729 }
730
731 let word_boundary_count = self.count_word_boundaries(positions_ref, title);
732 if word_boundary_count > 0 {
733 let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
734 combined_bf *= boundary_factor;
735 }
736
737 if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
738 let total_gap = self.total_gap(positions_ref);
739 let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
740 combined_bf *= gap_factor;
741 }
742
743 let title_len = title.len().max(1);
744 let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
745 combined_bf *= length_factor;
746
747 let score = combined_bf / (1.0 + combined_bf);
748 return MatchResult {
749 score,
750 match_type,
751 match_positions: positions,
752 evidence: EvidenceLedger::new(),
753 };
754 }
755
756 let mut evidence = EvidenceLedger::new();
757
758 let prior_odds = match_type.prior_odds();
760 evidence.add(
761 EvidenceKind::MatchType,
762 prior_odds,
763 EvidenceDescription::Static(match_type.description()),
764 );
765
766 if let Some(&first_pos) = positions_ref.first() {
768 let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
769 evidence.add(
770 EvidenceKind::Position,
771 position_factor,
772 EvidenceDescription::FirstMatchPos { pos: first_pos },
773 );
774 }
775
776 let word_boundary_count = self.count_word_boundaries(positions_ref, title);
778 if word_boundary_count > 0 {
779 let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
780 evidence.add(
781 EvidenceKind::WordBoundary,
782 boundary_factor,
783 EvidenceDescription::WordBoundaryCount {
784 count: word_boundary_count,
785 },
786 );
787 }
788
789 if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
791 let total_gap = self.total_gap(positions_ref);
792 let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
793 evidence.add(
794 EvidenceKind::GapPenalty,
795 gap_factor,
796 EvidenceDescription::GapTotal { total: total_gap },
797 );
798 }
799
800 let title_len = title.len().max(1);
802 let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
803 evidence.add(
804 EvidenceKind::TitleLength,
805 length_factor,
806 EvidenceDescription::CoveragePercent {
807 percent: (query.len() as f64 / title_len as f64) * 100.0,
808 },
809 );
810
811 let score = evidence.posterior_probability();
812
813 MatchResult {
814 score,
815 match_type,
816 match_positions: positions,
817 evidence,
818 }
819 }
820
821 fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
823 let title_bytes = title.as_bytes();
824 let mut count = 0;
825 for &pos in positions {
826 let is_boundary = if pos == 0 {
827 true
828 } else {
829 let prev = title_bytes
830 .get(pos.saturating_sub(1))
831 .copied()
832 .unwrap_or(b' ');
833 prev == b' ' || prev == b'-' || prev == b'_'
834 };
835 if is_boundary {
836 count += 1;
837 }
838 }
839 count
840 }
841
842 fn total_gap(&self, positions: &[usize]) -> usize {
844 if positions.len() < 2 {
845 return 0;
846 }
847 let mut total = 0;
848 for window in positions.windows(2) {
849 total += window[1].saturating_sub(window[0]).saturating_sub(1);
850 }
851 total
852 }
853}
854
/// Confidence information attached to one ranked item.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    /// Fraction of observed adjacent score gaps that are no larger than this
    /// item's own gap; higher means the item is better separated from the
    /// next one.
    pub confidence: f64,

    /// Score difference to the next-ranked item (0.0 for the last item).
    pub gap_to_next: f64,

    /// Discrete stability classification derived from gap and confidence.
    pub stability: RankStability,
}
883
/// How robust an item's rank position is to small score perturbations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    Stable,
    Marginal,
    Unstable,
}

impl RankStability {
    /// Lowercase human-readable name of the stability class.
    pub fn label(self) -> &'static str {
        match self {
            Self::Unstable => "unstable",
            Self::Marginal => "marginal",
            Self::Stable => "stable",
        }
    }
}
905
/// Output of [`ConformalRanker::rank`]: sorted items plus summary stats.
#[derive(Debug, Clone)]
pub struct RankedResults {
    /// Items sorted best-first.
    pub items: Vec<RankedItem>,

    /// Aggregate statistics over the whole ranking.
    pub summary: RankingSummary,
}

/// One scored item in ranked order.
#[derive(Debug, Clone)]
pub struct RankedItem {
    /// Index of this item in the input passed to `rank`.
    pub original_index: usize,

    /// The underlying match result.
    pub result: MatchResult,

    /// Confidence diagnostics for this rank position.
    pub rank_confidence: RankConfidence,
}

/// Aggregate statistics for a ranking pass.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    /// Number of ranked items.
    pub count: usize,

    /// Items classified as [`RankStability::Stable`].
    pub stable_count: usize,

    /// Number of contiguous runs of (near-)tied scores.
    pub tie_group_count: usize,

    /// Median of the adjacent score gaps (0.0 with fewer than two items).
    pub median_gap: f64,
}

/// Ranks match results and annotates each rank with a stability estimate
/// based on the empirical distribution of adjacent score gaps.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    /// Gaps smaller than this are treated as ties.
    pub tie_epsilon: f64,

    /// Minimum confidence for [`RankStability::Stable`].
    pub stable_threshold: f64,

    /// Minimum confidence for [`RankStability::Marginal`].
    pub marginal_threshold: f64,
}

impl Default for ConformalRanker {
    fn default() -> Self {
        Self {
            tie_epsilon: 1e-9,
            stable_threshold: 0.7,
            marginal_threshold: 0.3,
        }
    }
}
993
994impl ConformalRanker {
    /// Ranker with the default thresholds.
    pub fn new() -> Self {
        Self::default()
    }
999
1000 pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
1005 let count = results.len();
1006
1007 if count == 0 {
1008 return RankedResults {
1009 items: Vec::new(),
1010 summary: RankingSummary {
1011 count: 0,
1012 stable_count: 0,
1013 tie_group_count: 0,
1014 median_gap: 0.0,
1015 },
1016 };
1017 }
1018
1019 let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
1023 indexed.sort_by(|a, b| {
1024 b.1.score
1025 .partial_cmp(&a.1.score)
1026 .unwrap_or(std::cmp::Ordering::Equal)
1027 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1028 });
1029
1030 let gaps: Vec<f64> = if count > 1 {
1032 indexed
1033 .windows(2)
1034 .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
1035 .collect()
1036 } else {
1037 Vec::new()
1038 };
1039
1040 let mut sorted_gaps = gaps.clone();
1042 sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1043
1044 let mut items: Vec<RankedItem> = Vec::with_capacity(count);
1046 let mut stable_count = 0;
1047 let mut tie_group_count = 0;
1048 let mut in_tie_group = false;
1049
1050 for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
1051 let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);
1052
1053 let confidence = if sorted_gaps.is_empty() {
1055 1.0
1057 } else {
1058 let leq_count =
1059 sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
1060 leq_count as f64 / sorted_gaps.len() as f64
1061 };
1062
1063 let is_tie = gap_to_next < self.tie_epsilon;
1064 let stability = if is_tie {
1065 if !in_tie_group {
1066 tie_group_count += 1;
1067 in_tie_group = true;
1068 }
1069 RankStability::Unstable
1070 } else {
1071 in_tie_group = false;
1072 if confidence >= self.stable_threshold {
1073 stable_count += 1;
1074 RankStability::Stable
1075 } else if confidence >= self.marginal_threshold {
1076 RankStability::Marginal
1077 } else {
1078 RankStability::Unstable
1079 }
1080 };
1081
1082 items.push(RankedItem {
1083 original_index: orig_idx,
1084 result,
1085 rank_confidence: RankConfidence {
1086 confidence,
1087 gap_to_next,
1088 stability,
1089 },
1090 });
1091 }
1092
1093 let median_gap = if sorted_gaps.is_empty() {
1094 0.0
1095 } else {
1096 let mid = sorted_gaps.len() / 2;
1097 if sorted_gaps.len().is_multiple_of(2) {
1098 (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
1099 } else {
1100 sorted_gaps[mid]
1101 }
1102 };
1103
1104 RankedResults {
1105 items,
1106 summary: RankingSummary {
1107 count,
1108 stable_count,
1109 tie_group_count,
1110 median_gap,
1111 },
1112 }
1113 }
1114
    /// Ranks and keeps only the top `k` items, recomputing `stable_count`
    /// for the truncated list. The remaining summary fields still describe
    /// the full ranking.
    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
        let mut ranked = self.rank(results);
        ranked.items.truncate(k);
        ranked.summary.stable_count = ranked
            .items
            .iter()
            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
            .count();
        ranked
    }
1130}
1131
impl fmt::Display for RankConfidence {
    /// Compact single-line form, e.g. `confidence=0.900 gap=0.0123 (stable)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "confidence={:.3} gap={:.4} ({})",
            self.confidence,
            self.gap_to_next,
            self.stability.label()
        )
    }
}

impl fmt::Display for RankingSummary {
    /// Compact single-line form with counts and the median gap.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{} items, {} stable, {} tie groups, median gap {:.4}",
            self.count, self.stable_count, self.tie_group_count, self.median_gap
        )
    }
}
1153
/// Cache record: one corpus index that matched the previous query.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CachedEntry {
    corpus_index: usize,
}

/// Scorer that prunes work when the user extends the previous query:
/// appending characters can only shrink the match set, so only items that
/// matched the previous query need re-scoring.
#[derive(Debug)]
pub struct IncrementalScorer {
    // Underlying per-title scorer.
    scorer: BayesianScorer,
    // Query used for the most recent scan.
    prev_query: String,
    // Indices that matched `prev_query`, in ranked order.
    cache: Vec<CachedEntry>,
    // Generation token used to detect corpus content changes.
    corpus_generation: u64,
    // Corpus length at the last scan (cheap change detection).
    corpus_len: usize,
    // Counters for full vs incremental scans and pruning effectiveness.
    stats: IncrementalStats,
}
1208
/// Counters describing how much work the incremental scorer saved.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    pub full_scans: u64,
    pub incremental_scans: u64,
    pub total_evaluated: u64,
    pub total_pruned: u64,
}

impl IncrementalStats {
    /// Fraction of candidates skipped rather than evaluated; 0.0 before any
    /// scoring has happened.
    pub fn prune_ratio(&self) -> f64 {
        match self.total_evaluated + self.total_pruned {
            0 => 0.0,
            total => self.total_pruned as f64 / total as f64,
        }
    }
}
1233
1234impl IncrementalScorer {
1235 pub fn new() -> Self {
1237 Self {
1238 scorer: BayesianScorer::fast(),
1239 prev_query: String::new(),
1240 cache: Vec::new(),
1241 corpus_generation: 0,
1242 corpus_len: 0,
1243 stats: IncrementalStats::default(),
1244 }
1245 }
1246
    /// Incremental scorer over a caller-provided scorer configuration.
    pub fn with_scorer(scorer: BayesianScorer) -> Self {
        Self {
            scorer,
            prev_query: String::new(),
            cache: Vec::new(),
            corpus_generation: 0,
            corpus_len: 0,
            stats: IncrementalStats::default(),
        }
    }

    /// Drops all cached state and bumps the generation so the next scan is
    /// a full one. Call when the corpus changes.
    pub fn invalidate(&mut self) {
        self.prev_query.clear();
        self.cache.clear();
        self.corpus_generation = self.corpus_generation.wrapping_add(1);
    }

    /// Scan/prune counters accumulated so far.
    pub fn stats(&self) -> &IncrementalStats {
        &self.stats
    }
1270
    /// Scores `query` against every title and returns `(corpus_index,
    /// result)` pairs sorted best-first; zero-score results are omitted.
    ///
    /// When `query` extends the previous query and the corpus is unchanged
    /// (same generation and length), only the previously matching indices
    /// are re-scored. Pass `Some(generation)` and bump it whenever corpus
    /// contents change; by length alone a content change is not detected.
    pub fn score_corpus(
        &mut self,
        query: &str,
        corpus: &[&str],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        // Corpus changed (explicitly or by length): cached indices are stale.
        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        // Pruning is sound only when the new query extends the old one.
        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental(query, &query_lower, corpus)
        } else {
            self.score_full(query, &query_lower, corpus)
        };

        // Remember this query's matches for the next keystroke.
        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }
1318
    /// Like [`Self::score_corpus`], but takes pre-lowercased titles so the
    /// per-keystroke cost avoids re-lowercasing the corpus.
    pub fn score_corpus_with_lowered(
        &mut self,
        query: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        debug_assert_eq!(
            corpus.len(),
            corpus_lower.len(),
            "corpus_lower must match corpus length"
        );

        // Corpus changed (explicitly or by length): cached indices are stale.
        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        // Pruning is sound only when the new query extends the old one.
        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
        } else {
            self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
        };

        // Remember this query's matches for the next keystroke.
        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }

    /// Like [`Self::score_corpus_with_lowered`], plus precomputed word-start
    /// offsets per title (forwarded to the ASCII word-start matcher).
    pub fn score_corpus_with_lowered_and_words(
        &mut self,
        query: &str,
        corpus: &[String],
        corpus_lower: &[String],
        word_starts: &[Vec<usize>],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        debug_assert_eq!(
            corpus.len(),
            corpus_lower.len(),
            "corpus_lower must match corpus length"
        );
        debug_assert_eq!(
            corpus.len(),
            word_starts.len(),
            "word_starts must match corpus length"
        );

        // Corpus changed (explicitly or by length): cached indices are stale.
        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        // Pruning is sound only when the new query extends the old one.
        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental_lowered_with_words(
                query,
                &query_lower,
                corpus,
                corpus_lower,
                word_starts,
            )
        } else {
            self.score_full_lowered_with_words(
                query,
                &query_lower,
                corpus,
                corpus_lower,
                word_starts,
            )
        };

        // Remember this query's matches for the next keystroke.
        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }
1430
    /// Full scan: score every title against the query (no cache used).
    fn score_full(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, title) in corpus.iter().enumerate() {
            let result = self
                .scorer
                .score_with_query_lower(query, query_lower, title);
            // Zero-score (no-match) entries are dropped from the result set.
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_unstable_by(compare_ranked_match_results);

        results
    }

    /// Full scan over a corpus with precomputed lowercase titles.
    fn score_full_lowered(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
            let result =
                self.scorer
                    .score_with_lowered_title(query, query_lower, title, title_lower);
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_unstable_by(compare_ranked_match_results);

        results
    }

    /// Full scan with precomputed lowercase titles and word-start offsets.
    fn score_full_lowered_with_words(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[String],
        corpus_lower: &[String],
        word_starts: &[Vec<usize>],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, ((title, title_lower), starts)) in corpus
            .iter()
            .zip(corpus_lower.iter())
            .zip(word_starts.iter())
            .enumerate()
        {
            let result = self.scorer.score_with_lowered_title_and_words(
                query,
                query_lower,
                title,
                title_lower,
                Some(starts),
            );
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_unstable_by(compare_ranked_match_results);

        results
    }
    /// Incremental scan: re-score only the indices cached from the previous
    /// (prefix) query; everything else counts as pruned.
    fn score_incremental(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.incremental_scans += 1;

        let prev_match_count = self.cache.len();
        let pruned = corpus.len().saturating_sub(prev_match_count);
        self.stats.total_pruned += pruned as u64;
        self.stats.total_evaluated += prev_match_count as u64;

        let mut results: Vec<(usize, MatchResult)> =
            Vec::with_capacity(self.cache.len().min(corpus.len()));
        for entry in &self.cache {
            // Guard against a corpus that shrank since the cache was built.
            if entry.corpus_index < corpus.len() {
                let result = self.scorer.score_with_query_lower(
                    query,
                    query_lower,
                    corpus[entry.corpus_index],
                );
                if result.score > 0.0 {
                    results.push((entry.corpus_index, result));
                }
            }
        }

        results.sort_unstable_by(compare_ranked_match_results);

        results
    }

    /// Incremental scan with precomputed lowercase titles.
    fn score_incremental_lowered(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.incremental_scans += 1;

        let prev_match_count = self.cache.len();
        let pruned = corpus.len().saturating_sub(prev_match_count);
        self.stats.total_pruned += pruned as u64;
        self.stats.total_evaluated += prev_match_count as u64;

        let mut results: Vec<(usize, MatchResult)> =
            Vec::with_capacity(self.cache.len().min(corpus.len()));
        for entry in &self.cache {
            // Guard against a corpus that shrank since the cache was built.
            if entry.corpus_index < corpus.len() {
                let title = corpus[entry.corpus_index];
                let title_lower = corpus_lower[entry.corpus_index];
                let result =
                    self.scorer
                        .score_with_lowered_title(query, query_lower, title, title_lower);
                if result.score > 0.0 {
                    results.push((entry.corpus_index, result));
                }
            }
        }

        results.sort_unstable_by(compare_ranked_match_results);

        results
    }
1585
1586 fn score_incremental_lowered_with_words(
1588 &mut self,
1589 query: &str,
1590 query_lower: &str,
1591 corpus: &[String],
1592 corpus_lower: &[String],
1593 word_starts: &[Vec<usize>],
1594 ) -> Vec<(usize, MatchResult)> {
1595 self.stats.incremental_scans += 1;
1596
1597 let prev_match_count = self.cache.len();
1598 let pruned = corpus.len().saturating_sub(prev_match_count);
1599 self.stats.total_pruned += pruned as u64;
1600 self.stats.total_evaluated += prev_match_count as u64;
1601
1602 let mut results: Vec<(usize, MatchResult)> =
1603 Vec::with_capacity(self.cache.len().min(corpus.len()));
1604 for entry in &self.cache {
1605 if entry.corpus_index < corpus.len() {
1606 let title = &corpus[entry.corpus_index];
1607 let title_lower = &corpus_lower[entry.corpus_index];
1608 let starts = &word_starts[entry.corpus_index];
1609 let result = self.scorer.score_with_lowered_title_and_words(
1610 query,
1611 query_lower,
1612 title,
1613 title_lower,
1614 Some(starts),
1615 );
1616 if result.score > 0.0 {
1617 results.push((entry.corpus_index, result));
1618 }
1619 }
1620 }
1621
1622 results.sort_by(|a, b| {
1623 b.1.score
1624 .partial_cmp(&a.1.score)
1625 .unwrap_or(std::cmp::Ordering::Equal)
1626 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1627 });
1628
1629 results
1630 }
1631}
1632
1633impl Default for IncrementalScorer {
1634 fn default() -> Self {
1635 Self::new()
1636 }
1637}
1638
1639#[cfg(test)]
1644mod tests {
1645 use super::*;
1646
    // --- BayesianScorer: match-type classification and score bands ---

    // An identical query/title pair classifies as Exact and scores near 1.
    #[test]
    fn exact_match_highest_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("Settings", "Settings");
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score > 0.95, "Exact match should score > 0.95");
    }

    // A query that starts the title classifies as Prefix with a high score.
    #[test]
    fn prefix_match_high_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");
        assert_eq!(result.match_type, MatchType::Prefix);
        assert!(result.score > 0.85, "Prefix match should score > 0.85");
    }

    // Query letters hitting word starts ("g"o "d"ashboard) → WordStart.
    #[test]
    fn word_start_match_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_type, MatchType::WordStart);
        assert!(result.score > 0.75, "Word-start should score > 0.75");
    }

    // "set" inside "Asset" is a contiguous substring, not a prefix.
    #[test]
    fn substring_match_moderate_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Asset Manager");
        assert_eq!(result.match_type, MatchType::Substring);
        assert!(result.score > 0.5, "Substring should score > 0.5");
    }

    // Scattered letters ("s"e"t"tin"g"s) fall back to Fuzzy with a mid score.
    #[test]
    fn fuzzy_match_low_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_type, MatchType::Fuzzy);
        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
    }

    // Letters absent from the title yield NoMatch with a hard zero score.
    #[test]
    fn no_match_returns_zero() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("xyz", "Settings");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    // An empty query trivially word-start-matches with no positions.
    #[test]
    fn word_start_match_ascii_empty_query_matches() {
        let scorer = BayesianScorer::new();
        let positions = scorer
            .word_start_match_ascii(b"", b"go dashboard", None)
            .expect("empty query should match");
        assert!(positions.is_empty());
    }

    // Out-of-range precomputed start indices are ignored, not trusted.
    #[test]
    fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
        let scorer = BayesianScorer::new();
        let starts = [999, 0, 3];
        let positions = scorer
            .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
            .expect("should still match despite out-of-bounds precomputed starts");
        assert_eq!(positions, vec![0, 3]);
    }

    // An empty query trivially fuzzy-matches with no positions.
    #[test]
    fn fuzzy_match_ascii_empty_query_matches() {
        let scorer = BayesianScorer::new();
        let positions = scorer
            .fuzzy_match_ascii(b"", b"settings")
            .expect("empty query should match");
        assert!(positions.is_empty());
    }

    // Scores stay inside [0, 1] across exact, empty, fuzzy, and no-match inputs.
    #[test]
    fn score_bounded() {
        let scorer = BayesianScorer::new();
        let test_cases = [
            ("a", "abcdefghijklmnop"),
            ("full", "full"),
            ("", "anything"),
            ("xyz", "abc"),
            ("stg", "Settings"),
        ];

        for (query, title) in test_cases {
            let result = scorer.score(query, title);
            assert!(
                result.score >= 0.0 && result.score <= 1.0,
                "Score for ({}, {}) = {} not in [0, 1]",
                query,
                title,
                result.score
            );
        }
    }

    // Scoring is a pure function of its inputs: repeated calls agree.
    #[test]
    fn score_deterministic() {
        let scorer = BayesianScorer::new();
        let result1 = scorer.score("nav", "Navigation");
        let result2 = scorer.score("nav", "Navigation");
        assert!(
            (result1.score - result2.score).abs() < f64::EPSILON,
            "Same input should produce identical scores"
        );
    }
1763
    // For equal matches, the shorter title must never score below the longer.
    #[test]
    fn tiebreak_shorter_first() {
        let scorer = BayesianScorer::new();
        let short = scorer.score("set", "Set");
        let long = scorer.score("set", "Settings");
        assert!(
            short.score >= long.score,
            "Shorter title should score >= longer: {} vs {}",
            short.score,
            long.score
        );
    }

    // Query case must not change the resulting score at all.
    #[test]
    fn case_insensitive() {
        let scorer = BayesianScorer::new();
        let lower = scorer.score("set", "Settings");
        let upper = scorer.score("SET", "Settings");
        assert!(
            (lower.score - upper.score).abs() < f64::EPSILON,
            "Case should not affect score"
        );
    }

    // Word-start hits report the exact byte positions of "G" and "D".
    #[test]
    fn match_positions_correct() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_positions, vec![0, 3]);
    }

    // A fuzzy match reports one position per query char, starting at the first hit.
    #[test]
    fn fuzzy_match_positions() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_positions.len(), 3);
        assert_eq!(result.match_positions[0], 0);
    }

    // Empty query matches everything, but never with a perfect score.
    #[test]
    fn empty_query_returns_all() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "Any Command");
        assert!(result.score > 0.0, "Empty query should have positive score");
        assert!(result.score < 1.0, "Empty query should not be max score");
    }

    // Empty title must not panic or produce NaN/inf.
    #[test]
    fn empty_title_is_safe() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("x", "");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert!(result.score.is_finite(), "Score should remain finite");
    }

    // Degenerate empty/empty input stays finite and classifies as Fuzzy.
    #[test]
    fn empty_query_empty_title_is_finite() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "");
        assert!(result.score.is_finite(), "Score should remain finite");
        assert_eq!(result.match_type, MatchType::Fuzzy);
    }

    // A query longer than the title can never match.
    #[test]
    fn query_longer_than_title() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("verylongquery", "short");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    // A 62-char exact match must not overflow/underflow the odds math.
    #[test]
    fn long_query_exact_match_is_handled() {
        let scorer = BayesianScorer::new();
        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        let result = scorer.score(query, query);
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score.is_finite());
        assert!(result.score > 0.9, "Exact long query should score high");
    }

    // Larger gaps between fuzzy-matched chars must lower the score.
    #[test]
    fn gap_penalty_prefers_tight_fuzzy_matches() {
        let scorer = BayesianScorer::new();
        let tight = scorer.score("ace", "abcde");
        let gappy = scorer.score("ace", "a...c...e");
        assert_eq!(tight.match_type, MatchType::Fuzzy);
        assert_eq!(gappy.match_type, MatchType::Fuzzy);
        assert!(
            tight.score > gappy.score,
            "Tight fuzzy match should score higher than gappy: {} vs {}",
            tight.score,
            gappy.score
        );
    }

    // A tag hit ("setup" contains "set") raises the score above the tagless run.
    #[test]
    fn tag_match_boosts_score() {
        let scorer = BayesianScorer::new();
        let without_tag = scorer.score("set", "Settings");
        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
        assert!(
            with_tag.score > without_tag.score,
            "Tag match should boost score: {} > {}",
            with_tag.score,
            without_tag.score
        );
    }

    // The default scorer records evidence entries, including the match-type prior.
    #[test]
    fn evidence_ledger_tracks_factors() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");

        assert!(!result.evidence.entries().is_empty());

        assert!(
            result
                .evidence
                .entries()
                .iter()
                .any(|e| e.kind == EvidenceKind::MatchType)
        );
    }

    // The ledger's Display output carries its header and posterior line.
    #[test]
    fn evidence_ledger_display() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        let display = format!("{}", result.evidence);
        assert!(display.contains("Evidence Ledger"));
        assert!(display.contains("Posterior P:"));
    }
1913
    // Scores across a corpus sort without panicking and are all finite.
    #[test]
    fn property_ordering_total() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];

        let mut scores: Vec<(f64, &str)> = titles
            .iter()
            .map(|t| (scorer.score("set", t).score, *t))
            .collect();

        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());

        for (score, _) in &scores {
            assert!(score.is_finite());
        }
    }

    // Extending a prefix query must not decrease the score on the same title.
    #[test]
    fn property_prefix_monotonic() {
        let scorer = BayesianScorer::new();
        let one_char = scorer.score("s", "Settings");
        let three_char = scorer.score("set", "Settings");
        assert!(
            three_char.score >= one_char.score,
            "Longer prefix should score >= shorter"
        );
    }

    // Prior odds are strictly ordered Exact > Prefix > WordStart > Substring > Fuzzy > NoMatch.
    #[test]
    fn match_type_prior_ordering() {
        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
    }

    // --- ConformalRanker ---

    // Equal scores are ordered by match type (Exact first), not input order.
    #[test]
    fn conformal_tie_breaks_by_match_type() {
        let mut exact = MatchResult::no_match();
        exact.score = 0.5;
        exact.match_type = MatchType::Exact;

        let mut prefix = MatchResult::no_match();
        prefix.score = 0.5;
        prefix.match_type = MatchType::Prefix;

        let mut word_start = MatchResult::no_match();
        word_start.score = 0.5;
        word_start.match_type = MatchType::WordStart;

        let mut substring = MatchResult::no_match();
        substring.score = 0.5;
        substring.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
        let order: Vec<MatchType> = ranked
            .items
            .iter()
            .map(|item| item.result.match_type)
            .collect();

        assert_eq!(
            order,
            vec![
                MatchType::Exact,
                MatchType::Prefix,
                MatchType::WordStart,
                MatchType::Substring
            ]
        );
    }

    // Ranking nothing yields an empty, all-zero summary.
    #[test]
    fn conformal_empty_input() {
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(Vec::new());
        assert_eq!(ranked.items.len(), 0);
        assert_eq!(ranked.summary.count, 0);
        assert_eq!(ranked.summary.stable_count, 0);
        assert_eq!(ranked.summary.tie_group_count, 0);
        assert_eq!(ranked.summary.median_gap, 0.0);
    }

    // A lone item has nothing to compete with: confidence is exactly 1.
    #[test]
    fn conformal_single_item() {
        let scorer = BayesianScorer::new();
        let results = vec![scorer.score("set", "Settings")];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert_eq!(ranked.items.len(), 1);
        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
        assert_eq!(ranked.summary.count, 1);
    }

    // Ranked output is ordered by non-increasing score.
    #[test]
    fn conformal_sorted_descending() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset Manager"),
            scorer.score("set", "Reset Configuration Panel"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for w in ranked.items.windows(2) {
            let a = w[0].result.score;
            let b = w[1].result.score;
            assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
        }
    }
2041
    // Every item's confidence lies in [0, 1] and gaps are never negative.
    #[test]
    fn conformal_confidence_bounded() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for item in &ranked.items {
            assert!(
                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
                "Confidence must be in [0, 1], got {}",
                item.rank_confidence.confidence
            );
            assert!(
                item.rank_confidence.gap_to_next >= 0.0,
                "Gap must be non-negative"
            );
        }
    }

    // Two identical ranking runs produce identical confidences and order.
    #[test]
    fn conformal_deterministic() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Asset", "Reset"];

        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked1 = ranker.rank(results1);
        let ranked2 = ranker.rank(results2);

        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
            assert!(
                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
                "Confidence should be deterministic"
            );
            assert_eq!(
                a.original_index, b.original_index,
                "Rank order should be deterministic"
            );
        }
    }

    // Two items with the same score/type form a tie group and rank as Unstable.
    #[test]
    fn conformal_ties_detected() {
        let mut r1 = MatchResult::no_match();
        r1.score = 0.8;
        r1.match_type = MatchType::Prefix;
        let mut r2 = MatchResult::no_match();
        r2.score = 0.8;
        r2.match_type = MatchType::Prefix;
        let mut r3 = MatchResult::no_match();
        r3.score = 0.5;
        r3.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![r1, r2, r3]);

        assert_eq!(
            ranked.items[0].rank_confidence.stability,
            RankStability::Unstable,
            "Tied items should be Unstable"
        );
        assert!(
            ranked.summary.tie_group_count >= 1,
            "Should detect at least one tie group"
        );
    }

    // Five identical results: no ordering is defensible, all are Unstable.
    #[test]
    fn conformal_all_identical_scores() {
        let mut results = Vec::new();
        for _ in 0..5 {
            let mut r = MatchResult::no_match();
            r.score = 0.5;
            r.match_type = MatchType::Fuzzy;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for item in &ranked.items {
            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
        }
    }

    // Large score gaps (0.3 apart) should make most positions Stable.
    #[test]
    fn conformal_well_separated_scores_are_stable() {
        let mut results = Vec::new();
        for &s in &[0.9, 0.6, 0.3, 0.1] {
            let mut r = MatchResult::no_match();
            r.score = s;
            r.match_type = MatchType::Prefix;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.stable_count >= 2,
            "Well-separated scores should yield stable positions, got {}",
            ranked.summary.stable_count
        );
    }

    // rank_top_k keeps only the k best items, with confidences intact.
    #[test]
    fn conformal_top_k_truncates() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank_top_k(results, 2);

        assert_eq!(ranked.items.len(), 2);
        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
    }

    // Reordering preserves each item's index into the input slice.
    #[test]
    fn conformal_original_indices_preserved() {
        let scorer = BayesianScorer::new();
        let titles = ["Zebra Tool", "Settings", "Apple"];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert_eq!(
            ranked.items[0].original_index, 1,
            "Settings should be first; original_index should be 1"
        );
    }

    // The summary's Display output mentions the item count.
    #[test]
    fn conformal_summary_display() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Set Theme"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let display = format!("{}", ranked.summary);
        assert!(display.contains("2 items"));
    }

    // RankConfidence's Display shows the confidence value and stability label.
    #[test]
    fn conformal_rank_confidence_display() {
        let rc = RankConfidence {
            confidence: 0.85,
            gap_to_next: 0.1234,
            stability: RankStability::Stable,
        };
        let display = format!("{}", rc);
        assert!(display.contains("0.850"));
        assert!(display.contains("stable"));
    }

    // Each stability variant maps to its lowercase label.
    #[test]
    fn conformal_stability_labels() {
        assert_eq!(RankStability::Stable.label(), "stable");
        assert_eq!(RankStability::Marginal.label(), "marginal");
        assert_eq!(RankStability::Unstable.label(), "unstable");
    }

    // The bottom-ranked item has no successor, so its gap is defined as 0.
    #[test]
    fn conformal_last_item_gap_zero() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset"),
            scorer.score("set", "Reset"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let last = ranked.items.last().unwrap();
        assert_eq!(
            last.rank_confidence.gap_to_next, 0.0,
            "Last item gap should be 0"
        );
    }

    // The summary's median gap statistic is never negative.
    #[test]
    fn conformal_median_gap_non_negative() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
            "System Settings",
            "Reset Defaults",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.median_gap >= 0.0,
            "Median gap must be non-negative"
        );
    }
2276
2277 fn test_corpus() -> Vec<&'static str> {
2282 vec![
2283 "Open File",
2284 "Save File",
2285 "Close Tab",
2286 "Git: Commit",
2287 "Git: Push",
2288 "Git: Pull",
2289 "Go to Line",
2290 "Find in Files",
2291 "Toggle Terminal",
2292 "Format Document",
2293 ]
2294 }
2295
    // --- IncrementalScorer ---

    // The very first query has no cache, so it must be a full scan.
    #[test]
    fn incremental_full_scan_on_first_call() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();
        let results = scorer.score_corpus("git", &corpus, None);

        assert!(!results.is_empty(), "Should find matches for 'git'");
        assert_eq!(scorer.stats().full_scans, 1);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    // Typing one more char reuses the cache; result sets only shrink.
    #[test]
    fn incremental_prunes_on_query_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let r1 = scorer.score_corpus("g", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        let r2 = scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 1);
        assert!(r2.len() <= r1.len(), "Extended query should match <= items");

        let r3 = scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 2);
        assert!(r3.len() <= r2.len());
    }

    // The incremental path must give exactly the same answers as a cold scan.
    #[test]
    fn incremental_correctness_matches_full_scan() {
        let corpus = test_corpus();

        let mut inc = IncrementalScorer::new();
        inc.score_corpus("g", &corpus, None);
        let inc_results = inc.score_corpus("git", &corpus, None);

        let mut full = IncrementalScorer::new();
        let full_results = full.score_corpus("git", &corpus, None);

        assert_eq!(
            inc_results.len(),
            full_results.len(),
            "Incremental and full scan should return same count"
        );

        for (a, b) in inc_results.iter().zip(full_results.iter()) {
            assert_eq!(a.0, b.0, "Same corpus indices");
            assert!(
                (a.1.score - b.1.score).abs() < f64::EPSILON,
                "Same scores: {} vs {}",
                a.1.score,
                b.1.score
            );
        }
    }

    // A query that is not an extension of the last one forces a full rescan.
    #[test]
    fn incremental_falls_back_on_non_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        scorer.score_corpus("fi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }

    // invalidate() drops the cache, so even an extension does a full scan.
    #[test]
    fn incremental_invalidate_forces_full_scan() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.invalidate();

        scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    // A changed corpus generation token invalidates the cache.
    #[test]
    fn incremental_generation_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, Some(1));

        scorer.score_corpus("gi", &corpus, Some(2));
        assert_eq!(scorer.stats().full_scans, 2);
    }

    // A corpus of a different length also invalidates the cache.
    #[test]
    fn incremental_corpus_size_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus1 = test_corpus();
        let corpus2 = &corpus1[..5];

        scorer.score_corpus("g", &corpus1, None);
        scorer.score_corpus("gi", corpus2, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }

    // Stale cache indices past the corpus end are skipped in all three
    // incremental paths, not indexed (which would panic).
    #[test]
    fn incremental_skips_out_of_bounds_cache_entries() {
        let mut scorer = IncrementalScorer::new();

        scorer.cache = vec![
            CachedEntry { corpus_index: 0 },
            CachedEntry { corpus_index: 999 },
        ];

        let corpus = ["a"];
        let results = scorer.score_incremental("a", "a", &corpus);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        let corpus_lower = ["a"];
        let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        let corpus_s = vec!["a".to_string()];
        let corpus_lower_s = vec!["a".to_string()];
        let word_starts = vec![vec![0]];
        let results = scorer.score_incremental_lowered_with_words(
            "a",
            "a",
            &corpus_s,
            &corpus_lower_s,
            &word_starts,
        );
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);
    }

    // An empty query returns the entire corpus.
    #[test]
    fn incremental_empty_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("", &corpus, None);
        assert_eq!(results.len(), corpus.len());
    }

    // A query matching nothing returns an empty result set.
    #[test]
    fn incremental_no_match_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("zzz", &corpus, None);
        assert!(results.is_empty());
    }
2462
    // After incremental scans, some items were pruned, so the ratio is positive.
    #[test]
    fn incremental_stats_prune_ratio() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.score_corpus("gi", &corpus, None);
        scorer.score_corpus("git", &corpus, None);

        let stats = scorer.stats();
        assert!(
            stats.prune_ratio() > 0.0,
            "Prune ratio should be > 0 after incremental scans"
        );
        assert!(stats.total_pruned > 0, "Should have pruned some items");
    }

    // score_corpus output is ordered by non-increasing score.
    #[test]
    fn incremental_results_sorted_descending() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("o", &corpus, None);
        for w in results.windows(2) {
            let a = w[0].1.score;
            let b = w[1].1.score;
            assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
        }
    }

    // The pre-lowered/pre-word-start path agrees exactly with the plain path,
    // including on a non-ASCII title.
    #[test]
    fn incremental_lowered_matches_full() {
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close".to_string(),
            "Launch 🚀".to_string(),
        ];
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
        // Precompute word-start byte offsets the same way the scorer expects:
        // offset 0, or any char preceded by space/hyphen/underscore.
        let word_starts: Vec<Vec<usize>> = lower
            .iter()
            .map(|title_lower| {
                let bytes = title_lower.as_bytes();
                title_lower
                    .char_indices()
                    .filter_map(|(i, _)| {
                        let is_word_start = i == 0 || {
                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
                            prev == b' ' || prev == b'-' || prev == b'_'
                        };
                        is_word_start.then_some(i)
                    })
                    .collect()
            })
            .collect();

        let mut full = IncrementalScorer::new();
        let mut lowered = IncrementalScorer::new();

        let a = full.score_corpus("fi", &corpus_refs, None);
        let b =
            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);

        assert_eq!(a.len(), b.len());
        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
            assert_eq!(ia, ib);
            assert_eq!(ra.match_type, rb.match_type);
            assert_eq!(ra.match_positions, rb.match_positions);
            assert!(
                (ra.score - rb.score).abs() < 1e-12,
                "score mismatch: {} vs {}",
                ra.score,
                rb.score
            );
        }
    }

    // Two fresh scorers given the same query/corpus agree exactly.
    #[test]
    fn incremental_deterministic() {
        let corpus = test_corpus();

        let mut s1 = IncrementalScorer::new();
        let r1 = s1.score_corpus("git", &corpus, None);

        let mut s2 = IncrementalScorer::new();
        let r2 = s2.score_corpus("git", &corpus, None);

        assert_eq!(r1.len(), r2.len());
        for (a, b) in r1.iter().zip(r2.iter()) {
            assert_eq!(a.0, b.0);
            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
        }
    }
2557
2558 #[test]
2563 fn match_type_descriptions() {
2564 assert_eq!(MatchType::Exact.description(), "exact match");
2565 assert_eq!(MatchType::Prefix.description(), "prefix match");
2566 assert_eq!(MatchType::WordStart.description(), "word-start match");
2567 assert_eq!(MatchType::Substring.description(), "contiguous substring");
2568 assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
2569 assert_eq!(MatchType::NoMatch.description(), "no match");
2570 }
2571
2572 #[test]
2573 fn match_type_no_match_prior_is_zero() {
2574 assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
2575 }
2576
    // --- EvidenceDescription / EvidenceEntry / EvidenceLedger ---

    // Static descriptions display their literal text.
    #[test]
    fn evidence_description_static_display() {
        let d = EvidenceDescription::Static("test message");
        assert_eq!(format!("{d}"), "test message");
    }

    // TitleLengthChars renders the character count.
    #[test]
    fn evidence_description_title_length_display() {
        let d = EvidenceDescription::TitleLengthChars { len: 42 };
        assert_eq!(format!("{d}"), "title length 42 chars");
    }

    // FirstMatchPos renders the position.
    #[test]
    fn evidence_description_first_match_pos_display() {
        let d = EvidenceDescription::FirstMatchPos { pos: 5 };
        assert_eq!(format!("{d}"), "first match at position 5");
    }

    // WordBoundaryCount renders the boundary hit count.
    #[test]
    fn evidence_description_word_boundary_count_display() {
        let d = EvidenceDescription::WordBoundaryCount { count: 3 };
        assert_eq!(format!("{d}"), "3 word boundary matches");
    }

    // GapTotal renders the summed gap size.
    #[test]
    fn evidence_description_gap_total_display() {
        let d = EvidenceDescription::GapTotal { total: 7 };
        assert_eq!(format!("{d}"), "total gap of 7 characters");
    }

    // CoveragePercent rounds to the nearest whole percent (83.5 → 84%).
    #[test]
    fn evidence_description_coverage_percent_display() {
        let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
        let s = format!("{d}");
        assert!(s.contains("84%"), "Should round: {s}");
    }

    // A Bayes factor > 1 renders as "supports" and names its kind.
    #[test]
    fn evidence_entry_display_supports() {
        let entry = EvidenceEntry {
            kind: EvidenceKind::Position,
            bayes_factor: 1.5,
            description: EvidenceDescription::FirstMatchPos { pos: 0 },
        };
        let s = format!("{entry}");
        assert!(s.contains("supports"));
        assert!(s.contains("Position"));
    }

    // A Bayes factor < 1 renders as "opposes".
    #[test]
    fn evidence_entry_display_opposes() {
        let entry = EvidenceEntry {
            kind: EvidenceKind::GapPenalty,
            bayes_factor: 0.5,
            description: EvidenceDescription::GapTotal { total: 10 },
        };
        let s = format!("{entry}");
        assert!(s.contains("opposes"));
    }

    // A Bayes factor of exactly 1 renders as "neutral".
    #[test]
    fn evidence_entry_display_neutral() {
        let entry = EvidenceEntry {
            kind: EvidenceKind::TitleLength,
            bayes_factor: 1.0,
            description: EvidenceDescription::Static("neutral factor"),
        };
        let s = format!("{entry}");
        assert!(s.contains("neutral"));
    }

    // Bayes factors combine multiplicatively: 2 × 3 = 6.
    #[test]
    fn ledger_combined_bayes_factor() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("a"),
        );
        ledger.add(
            EvidenceKind::WordBoundary,
            3.0,
            EvidenceDescription::Static("b"),
        );
        assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
    }

    // With no evidence, the combined factor is the multiplicative identity 1.
    #[test]
    fn ledger_combined_bayes_factor_empty() {
        let ledger = EvidenceLedger::new();
        assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
    }

    // A MatchType entry is reported back as the ledger's prior odds.
    #[test]
    fn ledger_prior_odds_present() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            9.0,
            EvidenceDescription::Static("prefix"),
        );
        assert_eq!(ledger.prior_odds(), Some(9.0));
    }

    // No MatchType entry → no prior odds.
    #[test]
    fn ledger_prior_odds_absent() {
        let ledger = EvidenceLedger::new();
        assert_eq!(ledger.prior_odds(), None);
    }

    // Without a prior, odds of 2 convert to probability 2 / (1 + 2) = 2/3.
    #[test]
    fn ledger_posterior_probability_no_prior() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("pos"),
        );
        let prob = ledger.posterior_probability();
        assert!((prob - 2.0 / 3.0).abs() < 1e-9);
    }

    // Infinite odds saturate to probability 1 rather than NaN.
    #[test]
    fn ledger_posterior_probability_infinite_odds() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            f64::INFINITY,
            EvidenceDescription::Static("inf"),
        );
        assert_eq!(ledger.posterior_probability(), 1.0);
    }
2723
    // --- BayesianScorer::fast() — scoring without evidence collection ---

    // The fast scorer still scores, but records no evidence entries.
    #[test]
    fn fast_scorer_no_evidence() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("set", "Settings");
        assert!(result.score > 0.0);
        assert!(result.evidence.entries().is_empty());
    }

    // Fast-path scores are still valid probabilities in [0, 1].
    #[test]
    fn fast_scorer_score_matches_probability_range() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("set", "Settings");
        assert!(result.score >= 0.0 && result.score <= 1.0);
    }

    // Empty query on the fast path: positive score, still no evidence.
    #[test]
    fn fast_scorer_empty_query() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("", "Test");
        assert!(result.score > 0.0);
        assert!(result.evidence.entries().is_empty());
    }

    // The fast path applies gap penalties to fuzzy matches without zeroing them.
    #[test]
    fn fast_scorer_fuzzy_with_gap_penalty() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("ace", "a...b...c...d...e");
        assert_eq!(result.match_type, MatchType::Fuzzy);
        assert!(result.score > 0.0);
    }

    // Tag matches boost fast-path scores too.
    #[test]
    fn fast_scorer_tag_boost() {
        let scorer = BayesianScorer::fast();
        let without = scorer.score("set", "Settings");
        let with = scorer.score_with_tags("set", "Settings", &["setup"]);
        assert!(with.score > without.score);
    }

    // A tag boost on an already-exact match must not push the score past 1.
    #[test]
    fn fast_scorer_tag_no_boost_at_score_one() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
        assert!(result.score <= 1.0);
    }

    // --- Non-ASCII (multi-byte) input handling ---

    // Exact match works on multi-byte characters.
    #[test]
    fn unicode_exact_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("日本語", "日本語");
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score > 0.9);
    }

    // Prefix detection works on multi-byte characters.
    #[test]
    fn unicode_prefix_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("日本", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Prefix);
    }

    // Substring detection works on multi-byte characters.
    #[test]
    fn unicode_substring_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("本語", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Substring);
    }

    // Non-contiguous multi-byte characters fall back to Fuzzy.
    #[test]
    fn unicode_fuzzy_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("日テ", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Fuzzy);
    }

    // Disjoint non-ASCII alphabets produce NoMatch.
    #[test]
    fn unicode_no_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("ωψξ", "αβγ");
        assert_eq!(result.match_type, MatchType::NoMatch);
    }

    // Word-start detection works for space-separated lowercase words.
    #[test]
    fn unicode_word_start_match() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "go dashboard");
        assert_eq!(result.match_type, MatchType::WordStart);
    }
2822
2823 #[test]
2828 fn score_with_query_lower_matches_score() {
2829 let scorer = BayesianScorer::new();
2830 let direct = scorer.score("Set", "Settings");
2831 let pre = scorer.score_with_query_lower("Set", "set", "Settings");
2832 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2833 assert_eq!(direct.match_type, pre.match_type);
2834 }
2835
2836 #[test]
2837 fn score_with_lowered_title_matches_score() {
2838 let scorer = BayesianScorer::new();
2839 let direct = scorer.score("set", "Settings");
2840 let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
2841 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2842 }
2843
2844 #[test]
2845 fn score_with_lowered_title_and_words_matches() {
2846 let scorer = BayesianScorer::new();
2847 let direct = scorer.score("gd", "Go Dashboard");
2848 let pre = scorer.score_with_lowered_title_and_words(
2849 "gd",
2850 "gd",
2851 "Go Dashboard",
2852 "go dashboard",
2853 Some(&[0, 3]),
2854 );
2855 assert_eq!(direct.match_type, pre.match_type);
2856 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2857 }
2858
2859 #[test]
2864 fn tag_match_no_title_match_returns_nomatch() {
2865 let scorer = BayesianScorer::new();
2866 let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
2867 assert_eq!(result.match_type, MatchType::NoMatch);
2869 assert_eq!(result.score, 0.0);
2870 }
2871
2872 #[test]
2873 fn tag_match_no_matching_tag() {
2874 let scorer = BayesianScorer::new();
2875 let without = scorer.score("set", "Settings");
2876 let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
2877 assert!((without.score - with.score).abs() < f64::EPSILON);
2879 }
2880
2881 #[test]
2882 fn tag_match_case_insensitive() {
2883 let scorer = BayesianScorer::new();
2884 let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
2885 let without = scorer.score("set", "Settings");
2886 assert!(result.score > without.score);
2887 }
2888
2889 #[test]
2894 fn no_match_result_has_evidence() {
2895 let r = MatchResult::no_match();
2896 assert_eq!(r.score, 0.0);
2897 assert_eq!(r.match_type, MatchType::NoMatch);
2898 assert!(r.match_positions.is_empty());
2899 assert_eq!(r.evidence.entries().len(), 1);
2900 assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
2901 assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
2902 }
2903
2904 #[test]
2909 fn count_word_boundaries_at_start() {
2910 let scorer = BayesianScorer::new();
2911 let count = scorer.count_word_boundaries(&[0], "hello");
2913 assert_eq!(count, 1);
2914 }
2915
2916 #[test]
2917 fn count_word_boundaries_after_separators() {
2918 let scorer = BayesianScorer::new();
2919 let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2921 assert_eq!(count, 4);
2922 }
2923
2924 #[test]
2925 fn count_word_boundaries_mid_word() {
2926 let scorer = BayesianScorer::new();
2927 let count = scorer.count_word_boundaries(&[2], "hello");
2929 assert_eq!(count, 0);
2930 }
2931
2932 #[test]
2933 fn total_gap_empty() {
2934 let scorer = BayesianScorer::new();
2935 assert_eq!(scorer.total_gap(&[]), 0);
2936 }
2937
2938 #[test]
2939 fn total_gap_single() {
2940 let scorer = BayesianScorer::new();
2941 assert_eq!(scorer.total_gap(&[5]), 0);
2942 }
2943
2944 #[test]
2945 fn total_gap_contiguous() {
2946 let scorer = BayesianScorer::new();
2947 assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
2949 }
2950
2951 #[test]
2952 fn total_gap_with_gaps() {
2953 let scorer = BayesianScorer::new();
2954 assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
2956 }
2957
2958 #[test]
2963 fn conformal_custom_thresholds() {
2964 let ranker = ConformalRanker {
2965 tie_epsilon: 0.1,
2966 stable_threshold: 0.9,
2967 marginal_threshold: 0.5,
2968 };
2969 let mut r1 = MatchResult::no_match();
2971 r1.score = 0.5;
2972 r1.match_type = MatchType::Prefix;
2973 let mut r2 = MatchResult::no_match();
2974 r2.score = 0.45;
2975 r2.match_type = MatchType::Prefix;
2976
2977 let ranked = ranker.rank(vec![r1, r2]);
2978 assert_eq!(
2979 ranked.items[0].rank_confidence.stability,
2980 RankStability::Unstable,
2981 "Gap 0.05 < epsilon 0.1 → tie"
2982 );
2983 }
2984
2985 #[test]
2986 fn conformal_rank_top_k_larger_than_count() {
2987 let scorer = BayesianScorer::new();
2988 let results = vec![scorer.score("set", "Settings")];
2989 let ranker = ConformalRanker::new();
2990 let ranked = ranker.rank_top_k(results, 100);
2991 assert_eq!(ranked.items.len(), 1);
2992 }
2993
2994 #[test]
2995 fn conformal_rank_top_k_zero() {
2996 let scorer = BayesianScorer::new();
2997 let results = vec![
2998 scorer.score("set", "Settings"),
2999 scorer.score("set", "Asset"),
3000 ];
3001 let ranker = ConformalRanker::new();
3002 let ranked = ranker.rank_top_k(results, 0);
3003 assert!(ranked.items.is_empty());
3004 assert_eq!(ranked.summary.stable_count, 0);
3005 }
3006
3007 #[test]
3008 fn conformal_rank_confidence_display_marginal() {
3009 let rc = RankConfidence {
3010 confidence: 0.5,
3011 gap_to_next: 0.05,
3012 stability: RankStability::Marginal,
3013 };
3014 let s = format!("{rc}");
3015 assert!(s.contains("marginal"));
3016 }
3017
3018 #[test]
3019 fn conformal_rank_confidence_display_unstable() {
3020 let rc = RankConfidence {
3021 confidence: 0.1,
3022 gap_to_next: 0.001,
3023 stability: RankStability::Unstable,
3024 };
3025 let s = format!("{rc}");
3026 assert!(s.contains("unstable"));
3027 }
3028
3029 #[test]
3030 fn conformal_median_gap_even_count() {
3031 let mut results = Vec::new();
3033 for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
3034 let mut r = MatchResult::no_match();
3035 r.score = s;
3036 r.match_type = MatchType::Prefix;
3037 results.push(r);
3038 }
3039 let ranker = ConformalRanker::new();
3040 let ranked = ranker.rank(results);
3041 assert!(
3043 (ranked.summary.median_gap - 0.2).abs() < 0.01,
3044 "median_gap={}, expected ~0.2",
3045 ranked.summary.median_gap
3046 );
3047 }
3048
3049 #[test]
3054 fn incremental_with_scorer_uses_provided_scorer() {
3055 let scorer = BayesianScorer::new(); let mut inc = IncrementalScorer::with_scorer(scorer);
3057 let corpus = test_corpus();
3058 let results = inc.score_corpus("git", &corpus, None);
3059 assert!(!results.is_empty());
3060 assert!(!results[0].1.evidence.entries().is_empty());
3062 }
3063
3064 #[test]
3065 fn incremental_default_trait() {
3066 let inc = IncrementalScorer::default();
3067 assert_eq!(inc.stats().full_scans, 0);
3068 assert_eq!(inc.stats().incremental_scans, 0);
3069 }
3070
3071 #[test]
3072 fn incremental_stats_prune_ratio_zero_when_empty() {
3073 let stats = IncrementalStats::default();
3074 assert_eq!(stats.prune_ratio(), 0.0);
3075 }
3076
3077 #[test]
3078 fn incremental_score_corpus_with_lowered() {
3079 let corpus = vec!["Open File", "Save File", "Close Tab"];
3080 let lower = vec!["open file", "save file", "close tab"];
3081 let mut inc = IncrementalScorer::new();
3082 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3083 assert!(!results.is_empty());
3084 for (idx, _) in &results {
3086 assert!(*idx < corpus.len());
3087 }
3088 }
3089
3090 #[test]
3091 fn incremental_score_corpus_with_lowered_incremental_path() {
3092 let corpus = vec!["Open File", "Save File", "Close Tab"];
3093 let lower = vec!["open file", "save file", "close tab"];
3094 let mut inc = IncrementalScorer::new();
3095 inc.score_corpus_with_lowered("f", &corpus, &lower, None);
3096 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3097 assert_eq!(inc.stats().incremental_scans, 1);
3098 assert!(!results.is_empty());
3099 }
3100
3101 #[test]
3102 fn incremental_score_corpus_with_lowered_and_words() {
3103 let corpus = vec![
3104 "Open File".to_string(),
3105 "Save File".to_string(),
3106 "Close Tab".to_string(),
3107 ];
3108 let lower = vec![
3109 "open file".to_string(),
3110 "save file".to_string(),
3111 "close tab".to_string(),
3112 ];
3113 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3114
3115 let mut inc = IncrementalScorer::new();
3116 let results =
3117 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3118 assert!(!results.is_empty());
3119 }
3120
3121 #[test]
3122 fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
3123 let corpus = vec![
3124 "Open File".to_string(),
3125 "Save File".to_string(),
3126 "Close Tab".to_string(),
3127 ];
3128 let lower = vec![
3129 "open file".to_string(),
3130 "save file".to_string(),
3131 "close tab".to_string(),
3132 ];
3133 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3134
3135 let mut inc = IncrementalScorer::new();
3136 inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
3137 let results =
3138 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3139 assert_eq!(inc.stats().incremental_scans, 1);
3140 assert!(!results.is_empty());
3141 }
3142
3143 #[test]
3148 fn bayesian_scorer_default_has_evidence_off() {
3149 let scorer = BayesianScorer::default();
3150 assert!(!scorer.track_evidence);
3151 }
3152
3153 #[test]
3158 fn word_start_match_with_hyphens() {
3159 let scorer = BayesianScorer::new();
3160 let result = scorer.score("fc", "file-commander");
3161 assert_eq!(result.match_type, MatchType::WordStart);
3162 assert_eq!(result.match_positions, vec![0, 5]);
3163 }
3164
3165 #[test]
3166 fn word_start_match_with_underscores() {
3167 let scorer = BayesianScorer::new();
3168 let result = scorer.score("fc", "file_commander");
3169 assert_eq!(result.match_type, MatchType::WordStart);
3170 assert_eq!(result.match_positions, vec![0, 5]);
3171 }
3172
3173 #[test]
3178 fn match_type_ord() {
3179 let mut types = vec![
3180 MatchType::Exact,
3181 MatchType::NoMatch,
3182 MatchType::Fuzzy,
3183 MatchType::Prefix,
3184 MatchType::Substring,
3185 MatchType::WordStart,
3186 ];
3187 types.sort();
3188 assert_eq!(
3189 types,
3190 vec![
3191 MatchType::NoMatch,
3192 MatchType::Fuzzy,
3193 MatchType::Substring,
3194 MatchType::WordStart,
3195 MatchType::Prefix,
3196 MatchType::Exact,
3197 ]
3198 );
3199 }
3200
3201 #[test]
3206 fn evidence_kind_equality() {
3207 assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
3208 assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
3209 }
3210
3211 #[test]
3216 fn earlier_substring_scores_higher() {
3217 let scorer = BayesianScorer::new();
3218 let early = scorer.score("set", "set in stone");
3219 let late = scorer.score("set", "the asset");
3220 assert!(
3222 early.score > late.score,
3223 "Earlier match should score higher: {} vs {}",
3224 early.score,
3225 late.score
3226 );
3227 }
3228}
3229
#[cfg(test)]
mod perf_tests {
    //! Latency smoke tests for the scorer.
    //!
    //! Each test times a representative workload, emits a JSON stats line to
    //! stderr for log scraping, and asserts a latency percentile stays under a
    //! fixed microsecond budget. Budgets are multiplied by
    //! `COVERAGE_BUDGET_MULTIPLIER` when coverage instrumentation is detected,
    //! since instrumented builds run substantially slower.
    use super::*;
    use std::time::Instant;

    /// Latency summary (in microseconds) for one measured workload.
    #[derive(Debug, Clone, Copy)]
    struct PerfStats {
        p50_us: u64,      // median latency
        p95_us: u64,      // 95th percentile
        p99_us: u64,      // 99th percentile
        mean_us: f64,     // arithmetic mean
        variance_us: f64, // population variance of the samples
        samples: usize,   // number of timed iterations
    }

    // Per-workload latency budgets, in microseconds.
    const SINGLE_SCORE_BUDGET_US: u64 = 10;
    const CORPUS_100_BUDGET_US: u64 = 500;
    const CORPUS_1000_BUDGET_US: u64 = 5_000;
    const CORPUS_5000_BUDGET_US: u64 = 25_000;
    const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 5_000;
    const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000;
    // Budgets are relaxed by this factor when running under coverage.
    const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;

    /// Detects a coverage run via the env vars set by `cargo llvm-cov`.
    fn is_coverage_run() -> bool {
        std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
    }

    /// Returns `base`, scaled by `COVERAGE_BUDGET_MULTIPLIER` when
    /// `coverage_run` is true. Split out from `coverage_budget_us` so the
    /// scaling logic is testable without manipulating the environment.
    fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
        if coverage_run {
            base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
        } else {
            base
        }
    }

    /// Coverage-adjusted budget for the current process.
    fn coverage_budget_us(base: u64) -> u64 {
        coverage_budget_us_with_mode(base, is_coverage_run())
    }

    #[test]
    fn coverage_budget_us_with_mode_respects_flag() {
        assert_eq!(coverage_budget_us_with_mode(123, false), 123);
        assert_eq!(
            coverage_budget_us_with_mode(123, true),
            123 * COVERAGE_BUDGET_MULTIPLIER
        );
    }

    /// Builds a corpus of `size` command titles by cycling a 20-entry base
    /// list; entries beyond the base get a numeric suffix to stay unique.
    fn make_corpus(size: usize) -> Vec<String> {
        let base = [
            "Open File",
            "Save File",
            "Close Tab",
            "Split Editor Right",
            "Split Editor Down",
            "Toggle Terminal",
            "Go to Line",
            "Find in Files",
            "Replace in Files",
            "Git: Commit",
            "Git: Push",
            "Git: Pull",
            "Debug: Start",
            "Debug: Stop",
            "Debug: Step Over",
            "Format Document",
            "Rename Symbol",
            "Go to Definition",
            "Find All References",
            "Toggle Sidebar",
        ];
        base.iter()
            .cycle()
            .take(size)
            .enumerate()
            .map(|(i, cmd)| {
                if i < base.len() {
                    (*cmd).to_string()
                } else {
                    format!("{} ({})", cmd, i)
                }
            })
            .collect()
    }

    /// Runs `f` three times untimed to warm caches, then times `iterations`
    /// runs and summarizes the distribution. Percentiles are taken from the
    /// sorted samples, with indices clamped to the last element.
    fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
        let mut times = Vec::with_capacity(iterations);
        // Warm-up runs so first-touch costs don't skew the percentiles.
        for _ in 0..3 {
            f();
        }
        for _ in 0..iterations {
            let start = Instant::now();
            f();
            times.push(start.elapsed().as_micros() as u64);
        }
        times.sort_unstable();
        let len = times.len();
        let p50 = times[len / 2];
        let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
        let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
        let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
        // Population variance (divides by len, not len - 1).
        let variance = times
            .iter()
            .map(|value| {
                let delta = *value as f64 - mean;
                delta * delta
            })
            .sum::<f64>()
            / len as f64;

        PerfStats {
            p50_us: p50,
            p95_us: p95,
            p99_us: p99,
            mean_us: mean,
            variance_us: variance,
            samples: len,
        }
    }

    /// One query/title scoring call must meet the single-score budget at p50.
    #[test]
    fn perf_single_score_under_budget() {
        let scorer = BayesianScorer::fast();
        let stats = measure_stats_us(200, || {
            std::hint::black_box(scorer.score("git co", "Git: Commit"));
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
        assert!(
            stats.p50_us <= budget,
            "Single score p50 = {}µs exceeds budget {}µs",
            stats.p50_us,
            budget,
        );
    }

    /// Score + filter + sort over 100 titles must meet its budget at p95.
    #[test]
    fn perf_corpus_100_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(100);
        let stats = measure_stats_us(50, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "100-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    /// Score + filter + sort over 1000 titles must meet its budget at p95.
    #[test]
    fn perf_corpus_1000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(1_000);
        let stats = measure_stats_us(20, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "1000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    /// Score + filter + sort over 5000 titles must meet its budget at p95.
    #[test]
    fn perf_corpus_5000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(5_000);
        let stats = measure_stats_us(10, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "5000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    /// Simulates a user typing "git com" one key at a time over 100 titles;
    /// the full 7-keystroke sequence must meet its budget at p95.
    #[test]
    fn perf_incremental_7key_100_under_budget() {
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    /// Same 7-keystroke simulation over 1000 titles.
    #[test]
    fn perf_incremental_7key_1000_under_budget() {
        let corpus = make_corpus(1_000);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(10, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    /// Compares the incremental scorer against a naive full rescan for the
    /// same 7-keystroke sequence.
    #[test]
    fn perf_incremental_faster_than_naive() {
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let scorer = BayesianScorer::fast();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let naive_stats = measure_stats_us(30, || {
            for query in &queries {
                let mut results: Vec<_> = corpus
                    .iter()
                    .map(|t| scorer.score(query, t))
                    .filter(|r| r.score > 0.0)
                    .collect();
                results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
                std::hint::black_box(&results);
            }
        });

        let inc_stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
            naive_stats.samples,
            naive_stats.p50_us,
            naive_stats.p95_us,
            inc_stats.p50_us,
            inc_stats.p95_us
        );

        // Allow 2x plus 50µs slack: the incremental path carries bookkeeping
        // overhead, but it must stay in the same ballpark as the naive rescan.
        assert!(
            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50,
            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
            inc_stats.p50_us,
            naive_stats.p50_us,
        );
    }

    /// Ratio of large-corpus to small-corpus median latency; 0.0 guards the
    /// divide-by-zero case when the small measurement rounds to 0µs.
    fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
        if p50_small_us > 0 {
            p50_large_us as f64 / p50_small_us as f64
        } else {
            0.0
        }
    }

    #[test]
    fn scaling_ratio_handles_zero_divisor() {
        assert_eq!(scaling_ratio_us(0, 123), 0.0);
        assert_eq!(scaling_ratio_us(10, 25), 2.5);
    }

    /// A 10x corpus increase must not blow up latency by more than 25x
    /// (a loose ceiling that tolerates timer noise on small corpora).
    #[test]
    fn perf_scaling_sublinear() {
        let scorer = BayesianScorer::fast();
        let corpus_100 = make_corpus(100);
        let corpus_1000 = make_corpus(1_000);

        let stats_100 = measure_stats_us(30, || {
            let mut results: Vec<_> = corpus_100
                .iter()
                .map(|t| scorer.score("git", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });

        let stats_1000 = measure_stats_us(20, || {
            let mut results: Vec<_> = corpus_1000
                .iter()
                .map(|t| scorer.score("git", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
            stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
        );

        let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
        assert!(
            ratio < 25.0,
            "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
            ratio,
            stats_100.p50_us,
            stats_1000.p50_us,
        );
    }
}