1#![forbid(unsafe_code)]
2
3use std::fmt;
47
/// How the query matched a title.
///
/// Variants are declared weakest-to-strongest, so the derived `Ord` ranks
/// `Exact` above `Prefix`, and so on — the rankers rely on this as a sort
/// tie-breaker.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    /// The query characters could not all be found in order.
    NoMatch,
    /// All query characters appear in order, but not contiguously.
    Fuzzy,
    /// The query appears as a contiguous substring of the title.
    Substring,
    /// Each query character matches at the start of a word.
    WordStart,
    /// The title starts with the query (case-insensitive).
    Prefix,
    /// The query equals the title (case-insensitive).
    Exact,
}
68
69impl MatchType {
70 pub fn prior_odds(self) -> f64 {
77 match self {
78 Self::Exact => 99.0, Self::Prefix => 9.0, Self::WordStart => 4.0, Self::Substring => 2.0, Self::Fuzzy => 0.333, Self::NoMatch => 0.0, }
85 }
86
87 pub fn description(self) -> &'static str {
89 match self {
90 Self::Exact => "exact match",
91 Self::Prefix => "prefix match",
92 Self::WordStart => "word-start match",
93 Self::Substring => "contiguous substring",
94 Self::Fuzzy => "fuzzy match",
95 Self::NoMatch => "no match",
96 }
97 }
98}
99
/// One recorded piece of scoring evidence.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    /// Which scoring factor produced this entry.
    pub kind: EvidenceKind,
    /// Multiplicative odds factor: >1 supports the match, <1 opposes it,
    /// exactly 1 is neutral.
    pub bayes_factor: f64,
    /// Human-readable explanation of the factor.
    pub description: EvidenceDescription,
}
115
/// The category of a scoring factor recorded in the ledger.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    /// The match-class entry; treated as the prior odds by
    /// `EvidenceLedger::posterior_probability`.
    MatchType,
    /// Bonus for query characters landing on word starts.
    WordBoundary,
    /// Bonus for matching early in the title.
    Position,
    /// Penalty for spread-out fuzzy matches.
    GapPenalty,
    /// Bonus applied when the query matches one of the item's tags.
    TagMatch,
    /// Title-length / query-coverage factor.
    TitleLength,
}
132
133impl fmt::Display for EvidenceEntry {
134 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135 let direction = if self.bayes_factor > 1.0 {
136 "supports"
137 } else if self.bayes_factor < 1.0 {
138 "opposes"
139 } else {
140 "neutral"
141 };
142 write!(
143 f,
144 "{:?}: BF={:.2} ({}) - {}",
145 self.kind, self.bayes_factor, direction, self.description
146 )
147 }
148}
149
/// Structured payload for an evidence entry; rendered lazily via `Display`
/// so no `String` is allocated until the entry is formatted.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    /// A fixed message.
    Static(&'static str),
    /// Title length in chars (as reported by `len()` at the call site).
    TitleLengthChars { len: usize },
    /// Offset of the first matched position.
    FirstMatchPos { pos: usize },
    /// Number of matches that landed on word boundaries.
    WordBoundaryCount { count: usize },
    /// Total gap (in characters) between consecutive fuzzy matches.
    GapTotal { total: usize },
    /// Percentage of the title covered by the query.
    CoveragePercent { percent: f64 },
}
160
161impl fmt::Display for EvidenceDescription {
162 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163 match self {
164 Self::Static(msg) => write!(f, "{msg}"),
165 Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
166 Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
167 Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
168 Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
169 Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
170 }
171 }
172}
173
/// Ordered log of evidence factors accumulated while scoring one title.
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    /// Entries in insertion order; the first `MatchType` entry (if any)
    /// acts as the prior in `posterior_probability`.
    entries: Vec<EvidenceEntry>,
}
186
187impl EvidenceLedger {
188 pub fn new() -> Self {
190 Self::default()
191 }
192
193 pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
195 self.entries.push(EvidenceEntry {
196 kind,
197 bayes_factor,
198 description,
199 });
200 }
201
202 pub fn entries(&self) -> &[EvidenceEntry] {
204 &self.entries
205 }
206
207 pub fn combined_bayes_factor(&self) -> f64 {
209 self.entries.iter().map(|e| e.bayes_factor).product()
210 }
211
212 #[must_use = "use the prior odds (if any) for diagnostics"]
214 pub fn prior_odds(&self) -> Option<f64> {
215 self.entries
216 .iter()
217 .find(|e| e.kind == EvidenceKind::MatchType)
218 .map(|e| e.bayes_factor)
219 }
220
221 pub fn posterior_probability(&self) -> f64 {
226 let prior = self.prior_odds().unwrap_or(1.0);
227 let bf: f64 = self
229 .entries
230 .iter()
231 .filter(|e| e.kind != EvidenceKind::MatchType)
232 .map(|e| e.bayes_factor)
233 .product();
234
235 let posterior_odds = prior * bf;
236 if posterior_odds.is_infinite() {
237 1.0
238 } else {
239 posterior_odds / (1.0 + posterior_odds)
240 }
241 }
242}
243
244impl EvidenceLedger {
245 #[must_use]
247 pub fn to_jsonl(&self) -> String {
248 let entries_json: Vec<String> = self
249 .entries
250 .iter()
251 .map(|e| {
252 format!(
253 r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
254 e.kind, e.bayes_factor, e.description
255 )
256 })
257 .collect();
258 format!(
259 r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
260 entries_json.join(","),
261 self.combined_bayes_factor(),
262 self.posterior_probability(),
263 )
264 }
265}
266
267impl fmt::Display for EvidenceLedger {
268 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269 writeln!(f, "Evidence Ledger:")?;
270 for entry in &self.entries {
271 writeln!(f, " {}", entry)?;
272 }
273 writeln!(f, " Combined BF: {:.3}", self.combined_bayes_factor())?;
274 writeln!(f, " Posterior P: {:.3}", self.posterior_probability())?;
275 Ok(())
276 }
277}
278
/// Outcome of scoring one query against one title.
#[derive(Debug, Clone)]
pub struct MatchResult {
    /// Posterior match probability in [0, 1].
    pub score: f64,
    /// The match class that was detected.
    pub match_type: MatchType,
    /// Byte offsets of matched positions in the title. NOTE(review): for
    /// non-ASCII exact/prefix matches these cover every byte 0..len,
    /// including interior bytes of multibyte chars — confirm consumers
    /// tolerate that.
    pub match_positions: Vec<usize>,
    /// Evidence trail (left empty when tracking is disabled).
    pub evidence: EvidenceLedger,
}
295
296impl MatchResult {
297 pub fn no_match() -> Self {
299 let mut evidence = EvidenceLedger::new();
300 evidence.add(
301 EvidenceKind::MatchType,
302 0.0,
303 EvidenceDescription::Static("no matching characters found"),
304 );
305 Self {
306 score: 0.0,
307 match_type: MatchType::NoMatch,
308 match_positions: Vec::new(),
309 evidence,
310 }
311 }
312}
313
/// Bayesian fuzzy-match scorer.
///
/// Note: the derived `Default` yields `track_evidence: false` (same as
/// `fast()`), while `new()` enables tracking.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    /// When true, each scoring factor is recorded in an `EvidenceLedger`;
    /// when false, factors are multiplied directly (faster, no audit trail).
    pub track_evidence: bool,
}
327
impl BayesianScorer {
    /// Scorer with evidence tracking enabled (explainable, slower).
    pub fn new() -> Self {
        Self {
            track_evidence: true,
        }
    }

    /// Scorer with evidence tracking disabled: the same factors are
    /// multiplied directly without recording ledger entries.
    pub fn fast() -> Self {
        Self {
            track_evidence: false,
        }
    }

    /// Scores `query` against `title`, lowercasing both internally.
    ///
    /// Returns a no-match result when the query is longer than the title
    /// (byte lengths; NOTE(review): lowercasing can change byte length for
    /// some non-ASCII text — confirm this early-out on raw lengths is
    /// intended) or when detection finds nothing. An empty query matches
    /// everything via `score_empty_query`.
    pub fn score(&self, query: &str, title: &str) -> MatchResult {
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        if query.is_empty() {
            return self.score_empty_query(title);
        }

        let query_lower = query.to_lowercase();
        let title_lower = title.to_lowercase();

        let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        self.compute_score(match_type, positions, &query_lower, title)
    }

    /// Like `score`, but the caller supplies the already-lowercased query,
    /// avoiding one `to_lowercase` per title when scanning a corpus.
    pub fn score_with_query_lower(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
    ) -> MatchResult {
        let title_lower = title.to_lowercase();
        self.score_with_lowered_title(query, query_lower, title, &title_lower)
    }

    /// Like `score`, with both the query and the title pre-lowercased.
    pub fn score_with_lowered_title(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
    ) -> MatchResult {
        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
    }

    /// Fully precomputed variant: pre-lowercased strings plus optional
    /// precomputed word-start offsets (consumed by the ASCII fast path).
    pub fn score_with_lowered_title_and_words(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> MatchResult {
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        if query.is_empty() {
            return self.score_empty_query(title);
        }

        let (match_type, positions) =
            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        self.compute_score(match_type, positions, query_lower, title)
    }

    /// Scores against the title, then applies a fixed 3x odds boost when
    /// the query is a (case-insensitive) substring of any tag. The boost is
    /// only applied to titles that matched at all.
    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
        let mut result = self.score(query, title);

        let query_lower = query.to_lowercase();
        let tag_match = tags
            .iter()
            .any(|tag| tag.to_lowercase().contains(&query_lower));

        if tag_match && result.match_type != MatchType::NoMatch {
            if self.track_evidence {
                result.evidence.add(
                    EvidenceKind::TagMatch,
                    3.0, EvidenceDescription::Static("query matches tag"),
                );
                result.score = result.evidence.posterior_probability();
            } else if (0.0..1.0).contains(&result.score) {
                // No ledger in fast mode: convert probability -> odds,
                // apply the 3x boost, convert back. The range guard skips
                // exact 1.0 (would divide by zero).
                let odds = result.score / (1.0 - result.score);
                let boosted = odds * 3.0;
                result.score = boosted / (1.0 + boosted);
            }
        }

        result
    }

    /// Empty queries match every title; a tiny inverse-length bonus makes
    /// shorter titles sort first. Reported as `Fuzzy` with no positions.
    fn score_empty_query(&self, title: &str) -> MatchResult {
        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
        if self.track_evidence {
            let mut evidence = EvidenceLedger::new();
            evidence.add(
                EvidenceKind::MatchType,
                1.0, EvidenceDescription::Static("empty query matches all"),
            );
            evidence.add(
                EvidenceKind::TitleLength,
                length_factor,
                EvidenceDescription::TitleLengthChars { len: title.len() },
            );
            let score = evidence.posterior_probability();
            MatchResult {
                score,
                match_type: MatchType::Fuzzy, match_positions: Vec::new(),
                evidence,
            }
        } else {
            let odds = length_factor;
            let score = odds / (1.0 + odds);
            MatchResult {
                score,
                match_type: MatchType::Fuzzy,
                match_positions: Vec::new(),
                evidence: EvidenceLedger::new(),
            }
        }
    }

    /// Convenience wrapper over `detect_match_type_with_words` with no
    /// precomputed word starts.
    fn detect_match_type(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
    ) -> (MatchType, Vec<usize>) {
        self.detect_match_type_with_words(query_lower, title_lower, title, None)
    }

    /// Classifies the match, strongest class first: Exact, Prefix,
    /// WordStart, Substring, Fuzzy. Pure-ASCII pairs take the byte-level
    /// fast path.
    ///
    /// NOTE(review): in this Unicode path, Exact positions are `0..title.len()`
    /// and Prefix positions `0..query_lower.len()` — raw byte ranges that
    /// include interior bytes of multibyte chars and mix `title` vs lowered
    /// lengths, unlike the char-boundary offsets from the fuzzy path.
    /// Confirm downstream highlighting tolerates both.
    fn detect_match_type_with_words(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        if query_lower.is_ascii() && title_lower.is_ascii() {
            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
        }

        if query_lower == title_lower {
            let positions: Vec<usize> = (0..title.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_lower.starts_with(query_lower) {
            let positions: Vec<usize> = (0..query_lower.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_lower.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// ASCII-only classification: same cascade as the Unicode path, but
    /// over raw bytes (ASCII guarantees byte offset == char offset).
    fn detect_match_type_ascii(
        &self,
        query_lower: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        let query_bytes = query_lower.as_bytes();
        let title_bytes = title_lower.as_bytes();

        if query_bytes == title_bytes {
            let positions: Vec<usize> = (0..title_bytes.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_bytes.starts_with(query_bytes) {
            let positions: Vec<usize> = (0..query_bytes.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
        {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// Greedy in-order match of every query char at word starts only.
    /// A word start is offset 0 or a char whose preceding *byte* is a
    /// space, hyphen, or underscore (multibyte separators are not
    /// recognized). Returns `None` unless the whole query is consumed.
    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        let title_bytes = title.as_bytes();
        for (i, c) in title.char_indices() {
            let is_word_start = i == 0 || {
                let prev = title_bytes
                    .get(i.saturating_sub(1))
                    .copied()
                    .unwrap_or(b' ');
                prev == b' ' || prev == b'-' || prev == b'_'
            };

            if is_word_start
                && let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(i);
                query_chars.next();
            }
        }

        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// Byte-level word-start matcher. When `word_starts` is provided, only
    /// those precomputed offsets are probed (out-of-range entries are
    /// skipped); otherwise boundaries are recomputed from the bytes.
    /// Returns `Some` only if every query byte is consumed.
    fn word_start_match_ascii(
        &self,
        query: &[u8],
        title: &[u8],
        word_starts: Option<&[usize]>,
    ) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        if let Some(starts) = word_starts {
            for &pos in starts {
                if pos >= title.len() {
                    continue;
                }
                if title[pos] == query[query_idx] {
                    positions.push(pos);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        } else {
            for (i, &b) in title.iter().enumerate() {
                let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
                if is_word_start && b == query[query_idx] {
                    positions.push(i);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        }

        None
    }

    /// Greedy leftmost subsequence match over chars; returns char-boundary
    /// byte offsets, or `None` if the query is not a subsequence.
    fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        for (i, c) in title.char_indices() {
            if let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(i);
                query_chars.next();
            }
        }

        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// Byte-level greedy leftmost subsequence match (ASCII fast path).
    fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        for (i, &b) in title.iter().enumerate() {
            if b == query[query_idx] {
                positions.push(i);
                query_idx += 1;
                if query_idx == query.len() {
                    return Some(positions);
                }
            }
        }

        None
    }

    /// Combines the match-type prior with the per-match factors:
    /// - position: 1 + 0.5 / (first_pos + 1) — earlier matches score higher
    /// - word boundaries: 1 + 0.3 * boundary_count
    /// - gap penalty (fuzzy only, >1 position): 1 / (1 + 0.1 * total_gap)
    /// - coverage: 1 + 0.2 * (query_len / title_len), byte lengths
    ///
    /// With `track_evidence` the factors are recorded in a ledger and the
    /// score is its posterior; otherwise they are multiplied directly into
    /// the odds. NOTE(review): the ledger path saturates infinite odds to
    /// 1.0, the fast path does not — confirm overflow cannot occur here.
    fn compute_score(
        &self,
        match_type: MatchType,
        positions: Vec<usize>,
        query: &str,
        title: &str,
    ) -> MatchResult {
        let positions_ref = positions.as_slice();
        if !self.track_evidence {
            let mut combined_bf = match_type.prior_odds();

            if let Some(&first_pos) = positions_ref.first() {
                let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
                combined_bf *= position_factor;
            }

            let word_boundary_count = self.count_word_boundaries(positions_ref, title);
            if word_boundary_count > 0 {
                let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
                combined_bf *= boundary_factor;
            }

            if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
                let total_gap = self.total_gap(positions_ref);
                let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
                combined_bf *= gap_factor;
            }

            let title_len = title.len().max(1);
            let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
            combined_bf *= length_factor;

            let score = combined_bf / (1.0 + combined_bf);
            return MatchResult {
                score,
                match_type,
                match_positions: positions,
                evidence: EvidenceLedger::new(),
            };
        }

        let mut evidence = EvidenceLedger::new();

        let prior_odds = match_type.prior_odds();
        evidence.add(
            EvidenceKind::MatchType,
            prior_odds,
            EvidenceDescription::Static(match_type.description()),
        );

        if let Some(&first_pos) = positions_ref.first() {
            let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
            evidence.add(
                EvidenceKind::Position,
                position_factor,
                EvidenceDescription::FirstMatchPos { pos: first_pos },
            );
        }

        let word_boundary_count = self.count_word_boundaries(positions_ref, title);
        if word_boundary_count > 0 {
            let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
            evidence.add(
                EvidenceKind::WordBoundary,
                boundary_factor,
                EvidenceDescription::WordBoundaryCount {
                    count: word_boundary_count,
                },
            );
        }

        if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
            let total_gap = self.total_gap(positions_ref);
            let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
            evidence.add(
                EvidenceKind::GapPenalty,
                gap_factor,
                EvidenceDescription::GapTotal { total: total_gap },
            );
        }

        let title_len = title.len().max(1);
        let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
        evidence.add(
            EvidenceKind::TitleLength,
            length_factor,
            EvidenceDescription::CoveragePercent {
                percent: (query.len() as f64 / title_len as f64) * 100.0,
            },
        );

        let score = evidence.posterior_probability();

        MatchResult {
            score,
            match_type,
            match_positions: positions,
            evidence,
        }
    }

    /// Counts matched positions that sit on a word boundary (offset 0 or
    /// preceded by a space, hyphen, or underscore byte).
    fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
        let title_bytes = title.as_bytes();
        positions
            .iter()
            .filter(|&&pos| {
                pos == 0 || {
                    let prev = title_bytes
                        .get(pos.saturating_sub(1))
                        .copied()
                        .unwrap_or(b' ');
                    prev == b' ' || prev == b'-' || prev == b'_'
                }
            })
            .count()
    }

    /// Sum of the gaps (skipped bytes) between consecutive matched
    /// positions; 0 for fewer than two positions.
    fn total_gap(&self, positions: &[usize]) -> usize {
        if positions.len() < 2 {
            return 0;
        }
        positions
            .windows(2)
            .map(|w| w[1].saturating_sub(w[0]).saturating_sub(1))
            .sum()
    }
}
834
/// Confidence annotation attached to one ranked item.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    /// Fraction of observed score gaps that are <= this item's gap.
    pub confidence: f64,

    /// Score difference to the next-ranked item (0.0 for the last item).
    pub gap_to_next: f64,

    /// Discrete stability classification derived from confidence and ties.
    pub stability: RankStability,
}
863
/// How robust an item's rank position is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    /// Confidence at or above the stable threshold.
    Stable,
    /// Confidence between the marginal and stable thresholds.
    Marginal,
    /// Confidence below the marginal threshold, or tied with a neighbor.
    Unstable,
}
874
875impl RankStability {
876 pub fn label(self) -> &'static str {
878 match self {
879 Self::Stable => "stable",
880 Self::Marginal => "marginal",
881 Self::Unstable => "unstable",
882 }
883 }
884}
885
/// Output of `ConformalRanker::rank`: annotated items plus summary stats.
#[derive(Debug, Clone)]
pub struct RankedResults {
    /// Items in descending rank order, each with a confidence annotation.
    pub items: Vec<RankedItem>,

    /// Aggregate statistics over the whole ranking.
    pub summary: RankingSummary,
}
895
/// One ranked result with its provenance and confidence.
#[derive(Debug, Clone)]
pub struct RankedItem {
    /// Index of this result in the input slice passed to `rank`.
    pub original_index: usize,

    /// The underlying match result.
    pub result: MatchResult,

    /// Confidence annotation for this rank position.
    pub rank_confidence: RankConfidence,
}
908
/// Aggregate statistics describing one ranking pass.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    /// Number of results ranked.
    pub count: usize,

    /// Number of items classified `Stable`.
    pub stable_count: usize,

    /// Number of runs of consecutive tied items.
    pub tie_group_count: usize,

    /// Median of the adjacent score gaps (0.0 when fewer than two items).
    pub median_gap: f64,
}
924
/// Ranks `MatchResult`s and labels each rank's stability using the
/// empirical distribution of adjacent score gaps.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    /// Gaps smaller than this are treated as ties.
    pub tie_epsilon: f64,

    /// Minimum confidence for a `Stable` classification.
    pub stable_threshold: f64,

    /// Minimum confidence for a `Marginal` classification.
    pub marginal_threshold: f64,
}
963
964impl Default for ConformalRanker {
965 fn default() -> Self {
966 Self {
967 tie_epsilon: 1e-9,
968 stable_threshold: 0.7,
969 marginal_threshold: 0.3,
970 }
971 }
972}
973
impl ConformalRanker {
    /// Ranker with the default thresholds.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sorts by score descending (ties broken by the stronger `MatchType`)
    /// and annotates each item with a confidence equal to the fraction of
    /// observed adjacent gaps that are <= its own gap-to-next.
    ///
    /// NOTE(review): the last-ranked item always has `gap_to_next == 0.0`,
    /// so it is always classified as a tie (`Unstable`); a single-result
    /// input therefore reports one tie group — confirm this is intended.
    pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
        let count = results.len();

        if count == 0 {
            return RankedResults {
                items: Vec::new(),
                summary: RankingSummary {
                    count: 0,
                    stable_count: 0,
                    tie_group_count: 0,
                    median_gap: 0.0,
                },
            };
        }

        // Remember each result's original position before sorting.
        let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
        indexed.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        // gaps[i] = score difference between rank i and rank i+1,
        // clamped at zero.
        let gaps: Vec<f64> = if count > 1 {
            indexed
                .windows(2)
                .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
                .collect()
        } else {
            Vec::new()
        };

        // Ascending copy used for the empirical CDF and the median.
        let mut sorted_gaps = gaps.clone();
        sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let mut items: Vec<RankedItem> = Vec::with_capacity(count);
        let mut stable_count = 0;
        let mut tie_group_count = 0;
        let mut in_tie_group = false;

        for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
            let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);

            // Empirical CDF of this gap among all gaps; half an epsilon of
            // slack so floating-point-equal gaps still count as <=.
            let confidence = if sorted_gaps.is_empty() {
                1.0
            } else {
                let leq_count =
                    sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
                leq_count as f64 / sorted_gaps.len() as f64
            };

            let is_tie = gap_to_next < self.tie_epsilon;
            let stability = if is_tie {
                // Count each run of consecutive ties as one group.
                if !in_tie_group {
                    tie_group_count += 1;
                    in_tie_group = true;
                }
                RankStability::Unstable
            } else {
                in_tie_group = false;
                if confidence >= self.stable_threshold {
                    stable_count += 1;
                    RankStability::Stable
                } else if confidence >= self.marginal_threshold {
                    RankStability::Marginal
                } else {
                    RankStability::Unstable
                }
            };

            items.push(RankedItem {
                original_index: orig_idx,
                result,
                rank_confidence: RankConfidence {
                    confidence,
                    gap_to_next,
                    stability,
                },
            });
        }

        // Median over the ascending gaps (mean of the middle two for an
        // even count).
        let median_gap = if sorted_gaps.is_empty() {
            0.0
        } else {
            let mid = sorted_gaps.len() / 2;
            if sorted_gaps.len().is_multiple_of(2) {
                (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
            } else {
                sorted_gaps[mid]
            }
        };

        RankedResults {
            items,
            summary: RankingSummary {
                count,
                stable_count,
                tie_group_count,
                median_gap,
            },
        }
    }

    /// Ranks and keeps only the top `k` items; `stable_count` is recomputed
    /// for the truncated list. NOTE(review): `count`, `tie_group_count`,
    /// and `median_gap` still describe the full ranking — confirm callers
    /// expect that.
    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
        let mut ranked = self.rank(results);
        ranked.items.truncate(k);
        ranked.summary.stable_count = ranked
            .items
            .iter()
            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
            .count();
        ranked
    }
}
1111
1112impl fmt::Display for RankConfidence {
1113 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1114 write!(
1115 f,
1116 "confidence={:.3} gap={:.4} ({})",
1117 self.confidence,
1118 self.gap_to_next,
1119 self.stability.label()
1120 )
1121 }
1122}
1123
1124impl fmt::Display for RankingSummary {
1125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126 write!(
1127 f,
1128 "{} items, {} stable, {} tie groups, median gap {:.4}",
1129 self.count, self.stable_count, self.tie_group_count, self.median_gap
1130 )
1131 }
1132}
1133
/// Memo of one corpus index that matched the previous query.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CachedEntry {
    /// Index into the corpus slice passed to `score_corpus*`.
    corpus_index: usize,
}
1145
/// Wraps a `BayesianScorer` with prefix pruning across keystrokes: when
/// the new query extends the previous one, only corpus entries that
/// matched before are rescored.
#[derive(Debug)]
pub struct IncrementalScorer {
    /// Underlying scorer (`fast()` by default via `new`).
    scorer: BayesianScorer,
    /// Query used for the previous scan; pruning requires the new query to
    /// start with it.
    prev_query: String,
    /// Corpus indices that matched the previous query.
    cache: Vec<CachedEntry>,
    /// Generation tag used to detect corpus changes.
    corpus_generation: u64,
    /// Corpus length at the last scan; a length change forces a full scan.
    corpus_len: usize,
    /// Counters for scan types and pruning effectiveness.
    stats: IncrementalStats,
}
1188
/// Counters describing how much work incremental scanning saved.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    /// Number of full corpus scans performed.
    pub full_scans: u64,
    /// Number of pruned (incremental) scans performed.
    pub incremental_scans: u64,
    /// Total candidates actually scored.
    pub total_evaluated: u64,
    /// Total candidates skipped by pruning.
    pub total_pruned: u64,
}
1201
1202impl IncrementalStats {
1203 pub fn prune_ratio(&self) -> f64 {
1205 let total = self.total_evaluated + self.total_pruned;
1206 if total == 0 {
1207 0.0
1208 } else {
1209 self.total_pruned as f64 / total as f64
1210 }
1211 }
1212}
1213
impl IncrementalScorer {
    /// Incremental scorer around `BayesianScorer::fast()` (no evidence
    /// ledger, keeping repeated rescans cheap).
    pub fn new() -> Self {
        Self {
            scorer: BayesianScorer::fast(),
            prev_query: String::new(),
            cache: Vec::new(),
            corpus_generation: 0,
            corpus_len: 0,
            stats: IncrementalStats::default(),
        }
    }

    /// Same as `new` but with a caller-supplied scorer (e.g. one with
    /// evidence tracking enabled).
    pub fn with_scorer(scorer: BayesianScorer) -> Self {
        Self {
            scorer,
            prev_query: String::new(),
            cache: Vec::new(),
            corpus_generation: 0,
            corpus_len: 0,
            stats: IncrementalStats::default(),
        }
    }

    /// Drops all memoized state and bumps the generation counter so the
    /// next scan is a full one.
    pub fn invalidate(&mut self) {
        self.prev_query.clear();
        self.cache.clear();
        self.corpus_generation = self.corpus_generation.wrapping_add(1);
    }

    /// Read-only view of the scan/prune counters.
    pub fn stats(&self) -> &IncrementalStats {
        &self.stats
    }

    /// Scores `query` against every title in `corpus`, returning matching
    /// `(index, result)` pairs sorted by descending score.
    ///
    /// When the new query starts with the previous query (byte-wise, on
    /// the raw strings) only previously matching indices are rescored.
    /// `generation` should change whenever the corpus content changes;
    /// NOTE(review): with `generation: None`, a content change that keeps
    /// the same length is not detected and stale cached indices may be
    /// rescored — confirm callers bump the generation in that case.
    pub fn score_corpus(
        &mut self,
        query: &str,
        corpus: &[&str],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        // Prefix pruning: only re-score titles that matched the shorter
        // previous query.
        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental(query, &query_lower, corpus)
        } else {
            self.score_full(query, &query_lower, corpus)
        };

        // Remember this query and its matching indices for the next call
        // (clear + push_str reuses the existing allocation).
        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }

    /// Like `score_corpus`, with caller-provided pre-lowercased titles
    /// (must be index-aligned with `corpus`).
    pub fn score_corpus_with_lowered(
        &mut self,
        query: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        debug_assert_eq!(
            corpus.len(),
            corpus_lower.len(),
            "corpus_lower must match corpus length"
        );

        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
        } else {
            self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
        };

        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }

    /// Like `score_corpus_with_lowered`, additionally taking precomputed
    /// word-start offsets per title (all three slices index-aligned).
    pub fn score_corpus_with_lowered_and_words(
        &mut self,
        query: &str,
        corpus: &[String],
        corpus_lower: &[String],
        word_starts: &[Vec<usize>],
        generation: Option<u64>,
    ) -> Vec<(usize, MatchResult)> {
        debug_assert_eq!(
            corpus.len(),
            corpus_lower.len(),
            "corpus_lower must match corpus length"
        );
        debug_assert_eq!(
            corpus.len(),
            word_starts.len(),
            "word_starts must match corpus length"
        );

        let generation_val = generation.unwrap_or(self.corpus_generation);
        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
            self.invalidate();
            self.corpus_generation = generation_val;
            self.corpus_len = corpus.len();
        }

        let can_prune = !self.prev_query.is_empty()
            && query.starts_with(&self.prev_query)
            && !self.cache.is_empty();

        let query_lower = query.to_lowercase();
        let results = if can_prune {
            self.score_incremental_lowered_with_words(
                query,
                &query_lower,
                corpus,
                corpus_lower,
                word_starts,
            )
        } else {
            self.score_full_lowered_with_words(
                query,
                &query_lower,
                corpus,
                corpus_lower,
                word_starts,
            )
        };

        self.prev_query.clear();
        self.prev_query.push_str(query);
        self.cache = results
            .iter()
            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
            .collect();

        results
    }

    /// Full scan: scores every corpus entry, keeps score > 0, sorts by
    /// descending score (ties broken by stronger match type).
    fn score_full(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, title) in corpus.iter().enumerate() {
            let result = self
                .scorer
                .score_with_query_lower(query, query_lower, title);
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }

    /// Full scan using caller-provided pre-lowercased titles.
    fn score_full_lowered(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
            let result =
                self.scorer
                    .score_with_lowered_title(query, query_lower, title, title_lower);
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }

    /// Full scan using pre-lowercased titles and precomputed word starts.
    fn score_full_lowered_with_words(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[String],
        corpus_lower: &[String],
        word_starts: &[Vec<usize>],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.full_scans += 1;
        self.stats.total_evaluated += corpus.len() as u64;

        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
        for (i, ((title, title_lower), starts)) in corpus
            .iter()
            .zip(corpus_lower.iter())
            .zip(word_starts.iter())
            .enumerate()
        {
            let result = self.scorer.score_with_lowered_title_and_words(
                query,
                query_lower,
                title,
                title_lower,
                Some(starts),
            );
            if result.score > 0.0 {
                results.push((i, result));
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }

    /// Pruned scan: only rescores the indices cached from the previous
    /// query; everything else counts as pruned in the stats. Indices that
    /// fall outside the current corpus are skipped defensively.
    fn score_incremental(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.incremental_scans += 1;

        let prev_match_count = self.cache.len();
        let pruned = corpus.len().saturating_sub(prev_match_count);
        self.stats.total_pruned += pruned as u64;
        self.stats.total_evaluated += prev_match_count as u64;

        let mut results: Vec<(usize, MatchResult)> =
            Vec::with_capacity(self.cache.len().min(corpus.len()));
        for entry in &self.cache {
            if entry.corpus_index < corpus.len() {
                let result = self.scorer.score_with_query_lower(
                    query,
                    query_lower,
                    corpus[entry.corpus_index],
                );
                if result.score > 0.0 {
                    results.push((entry.corpus_index, result));
                }
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }

    /// Pruned scan using caller-provided pre-lowercased titles.
    fn score_incremental_lowered(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[&str],
        corpus_lower: &[&str],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.incremental_scans += 1;

        let prev_match_count = self.cache.len();
        let pruned = corpus.len().saturating_sub(prev_match_count);
        self.stats.total_pruned += pruned as u64;
        self.stats.total_evaluated += prev_match_count as u64;

        let mut results: Vec<(usize, MatchResult)> =
            Vec::with_capacity(self.cache.len().min(corpus.len()));
        for entry in &self.cache {
            if entry.corpus_index < corpus.len() {
                let title = corpus[entry.corpus_index];
                let title_lower = corpus_lower[entry.corpus_index];
                let result =
                    self.scorer
                        .score_with_lowered_title(query, query_lower, title, title_lower);
                if result.score > 0.0 {
                    results.push((entry.corpus_index, result));
                }
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }

    /// Pruned scan using pre-lowercased titles and precomputed word starts.
    fn score_incremental_lowered_with_words(
        &mut self,
        query: &str,
        query_lower: &str,
        corpus: &[String],
        corpus_lower: &[String],
        word_starts: &[Vec<usize>],
    ) -> Vec<(usize, MatchResult)> {
        self.stats.incremental_scans += 1;

        let prev_match_count = self.cache.len();
        let pruned = corpus.len().saturating_sub(prev_match_count);
        self.stats.total_pruned += pruned as u64;
        self.stats.total_evaluated += prev_match_count as u64;

        let mut results: Vec<(usize, MatchResult)> =
            Vec::with_capacity(self.cache.len().min(corpus.len()));
        for entry in &self.cache {
            if entry.corpus_index < corpus.len() {
                let title = &corpus[entry.corpus_index];
                let title_lower = &corpus_lower[entry.corpus_index];
                let starts = &word_starts[entry.corpus_index];
                let result = self.scorer.score_with_lowered_title_and_words(
                    query,
                    query_lower,
                    title,
                    title_lower,
                    Some(starts),
                );
                if result.score > 0.0 {
                    results.push((entry.corpus_index, result));
                }
            }
        }

        results.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        results
    }
}
1637
/// `Default` simply delegates to [`IncrementalScorer::new`], so the scorer
/// can be built via `IncrementalScorer::default()` or struct-update syntax.
impl Default for IncrementalScorer {
    fn default() -> Self {
        Self::new()
    }
}
1643
1644#[cfg(test)]
1649mod tests {
1650 use super::*;
1651
    // --- Match-type classification: each tier of MatchType is reachable and
    // scores land in the expected band. ---

    #[test]
    fn exact_match_highest_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("Settings", "Settings");
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score > 0.95, "Exact match should score > 0.95");
    }

    #[test]
    fn prefix_match_high_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");
        assert_eq!(result.match_type, MatchType::Prefix);
        assert!(result.score > 0.85, "Prefix match should score > 0.85");
    }

    #[test]
    fn word_start_match_score() {
        let scorer = BayesianScorer::new();
        // "g" and "d" hit the starts of "Go" and "Dashboard".
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_type, MatchType::WordStart);
        assert!(result.score > 0.75, "Word-start should score > 0.75");
    }

    #[test]
    fn substring_match_moderate_score() {
        let scorer = BayesianScorer::new();
        // "set" occurs inside "Asset", contiguous but not at a word start.
        let result = scorer.score("set", "Asset Manager");
        assert_eq!(result.match_type, MatchType::Substring);
        assert!(result.score > 0.5, "Substring should score > 0.5");
    }

    #[test]
    fn fuzzy_match_low_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_type, MatchType::Fuzzy);
        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
    }

    #[test]
    fn no_match_returns_zero() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("xyz", "Settings");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }
1702
    // --- Edge cases for the byte-level ASCII matchers. ---

    #[test]
    fn word_start_match_ascii_empty_query_matches() {
        let scorer = BayesianScorer::new();
        // An empty query vacuously matches and yields no positions.
        let positions = scorer
            .word_start_match_ascii(b"", b"go dashboard", None)
            .expect("empty query should match");
        assert!(positions.is_empty());
    }

    #[test]
    fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
        let scorer = BayesianScorer::new();
        // 999 is past the end of the title and must be ignored, not panic.
        let starts = [999, 0, 3];
        let positions = scorer
            .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
            .expect("should still match despite out-of-bounds precomputed starts");
        assert_eq!(positions, vec![0, 3]);
    }

    #[test]
    fn fuzzy_match_ascii_empty_query_matches() {
        let scorer = BayesianScorer::new();
        let positions = scorer
            .fuzzy_match_ascii(b"", b"settings")
            .expect("empty query should match");
        assert!(positions.is_empty());
    }
1732
    // --- General score properties: bounded, deterministic, length tiebreak,
    // case-insensitive. ---

    #[test]
    fn score_bounded() {
        let scorer = BayesianScorer::new();
        let test_cases = [
            ("a", "abcdefghijklmnop"),
            ("full", "full"),
            ("", "anything"),
            ("xyz", "abc"),
            ("stg", "Settings"),
        ];

        for (query, title) in test_cases {
            let result = scorer.score(query, title);
            assert!(
                result.score >= 0.0 && result.score <= 1.0,
                "Score for ({}, {}) = {} not in [0, 1]",
                query,
                title,
                result.score
            );
        }
    }

    #[test]
    fn score_deterministic() {
        let scorer = BayesianScorer::new();
        let result1 = scorer.score("nav", "Navigation");
        let result2 = scorer.score("nav", "Navigation");
        assert!(
            (result1.score - result2.score).abs() < f64::EPSILON,
            "Same input should produce identical scores"
        );
    }

    #[test]
    fn tiebreak_shorter_first() {
        let scorer = BayesianScorer::new();
        // Same query, equal match quality: shorter title should not lose.
        let short = scorer.score("set", "Set");
        let long = scorer.score("set", "Settings");
        assert!(
            short.score >= long.score,
            "Shorter title should score >= longer: {} vs {}",
            short.score,
            long.score
        );
    }

    #[test]
    fn case_insensitive() {
        let scorer = BayesianScorer::new();
        let lower = scorer.score("set", "Settings");
        let upper = scorer.score("SET", "Settings");
        assert!(
            (lower.score - upper.score).abs() < f64::EPSILON,
            "Case should not affect score"
        );
    }
1794
    // --- Match positions and empty-input safety. ---

    #[test]
    fn match_positions_correct() {
        let scorer = BayesianScorer::new();
        // Byte offsets of 'G' and 'D' in "Go Dashboard".
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_positions, vec![0, 3]);
    }

    #[test]
    fn fuzzy_match_positions() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_positions.len(), 3);
        // First matched char is the leading 'S'.
        assert_eq!(result.match_positions[0], 0);
    }

    #[test]
    fn empty_query_returns_all() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "Any Command");
        assert!(result.score > 0.0, "Empty query should have positive score");
        assert!(result.score < 1.0, "Empty query should not be max score");
    }

    #[test]
    fn empty_title_is_safe() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("x", "");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert!(result.score.is_finite(), "Score should remain finite");
    }

    #[test]
    fn empty_query_empty_title_is_finite() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "");
        assert!(result.score.is_finite(), "Score should remain finite");
        assert_eq!(result.match_type, MatchType::Fuzzy);
    }
1838
    // --- Length extremes, gap penalty, and tag boosting. ---

    #[test]
    fn query_longer_than_title() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("verylongquery", "short");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    #[test]
    fn long_query_exact_match_is_handled() {
        let scorer = BayesianScorer::new();
        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        let result = scorer.score(query, query);
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score.is_finite());
        assert!(result.score > 0.9, "Exact long query should score high");
    }

    #[test]
    fn gap_penalty_prefers_tight_fuzzy_matches() {
        let scorer = BayesianScorer::new();
        // Same fuzzy characters, different spread between them.
        let tight = scorer.score("ace", "abcde");
        let gappy = scorer.score("ace", "a...c...e");
        assert_eq!(tight.match_type, MatchType::Fuzzy);
        assert_eq!(gappy.match_type, MatchType::Fuzzy);
        assert!(
            tight.score > gappy.score,
            "Tight fuzzy match should score higher than gappy: {} vs {}",
            tight.score,
            gappy.score
        );
    }

    #[test]
    fn tag_match_boosts_score() {
        let scorer = BayesianScorer::new();
        let without_tag = scorer.score("set", "Settings");
        // "setup" starts with the query, so the tag should add evidence.
        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
        assert!(
            with_tag.score > without_tag.score,
            "Tag match should boost score: {} > {}",
            with_tag.score,
            without_tag.score
        );
    }
1890
    // --- Evidence ledger contents/display and ordering properties. ---

    #[test]
    fn evidence_ledger_tracks_factors() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");

        assert!(!result.evidence.entries().is_empty());

        // At minimum the match-type prior must be recorded.
        assert!(
            result
                .evidence
                .entries()
                .iter()
                .any(|e| e.kind == EvidenceKind::MatchType)
        );
    }

    #[test]
    fn evidence_ledger_display() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        let display = format!("{}", result.evidence);
        assert!(display.contains("Evidence Ledger"));
        assert!(display.contains("Posterior P:"));
    }

    #[test]
    fn property_ordering_total() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];

        let mut scores: Vec<(f64, &str)> = titles
            .iter()
            .map(|t| (scorer.score("set", t).score, *t))
            .collect();

        // All scores are finite, so partial_cmp never returns None here.
        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());

        for (score, _) in &scores {
            assert!(score.is_finite());
        }
    }

    #[test]
    fn property_prefix_monotonic() {
        let scorer = BayesianScorer::new();
        let one_char = scorer.score("s", "Settings");
        let three_char = scorer.score("set", "Settings");
        assert!(
            three_char.score >= one_char.score,
            "Longer prefix should score >= shorter"
        );
    }

    #[test]
    fn match_type_prior_ordering() {
        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
    }
1963
    // --- ConformalRanker: tie-breaking, degenerate inputs, sort order. ---

    #[test]
    fn conformal_tie_breaks_by_match_type() {
        // Four results with identical scores: ranking must fall back to the
        // MatchType ordering (Exact > Prefix > WordStart > Substring).
        let mut exact = MatchResult::no_match();
        exact.score = 0.5;
        exact.match_type = MatchType::Exact;

        let mut prefix = MatchResult::no_match();
        prefix.score = 0.5;
        prefix.match_type = MatchType::Prefix;

        let mut word_start = MatchResult::no_match();
        word_start.score = 0.5;
        word_start.match_type = MatchType::WordStart;

        let mut substring = MatchResult::no_match();
        substring.score = 0.5;
        substring.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
        let order: Vec<MatchType> = ranked
            .items
            .iter()
            .map(|item| item.result.match_type)
            .collect();

        assert_eq!(
            order,
            vec![
                MatchType::Exact,
                MatchType::Prefix,
                MatchType::WordStart,
                MatchType::Substring
            ]
        );
    }

    #[test]
    fn conformal_empty_input() {
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(Vec::new());
        assert_eq!(ranked.items.len(), 0);
        assert_eq!(ranked.summary.count, 0);
        assert_eq!(ranked.summary.stable_count, 0);
        assert_eq!(ranked.summary.tie_group_count, 0);
        assert_eq!(ranked.summary.median_gap, 0.0);
    }

    #[test]
    fn conformal_single_item() {
        let scorer = BayesianScorer::new();
        let results = vec![scorer.score("set", "Settings")];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        // A lone item has nothing to be displaced by: full confidence.
        assert_eq!(ranked.items.len(), 1);
        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
        assert_eq!(ranked.summary.count, 1);
    }

    #[test]
    fn conformal_sorted_descending() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset Manager"),
            scorer.score("set", "Reset Configuration Panel"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for w in ranked.items.windows(2) {
            let a = w[0].result.score;
            let b = w[1].result.score;
            assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
        }
    }
2046
    // --- ConformalRanker: confidence bounds, determinism, tie detection. ---

    #[test]
    fn conformal_confidence_bounded() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for item in &ranked.items {
            assert!(
                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
                "Confidence must be in [0, 1], got {}",
                item.rank_confidence.confidence
            );
            assert!(
                item.rank_confidence.gap_to_next >= 0.0,
                "Gap must be non-negative"
            );
        }
    }

    #[test]
    fn conformal_deterministic() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Asset", "Reset"];

        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked1 = ranker.rank(results1);
        let ranked2 = ranker.rank(results2);

        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
            assert!(
                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
                "Confidence should be deterministic"
            );
            assert_eq!(
                a.original_index, b.original_index,
                "Rank order should be deterministic"
            );
        }
    }

    #[test]
    fn conformal_ties_detected() {
        // Two items share score 0.8 and type Prefix: an exact tie.
        let mut r1 = MatchResult::no_match();
        r1.score = 0.8;
        r1.match_type = MatchType::Prefix;
        let mut r2 = MatchResult::no_match();
        r2.score = 0.8;
        r2.match_type = MatchType::Prefix;
        let mut r3 = MatchResult::no_match();
        r3.score = 0.5;
        r3.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![r1, r2, r3]);

        assert_eq!(
            ranked.items[0].rank_confidence.stability,
            RankStability::Unstable,
            "Tied items should be Unstable"
        );
        assert!(
            ranked.summary.tie_group_count >= 1,
            "Should detect at least one tie group"
        );
    }

    #[test]
    fn conformal_all_identical_scores() {
        let mut results = Vec::new();
        for _ in 0..5 {
            let mut r = MatchResult::no_match();
            r.score = 0.5;
            r.match_type = MatchType::Fuzzy;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        // Every item is tied with every other: nothing can be stable.
        for item in &ranked.items {
            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
        }
    }
2145
    // --- ConformalRanker: stability, top-k, index preservation, Display. ---

    #[test]
    fn conformal_well_separated_scores_are_stable() {
        // Gaps of ~0.2-0.3 between scores should produce stable positions.
        let mut results = Vec::new();
        for &s in &[0.9, 0.6, 0.3, 0.1] {
            let mut r = MatchResult::no_match();
            r.score = s;
            r.match_type = MatchType::Prefix;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.stable_count >= 2,
            "Well-separated scores should yield stable positions, got {}",
            ranked.summary.stable_count
        );
    }

    #[test]
    fn conformal_top_k_truncates() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank_top_k(results, 2);

        assert_eq!(ranked.items.len(), 2);
        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
    }

    #[test]
    fn conformal_original_indices_preserved() {
        let scorer = BayesianScorer::new();
        let titles = ["Zebra Tool", "Settings", "Apple"];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert_eq!(
            ranked.items[0].original_index, 1,
            "Settings should be first; original_index should be 1"
        );
    }

    #[test]
    fn conformal_summary_display() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Set Theme"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let display = format!("{}", ranked.summary);
        assert!(display.contains("2 items"));
    }

    #[test]
    fn conformal_rank_confidence_display() {
        let rc = RankConfidence {
            confidence: 0.85,
            gap_to_next: 0.1234,
            stability: RankStability::Stable,
        };
        let display = format!("{}", rc);
        assert!(display.contains("0.850"));
        assert!(display.contains("stable"));
    }

    #[test]
    fn conformal_stability_labels() {
        assert_eq!(RankStability::Stable.label(), "stable");
        assert_eq!(RankStability::Marginal.label(), "marginal");
        assert_eq!(RankStability::Unstable.label(), "unstable");
    }

    #[test]
    fn conformal_last_item_gap_zero() {
        // The bottom item has no successor, so its gap is defined as 0.
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset"),
            scorer.score("set", "Reset"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let last = ranked.items.last().unwrap();
        assert_eq!(
            last.rank_confidence.gap_to_next, 0.0,
            "Last item gap should be 0"
        );
    }

    #[test]
    fn conformal_median_gap_non_negative() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
            "System Settings",
            "Reset Defaults",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.median_gap >= 0.0,
            "Median gap must be non-negative"
        );
    }
2281
2282 fn test_corpus() -> Vec<&'static str> {
2287 vec![
2288 "Open File",
2289 "Save File",
2290 "Close Tab",
2291 "Git: Commit",
2292 "Git: Push",
2293 "Git: Pull",
2294 "Go to Line",
2295 "Find in Files",
2296 "Toggle Terminal",
2297 "Format Document",
2298 ]
2299 }
2300
    // --- IncrementalScorer: full-scan vs incremental-scan lifecycle and
    // cache invalidation triggers. ---

    #[test]
    fn incremental_full_scan_on_first_call() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();
        let results = scorer.score_corpus("git", &corpus, None);

        assert!(!results.is_empty(), "Should find matches for 'git'");
        assert_eq!(scorer.stats().full_scans, 1);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    #[test]
    fn incremental_prunes_on_query_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        // Each extension of the query should reuse the prior result set.
        let r1 = scorer.score_corpus("g", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        let r2 = scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 1);
        assert!(r2.len() <= r1.len(), "Extended query should match <= items");

        let r3 = scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 2);
        assert!(r3.len() <= r2.len());
    }

    #[test]
    fn incremental_correctness_matches_full_scan() {
        let corpus = test_corpus();

        // Incremental path: warm the cache with "g", then extend to "git".
        let mut inc = IncrementalScorer::new();
        inc.score_corpus("g", &corpus, None);
        let inc_results = inc.score_corpus("git", &corpus, None);

        // Cold path: score "git" from scratch.
        let mut full = IncrementalScorer::new();
        let full_results = full.score_corpus("git", &corpus, None);

        assert_eq!(
            inc_results.len(),
            full_results.len(),
            "Incremental and full scan should return same count"
        );

        for (a, b) in inc_results.iter().zip(full_results.iter()) {
            assert_eq!(a.0, b.0, "Same corpus indices");
            assert!(
                (a.1.score - b.1.score).abs() < f64::EPSILON,
                "Same scores: {} vs {}",
                a.1.score,
                b.1.score
            );
        }
    }

    #[test]
    fn incremental_falls_back_on_non_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        // "fi" is not an extension of "git", so the cache cannot be reused.
        scorer.score_corpus("fi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }

    #[test]
    fn incremental_invalidate_forces_full_scan() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.invalidate();

        // Even though "gi" extends "g", invalidation discards the cache.
        scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    #[test]
    fn incremental_generation_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, Some(1));

        // A different generation token signals the corpus changed.
        scorer.score_corpus("gi", &corpus, Some(2));
        assert_eq!(scorer.stats().full_scans, 2);
    }

    #[test]
    fn incremental_corpus_size_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus1 = test_corpus();
        let corpus2 = &corpus1[..5];

        scorer.score_corpus("g", &corpus1, None);
        scorer.score_corpus("gi", corpus2, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }
2413
    // --- IncrementalScorer: defensive behavior and result invariants. ---

    #[test]
    fn incremental_skips_out_of_bounds_cache_entries() {
        let mut scorer = IncrementalScorer::new();

        // Plant a stale cache entry pointing past the corpus; every
        // incremental path must skip it rather than panic.
        scorer.cache = vec![
            CachedEntry { corpus_index: 0 },
            CachedEntry { corpus_index: 999 },
        ];

        let corpus = ["a"];
        let results = scorer.score_incremental("a", "a", &corpus);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        let corpus_lower = ["a"];
        let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        let corpus_s = vec!["a".to_string()];
        let corpus_lower_s = vec!["a".to_string()];
        let word_starts = vec![vec![0]];
        let results = scorer.score_incremental_lowered_with_words(
            "a",
            "a",
            &corpus_s,
            &corpus_lower_s,
            &word_starts,
        );
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);
    }

    #[test]
    fn incremental_empty_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        // Empty query matches everything.
        let results = scorer.score_corpus("", &corpus, None);
        assert_eq!(results.len(), corpus.len());
    }

    #[test]
    fn incremental_no_match_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("zzz", &corpus, None);
        assert!(results.is_empty());
    }

    #[test]
    fn incremental_stats_prune_ratio() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.score_corpus("gi", &corpus, None);
        scorer.score_corpus("git", &corpus, None);

        let stats = scorer.stats();
        assert!(
            stats.prune_ratio() > 0.0,
            "Prune ratio should be > 0 after incremental scans"
        );
        assert!(stats.total_pruned > 0, "Should have pruned some items");
    }

    #[test]
    fn incremental_results_sorted_descending() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("o", &corpus, None);
        for w in results.windows(2) {
            let a = w[0].1.score;
            let b = w[1].1.score;
            assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
        }
    }
2497
    // --- Parity between the plain and precomputed-lowercase scoring paths,
    // and determinism of the incremental scorer. ---

    #[test]
    fn incremental_lowered_matches_full() {
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close".to_string(),
            "Launch 🚀".to_string(),
        ];
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
        // Precompute word-start byte offsets the same way production does:
        // position 0 or any char preceded by space/hyphen/underscore.
        let word_starts: Vec<Vec<usize>> = lower
            .iter()
            .map(|title_lower| {
                let bytes = title_lower.as_bytes();
                title_lower
                    .char_indices()
                    .filter_map(|(i, _)| {
                        let is_word_start = i == 0 || {
                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
                            prev == b' ' || prev == b'-' || prev == b'_'
                        };
                        is_word_start.then_some(i)
                    })
                    .collect()
            })
            .collect();

        let mut full = IncrementalScorer::new();
        let mut lowered = IncrementalScorer::new();

        let a = full.score_corpus("fi", &corpus_refs, None);
        let b =
            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);

        assert_eq!(a.len(), b.len());
        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
            assert_eq!(ia, ib);
            assert_eq!(ra.match_type, rb.match_type);
            assert_eq!(ra.match_positions, rb.match_positions);
            assert!(
                (ra.score - rb.score).abs() < 1e-12,
                "score mismatch: {} vs {}",
                ra.score,
                rb.score
            );
        }
    }

    #[test]
    fn incremental_deterministic() {
        let corpus = test_corpus();

        let mut s1 = IncrementalScorer::new();
        let r1 = s1.score_corpus("git", &corpus, None);

        let mut s2 = IncrementalScorer::new();
        let r2 = s2.score_corpus("git", &corpus, None);

        assert_eq!(r1.len(), r2.len());
        for (a, b) in r1.iter().zip(r2.iter()) {
            assert_eq!(a.0, b.0);
            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
        }
    }
2562
    // --- MatchType accessors and EvidenceDescription Display variants. ---

    #[test]
    fn match_type_descriptions() {
        assert_eq!(MatchType::Exact.description(), "exact match");
        assert_eq!(MatchType::Prefix.description(), "prefix match");
        assert_eq!(MatchType::WordStart.description(), "word-start match");
        assert_eq!(MatchType::Substring.description(), "contiguous substring");
        assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
        assert_eq!(MatchType::NoMatch.description(), "no match");
    }

    #[test]
    fn match_type_no_match_prior_is_zero() {
        assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
    }

    #[test]
    fn evidence_description_static_display() {
        let d = EvidenceDescription::Static("test message");
        assert_eq!(format!("{d}"), "test message");
    }

    #[test]
    fn evidence_description_title_length_display() {
        let d = EvidenceDescription::TitleLengthChars { len: 42 };
        assert_eq!(format!("{d}"), "title length 42 chars");
    }

    #[test]
    fn evidence_description_first_match_pos_display() {
        let d = EvidenceDescription::FirstMatchPos { pos: 5 };
        assert_eq!(format!("{d}"), "first match at position 5");
    }

    #[test]
    fn evidence_description_word_boundary_count_display() {
        let d = EvidenceDescription::WordBoundaryCount { count: 3 };
        assert_eq!(format!("{d}"), "3 word boundary matches");
    }

    #[test]
    fn evidence_description_gap_total_display() {
        let d = EvidenceDescription::GapTotal { total: 7 };
        assert_eq!(format!("{d}"), "total gap of 7 characters");
    }

    #[test]
    fn evidence_description_coverage_percent_display() {
        let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
        let s = format!("{d}");
        assert!(s.contains("84%"), "Should round: {s}");
    }
2622
    // --- EvidenceEntry Display direction and EvidenceLedger arithmetic. ---

    #[test]
    fn evidence_entry_display_supports() {
        // BF > 1.0 is evidence in favor of the match.
        let entry = EvidenceEntry {
            kind: EvidenceKind::Position,
            bayes_factor: 1.5,
            description: EvidenceDescription::FirstMatchPos { pos: 0 },
        };
        let s = format!("{entry}");
        assert!(s.contains("supports"));
        assert!(s.contains("Position"));
    }

    #[test]
    fn evidence_entry_display_opposes() {
        // BF < 1.0 is evidence against the match.
        let entry = EvidenceEntry {
            kind: EvidenceKind::GapPenalty,
            bayes_factor: 0.5,
            description: EvidenceDescription::GapTotal { total: 10 },
        };
        let s = format!("{entry}");
        assert!(s.contains("opposes"));
    }

    #[test]
    fn evidence_entry_display_neutral() {
        // BF == 1.0 carries no information either way.
        let entry = EvidenceEntry {
            kind: EvidenceKind::TitleLength,
            bayes_factor: 1.0,
            description: EvidenceDescription::Static("neutral factor"),
        };
        let s = format!("{entry}");
        assert!(s.contains("neutral"));
    }

    #[test]
    fn ledger_combined_bayes_factor() {
        // Independent factors multiply: 2.0 * 3.0 = 6.0.
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("a"),
        );
        ledger.add(
            EvidenceKind::WordBoundary,
            3.0,
            EvidenceDescription::Static("b"),
        );
        assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
    }

    #[test]
    fn ledger_combined_bayes_factor_empty() {
        // Empty product is the multiplicative identity.
        let ledger = EvidenceLedger::new();
        assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn ledger_prior_odds_present() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            9.0,
            EvidenceDescription::Static("prefix"),
        );
        assert_eq!(ledger.prior_odds(), Some(9.0));
    }

    #[test]
    fn ledger_prior_odds_absent() {
        let ledger = EvidenceLedger::new();
        assert_eq!(ledger.prior_odds(), None);
    }

    #[test]
    fn ledger_posterior_probability_no_prior() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("pos"),
        );
        // odds 2.0 -> probability odds/(1+odds) = 2/3.
        let prob = ledger.posterior_probability();
        assert!((prob - 2.0 / 3.0).abs() < 1e-9);
    }

    #[test]
    fn ledger_posterior_probability_infinite_odds() {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            f64::INFINITY,
            EvidenceDescription::Static("inf"),
        );
        assert_eq!(ledger.posterior_probability(), 1.0);
    }
2728
    // --- Fast-mode scorer: same score range, no evidence ledger kept. ---

    #[test]
    fn fast_scorer_no_evidence() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("set", "Settings");
        assert!(result.score > 0.0);
        // Fast mode skips ledger bookkeeping entirely.
        assert!(result.evidence.entries().is_empty());
    }

    #[test]
    fn fast_scorer_score_matches_probability_range() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("set", "Settings");
        assert!(result.score >= 0.0 && result.score <= 1.0);
    }

    #[test]
    fn fast_scorer_empty_query() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("", "Test");
        assert!(result.score > 0.0);
        assert!(result.evidence.entries().is_empty());
    }

    #[test]
    fn fast_scorer_fuzzy_with_gap_penalty() {
        let scorer = BayesianScorer::fast();
        let result = scorer.score("ace", "a...b...c...d...e");
        assert_eq!(result.match_type, MatchType::Fuzzy);
        assert!(result.score > 0.0);
    }

    #[test]
    fn fast_scorer_tag_boost() {
        let scorer = BayesianScorer::fast();
        let without = scorer.score("set", "Settings");
        let with = scorer.score_with_tags("set", "Settings", &["setup"]);
        assert!(with.score > without.score);
    }

    #[test]
    fn fast_scorer_tag_no_boost_at_score_one() {
        let scorer = BayesianScorer::fast();
        // Exact match plus matching tag must still not exceed probability 1.
        let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
        assert!(result.score <= 1.0);
    }
2779
2780 #[test]
2785 fn unicode_exact_match() {
2786 let scorer = BayesianScorer::new();
2787 let result = scorer.score("日本語", "日本語");
2788 assert_eq!(result.match_type, MatchType::Exact);
2789 assert!(result.score > 0.9);
2790 }
2791
2792 #[test]
2793 fn unicode_prefix_match() {
2794 let scorer = BayesianScorer::new();
2795 let result = scorer.score("日本", "日本語テスト");
2796 assert_eq!(result.match_type, MatchType::Prefix);
2797 }
2798
2799 #[test]
2800 fn unicode_substring_match() {
2801 let scorer = BayesianScorer::new();
2802 let result = scorer.score("本語", "日本語テスト");
2803 assert_eq!(result.match_type, MatchType::Substring);
2804 }
2805
2806 #[test]
2807 fn unicode_fuzzy_match() {
2808 let scorer = BayesianScorer::new();
2809 let result = scorer.score("日テ", "日本語テスト");
2810 assert_eq!(result.match_type, MatchType::Fuzzy);
2811 }
2812
2813 #[test]
2814 fn unicode_no_match() {
2815 let scorer = BayesianScorer::new();
2816 let result = scorer.score("ωψξ", "αβγ");
2817 assert_eq!(result.match_type, MatchType::NoMatch);
2818 }
2819
2820 #[test]
2821 fn unicode_word_start_match() {
2822 let scorer = BayesianScorer::new();
2823 let result = scorer.score("gd", "go dashboard");
2825 assert_eq!(result.match_type, MatchType::WordStart);
2826 }
2827
2828 #[test]
2833 fn score_with_query_lower_matches_score() {
2834 let scorer = BayesianScorer::new();
2835 let direct = scorer.score("Set", "Settings");
2836 let pre = scorer.score_with_query_lower("Set", "set", "Settings");
2837 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2838 assert_eq!(direct.match_type, pre.match_type);
2839 }
2840
2841 #[test]
2842 fn score_with_lowered_title_matches_score() {
2843 let scorer = BayesianScorer::new();
2844 let direct = scorer.score("set", "Settings");
2845 let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
2846 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2847 }
2848
2849 #[test]
2850 fn score_with_lowered_title_and_words_matches() {
2851 let scorer = BayesianScorer::new();
2852 let direct = scorer.score("gd", "Go Dashboard");
2853 let pre = scorer.score_with_lowered_title_and_words(
2854 "gd",
2855 "gd",
2856 "Go Dashboard",
2857 "go dashboard",
2858 Some(&[0, 3]),
2859 );
2860 assert_eq!(direct.match_type, pre.match_type);
2861 assert!((direct.score - pre.score).abs() < f64::EPSILON);
2862 }
2863
2864 #[test]
2869 fn tag_match_no_title_match_returns_nomatch() {
2870 let scorer = BayesianScorer::new();
2871 let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
2872 assert_eq!(result.match_type, MatchType::NoMatch);
2874 assert_eq!(result.score, 0.0);
2875 }
2876
2877 #[test]
2878 fn tag_match_no_matching_tag() {
2879 let scorer = BayesianScorer::new();
2880 let without = scorer.score("set", "Settings");
2881 let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
2882 assert!((without.score - with.score).abs() < f64::EPSILON);
2884 }
2885
2886 #[test]
2887 fn tag_match_case_insensitive() {
2888 let scorer = BayesianScorer::new();
2889 let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
2890 let without = scorer.score("set", "Settings");
2891 assert!(result.score > without.score);
2892 }
2893
2894 #[test]
2899 fn no_match_result_has_evidence() {
2900 let r = MatchResult::no_match();
2901 assert_eq!(r.score, 0.0);
2902 assert_eq!(r.match_type, MatchType::NoMatch);
2903 assert!(r.match_positions.is_empty());
2904 assert_eq!(r.evidence.entries().len(), 1);
2905 assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
2906 assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
2907 }
2908
2909 #[test]
2914 fn count_word_boundaries_at_start() {
2915 let scorer = BayesianScorer::new();
2916 let count = scorer.count_word_boundaries(&[0], "hello");
2918 assert_eq!(count, 1);
2919 }
2920
2921 #[test]
2922 fn count_word_boundaries_after_separators() {
2923 let scorer = BayesianScorer::new();
2924 let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2926 assert_eq!(count, 4);
2927 }
2928
2929 #[test]
2930 fn count_word_boundaries_mid_word() {
2931 let scorer = BayesianScorer::new();
2932 let count = scorer.count_word_boundaries(&[2], "hello");
2934 assert_eq!(count, 0);
2935 }
2936
2937 #[test]
2938 fn total_gap_empty() {
2939 let scorer = BayesianScorer::new();
2940 assert_eq!(scorer.total_gap(&[]), 0);
2941 }
2942
2943 #[test]
2944 fn total_gap_single() {
2945 let scorer = BayesianScorer::new();
2946 assert_eq!(scorer.total_gap(&[5]), 0);
2947 }
2948
2949 #[test]
2950 fn total_gap_contiguous() {
2951 let scorer = BayesianScorer::new();
2952 assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
2954 }
2955
2956 #[test]
2957 fn total_gap_with_gaps() {
2958 let scorer = BayesianScorer::new();
2959 assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
2961 }
2962
2963 #[test]
2968 fn conformal_custom_thresholds() {
2969 let ranker = ConformalRanker {
2970 tie_epsilon: 0.1,
2971 stable_threshold: 0.9,
2972 marginal_threshold: 0.5,
2973 };
2974 let mut r1 = MatchResult::no_match();
2976 r1.score = 0.5;
2977 r1.match_type = MatchType::Prefix;
2978 let mut r2 = MatchResult::no_match();
2979 r2.score = 0.45;
2980 r2.match_type = MatchType::Prefix;
2981
2982 let ranked = ranker.rank(vec![r1, r2]);
2983 assert_eq!(
2984 ranked.items[0].rank_confidence.stability,
2985 RankStability::Unstable,
2986 "Gap 0.05 < epsilon 0.1 → tie"
2987 );
2988 }
2989
2990 #[test]
2991 fn conformal_rank_top_k_larger_than_count() {
2992 let scorer = BayesianScorer::new();
2993 let results = vec![scorer.score("set", "Settings")];
2994 let ranker = ConformalRanker::new();
2995 let ranked = ranker.rank_top_k(results, 100);
2996 assert_eq!(ranked.items.len(), 1);
2997 }
2998
2999 #[test]
3000 fn conformal_rank_top_k_zero() {
3001 let scorer = BayesianScorer::new();
3002 let results = vec![
3003 scorer.score("set", "Settings"),
3004 scorer.score("set", "Asset"),
3005 ];
3006 let ranker = ConformalRanker::new();
3007 let ranked = ranker.rank_top_k(results, 0);
3008 assert!(ranked.items.is_empty());
3009 assert_eq!(ranked.summary.stable_count, 0);
3010 }
3011
3012 #[test]
3013 fn conformal_rank_confidence_display_marginal() {
3014 let rc = RankConfidence {
3015 confidence: 0.5,
3016 gap_to_next: 0.05,
3017 stability: RankStability::Marginal,
3018 };
3019 let s = format!("{rc}");
3020 assert!(s.contains("marginal"));
3021 }
3022
3023 #[test]
3024 fn conformal_rank_confidence_display_unstable() {
3025 let rc = RankConfidence {
3026 confidence: 0.1,
3027 gap_to_next: 0.001,
3028 stability: RankStability::Unstable,
3029 };
3030 let s = format!("{rc}");
3031 assert!(s.contains("unstable"));
3032 }
3033
3034 #[test]
3035 fn conformal_median_gap_even_count() {
3036 let mut results = Vec::new();
3038 for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
3039 let mut r = MatchResult::no_match();
3040 r.score = s;
3041 r.match_type = MatchType::Prefix;
3042 results.push(r);
3043 }
3044 let ranker = ConformalRanker::new();
3045 let ranked = ranker.rank(results);
3046 assert!(
3048 (ranked.summary.median_gap - 0.2).abs() < 0.01,
3049 "median_gap={}, expected ~0.2",
3050 ranked.summary.median_gap
3051 );
3052 }
3053
3054 #[test]
3059 fn incremental_with_scorer_uses_provided_scorer() {
3060 let scorer = BayesianScorer::new(); let mut inc = IncrementalScorer::with_scorer(scorer);
3062 let corpus = test_corpus();
3063 let results = inc.score_corpus("git", &corpus, None);
3064 assert!(!results.is_empty());
3065 assert!(!results[0].1.evidence.entries().is_empty());
3067 }
3068
3069 #[test]
3070 fn incremental_default_trait() {
3071 let inc = IncrementalScorer::default();
3072 assert_eq!(inc.stats().full_scans, 0);
3073 assert_eq!(inc.stats().incremental_scans, 0);
3074 }
3075
3076 #[test]
3077 fn incremental_stats_prune_ratio_zero_when_empty() {
3078 let stats = IncrementalStats::default();
3079 assert_eq!(stats.prune_ratio(), 0.0);
3080 }
3081
3082 #[test]
3083 fn incremental_score_corpus_with_lowered() {
3084 let corpus = vec!["Open File", "Save File", "Close Tab"];
3085 let lower = vec!["open file", "save file", "close tab"];
3086 let mut inc = IncrementalScorer::new();
3087 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3088 assert!(!results.is_empty());
3089 for (idx, _) in &results {
3091 assert!(*idx < corpus.len());
3092 }
3093 }
3094
3095 #[test]
3096 fn incremental_score_corpus_with_lowered_incremental_path() {
3097 let corpus = vec!["Open File", "Save File", "Close Tab"];
3098 let lower = vec!["open file", "save file", "close tab"];
3099 let mut inc = IncrementalScorer::new();
3100 inc.score_corpus_with_lowered("f", &corpus, &lower, None);
3101 let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3102 assert_eq!(inc.stats().incremental_scans, 1);
3103 assert!(!results.is_empty());
3104 }
3105
3106 #[test]
3107 fn incremental_score_corpus_with_lowered_and_words() {
3108 let corpus = vec![
3109 "Open File".to_string(),
3110 "Save File".to_string(),
3111 "Close Tab".to_string(),
3112 ];
3113 let lower = vec![
3114 "open file".to_string(),
3115 "save file".to_string(),
3116 "close tab".to_string(),
3117 ];
3118 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3119
3120 let mut inc = IncrementalScorer::new();
3121 let results =
3122 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3123 assert!(!results.is_empty());
3124 }
3125
3126 #[test]
3127 fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
3128 let corpus = vec![
3129 "Open File".to_string(),
3130 "Save File".to_string(),
3131 "Close Tab".to_string(),
3132 ];
3133 let lower = vec![
3134 "open file".to_string(),
3135 "save file".to_string(),
3136 "close tab".to_string(),
3137 ];
3138 let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3139
3140 let mut inc = IncrementalScorer::new();
3141 inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
3142 let results =
3143 inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3144 assert_eq!(inc.stats().incremental_scans, 1);
3145 assert!(!results.is_empty());
3146 }
3147
3148 #[test]
3153 fn bayesian_scorer_default_has_evidence_off() {
3154 let scorer = BayesianScorer::default();
3155 assert!(!scorer.track_evidence);
3156 }
3157
3158 #[test]
3163 fn word_start_match_with_hyphens() {
3164 let scorer = BayesianScorer::new();
3165 let result = scorer.score("fc", "file-commander");
3166 assert_eq!(result.match_type, MatchType::WordStart);
3167 assert_eq!(result.match_positions, vec![0, 5]);
3168 }
3169
3170 #[test]
3171 fn word_start_match_with_underscores() {
3172 let scorer = BayesianScorer::new();
3173 let result = scorer.score("fc", "file_commander");
3174 assert_eq!(result.match_type, MatchType::WordStart);
3175 assert_eq!(result.match_positions, vec![0, 5]);
3176 }
3177
3178 #[test]
3183 fn match_type_ord() {
3184 let mut types = vec![
3185 MatchType::Exact,
3186 MatchType::NoMatch,
3187 MatchType::Fuzzy,
3188 MatchType::Prefix,
3189 MatchType::Substring,
3190 MatchType::WordStart,
3191 ];
3192 types.sort();
3193 assert_eq!(
3194 types,
3195 vec![
3196 MatchType::NoMatch,
3197 MatchType::Fuzzy,
3198 MatchType::Substring,
3199 MatchType::WordStart,
3200 MatchType::Prefix,
3201 MatchType::Exact,
3202 ]
3203 );
3204 }
3205
3206 #[test]
3211 fn evidence_kind_equality() {
3212 assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
3213 assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
3214 }
3215
3216 #[test]
3221 fn earlier_substring_scores_higher() {
3222 let scorer = BayesianScorer::new();
3223 let early = scorer.score("set", "set in stone");
3224 let late = scorer.score("set", "the asset");
3225 assert!(
3227 early.score > late.score,
3228 "Earlier match should score higher: {} vs {}",
3229 early.score,
3230 late.score
3231 );
3232 }
3233}
3234
#[cfg(test)]
mod perf_tests {
    //! Wall-clock latency regression tests. Budgets are deliberately loose
    //! (microsecond ceilings on p50/p95) so they catch order-of-magnitude
    //! regressions without flaking on noisy CI hosts.
    use super::*;
    use std::time::Instant;

    /// Latency percentiles and moments for one measured workload (µs).
    #[derive(Debug, Clone, Copy)]
    struct PerfStats {
        p50_us: u64,
        p95_us: u64,
        p99_us: u64,
        mean_us: f64,
        variance_us: f64,
        samples: usize,
    }

    // Per-workload latency budgets in microseconds. Budgets are multiplied
    // by COVERAGE_BUDGET_MULTIPLIER under instrumented coverage builds.
    const SINGLE_SCORE_BUDGET_US: u64 = 10;
    const CORPUS_100_BUDGET_US: u64 = 500;
    const CORPUS_1000_BUDGET_US: u64 = 5_000;
    const CORPUS_5000_BUDGET_US: u64 = 25_000;
    const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 2_000;
    const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000;
    const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;

    /// True when running under llvm-cov style instrumentation, detected via
    /// the environment variables the coverage tooling sets.
    fn is_coverage_run() -> bool {
        std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
    }

    /// Scales `base` by the coverage multiplier when `coverage_run` is set;
    /// split out from env detection so the scaling itself is testable.
    fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
        if coverage_run {
            base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
        } else {
            base
        }
    }

    /// Budget for the current process, coverage-adjusted.
    fn coverage_budget_us(base: u64) -> u64 {
        coverage_budget_us_with_mode(base, is_coverage_run())
    }

    #[test]
    fn coverage_budget_us_with_mode_respects_flag() {
        assert_eq!(coverage_budget_us_with_mode(123, false), 123);
        assert_eq!(
            coverage_budget_us_with_mode(123, true),
            123 * COVERAGE_BUDGET_MULTIPLIER
        );
    }

    /// Builds a `size`-item corpus by cycling 20 realistic command titles,
    /// suffixing repeats with their index so every title stays unique.
    fn make_corpus(size: usize) -> Vec<String> {
        let base = [
            "Open File",
            "Save File",
            "Close Tab",
            "Split Editor Right",
            "Split Editor Down",
            "Toggle Terminal",
            "Go to Line",
            "Find in Files",
            "Replace in Files",
            "Git: Commit",
            "Git: Push",
            "Git: Pull",
            "Debug: Start",
            "Debug: Stop",
            "Debug: Step Over",
            "Format Document",
            "Rename Symbol",
            "Go to Definition",
            "Find All References",
            "Toggle Sidebar",
        ];
        base.iter()
            .cycle()
            .take(size)
            .enumerate()
            .map(|(i, cmd)| {
                if i < base.len() {
                    (*cmd).to_string()
                } else {
                    format!("{} ({})", cmd, i)
                }
            })
            .collect()
    }

    /// Runs `f` three times unmeasured to warm caches, then `iterations`
    /// timed runs, and summarizes the observed latencies. Percentile indices
    /// round down and clamp to the last sample.
    fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
        let mut times = Vec::with_capacity(iterations);
        for _ in 0..3 {
            f();
        }
        for _ in 0..iterations {
            let start = Instant::now();
            f();
            times.push(start.elapsed().as_micros() as u64);
        }
        times.sort_unstable();
        let len = times.len();
        let p50 = times[len / 2];
        let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
        let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
        let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
        let variance = times
            .iter()
            .map(|value| {
                let delta = *value as f64 - mean;
                delta * delta
            })
            .sum::<f64>()
            / len as f64;

        PerfStats {
            p50_us: p50,
            p95_us: p95,
            p99_us: p99,
            mean_us: mean,
            variance_us: variance,
            samples: len,
        }
    }

    /// Emits one structured JSON log line for a measured workload so CI can
    /// scrape latency trends; the fixed timestamp keeps output diffable.
    fn log_stats(event: &str, stats: &PerfStats) {
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"{}\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            event,
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
    }

    /// Scores every title in `corpus` against `query`, drops non-matches,
    /// and sorts best-first — the naive full-scan baseline each perf test
    /// measures.
    fn naive_score_sorted(
        scorer: &BayesianScorer,
        corpus: &[String],
        query: &str,
    ) -> Vec<MatchResult> {
        let mut results: Vec<_> = corpus
            .iter()
            .map(|t| scorer.score(query, t))
            .filter(|r| r.score > 0.0)
            .collect();
        results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
        results
    }

    #[test]
    fn perf_single_score_under_budget() {
        let scorer = BayesianScorer::fast();
        let stats = measure_stats_us(200, || {
            std::hint::black_box(scorer.score("git co", "Git: Commit"));
        });
        log_stats("palette_score_single", &stats);
        let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
        assert!(
            stats.p50_us <= budget,
            "Single score p50 = {}µs exceeds budget {}µs",
            stats.p50_us,
            budget,
        );
    }

    #[test]
    fn perf_corpus_100_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(100);
        let stats = measure_stats_us(50, || {
            std::hint::black_box(naive_score_sorted(&scorer, &corpus, "git co"));
        });
        log_stats("palette_corpus_100", &stats);
        let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "100-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    #[test]
    fn perf_corpus_1000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(1_000);
        let stats = measure_stats_us(20, || {
            std::hint::black_box(naive_score_sorted(&scorer, &corpus, "git co"));
        });
        log_stats("palette_corpus_1000", &stats);
        let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "1000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    #[test]
    fn perf_corpus_5000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(5_000);
        let stats = measure_stats_us(10, || {
            std::hint::black_box(naive_score_sorted(&scorer, &corpus, "git co"));
        });
        log_stats("palette_corpus_5000", &stats);
        let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "5000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    #[test]
    fn perf_incremental_7key_100_under_budget() {
        // Simulates a user typing "git com" one keystroke at a time.
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        log_stats("palette_incremental_7key_100", &stats);
        let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    #[test]
    fn perf_incremental_7key_1000_under_budget() {
        let corpus = make_corpus(1_000);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(10, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        log_stats("palette_incremental_7key_1000", &stats);
        let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }

    #[test]
    fn perf_incremental_faster_than_naive() {
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let scorer = BayesianScorer::fast();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let naive_stats = measure_stats_us(30, || {
            for query in &queries {
                std::hint::black_box(naive_score_sorted(&scorer, &corpus, query));
            }
        });

        let inc_stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
            naive_stats.samples,
            naive_stats.p50_us,
            naive_stats.p95_us,
            inc_stats.p50_us,
            inc_stats.p95_us
        );

        // Allow 2x slack plus a 50µs constant so tiny absolute timings on
        // fast machines do not flake this comparison.
        assert!(
            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50,
            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
            inc_stats.p50_us,
            naive_stats.p50_us,
        );
    }

    /// Ratio of the large-corpus p50 to the small-corpus p50; zero when the
    /// small measurement rounded down to 0µs (avoids division by zero).
    fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
        if p50_small_us > 0 {
            p50_large_us as f64 / p50_small_us as f64
        } else {
            0.0
        }
    }

    #[test]
    fn scaling_ratio_handles_zero_divisor() {
        assert_eq!(scaling_ratio_us(0, 123), 0.0);
        assert_eq!(scaling_ratio_us(10, 25), 2.5);
    }

    #[test]
    fn perf_scaling_sublinear() {
        let scorer = BayesianScorer::fast();
        let corpus_100 = make_corpus(100);
        let corpus_1000 = make_corpus(1_000);

        let stats_100 = measure_stats_us(30, || {
            std::hint::black_box(naive_score_sorted(&scorer, &corpus_100, "git"));
        });

        let stats_1000 = measure_stats_us(20, || {
            std::hint::black_box(naive_score_sorted(&scorer, &corpus_1000, "git"));
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
            stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
        );

        // A generous 25x threshold on a 10x corpus growth: only flags
        // clearly super-linear blowups, not measurement noise.
        let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
        assert!(
            ratio < 25.0,
            "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
            ratio,
            stats_100.p50_us,
            stats_1000.p50_us,
        );
    }
}