1#![forbid(unsafe_code)]
2
3use std::fmt;
47
/// How a query matched a title, ordered weakest-to-strongest.
///
/// The derived `PartialOrd`/`Ord` rely on declaration order
/// (`NoMatch < Fuzzy < Substring < WordStart < Prefix < Exact`); the rankers
/// in this file use that ordering as a score tie-break, so do not reorder
/// the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    /// The query's characters could not all be found in the title.
    NoMatch,
    /// All query characters appear in order, but not contiguously.
    Fuzzy,
    /// The query appears as a contiguous substring of the title.
    Substring,
    /// Each query character matches at a word start (index 0 or after
    /// ' ', '-' or '_').
    WordStart,
    /// The title starts with the query (case-insensitive).
    Prefix,
    /// The query equals the title (case-insensitive).
    Exact,
}
68
69impl MatchType {
70 pub fn prior_odds(self) -> f64 {
77 match self {
78 Self::Exact => 99.0, Self::Prefix => 9.0, Self::WordStart => 4.0, Self::Substring => 2.0, Self::Fuzzy => 0.333, Self::NoMatch => 0.0, }
85 }
86
87 pub fn description(self) -> &'static str {
89 match self {
90 Self::Exact => "exact match",
91 Self::Prefix => "prefix match",
92 Self::WordStart => "word-start match",
93 Self::Substring => "contiguous substring",
94 Self::Fuzzy => "fuzzy match",
95 Self::NoMatch => "no match",
96 }
97 }
98}
99
/// One recorded piece of scoring evidence: which feature produced it, its
/// multiplicative odds update, and a displayable description.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    /// The feature this evidence came from.
    pub kind: EvidenceKind,
    /// Multiplicative odds update: > 1.0 supports the match, < 1.0 opposes
    /// it, 1.0 is neutral.
    pub bayes_factor: f64,
    /// Structured payload rendered by the `Display` impls.
    pub description: EvidenceDescription,
}
115
/// The feature a ledger entry was derived from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    /// Prior odds taken from the detected [`MatchType`]; treated as the
    /// prior (not a likelihood factor) by `EvidenceLedger`.
    MatchType,
    /// Matched characters landing on word boundaries.
    WordBoundary,
    /// How early in the title the first match occurs.
    Position,
    /// Penalty for gaps between fuzzy-match positions.
    GapPenalty,
    /// The query matched one of the item's tags.
    TagMatch,
    /// Title-length / query-coverage factor.
    TitleLength,
}
132
133impl fmt::Display for EvidenceEntry {
134 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135 let direction = if self.bayes_factor > 1.0 {
136 "supports"
137 } else if self.bayes_factor < 1.0 {
138 "opposes"
139 } else {
140 "neutral"
141 };
142 write!(
143 f,
144 "{:?}: BF={:.2} ({}) - {}",
145 self.kind, self.bayes_factor, direction, self.description
146 )
147 }
148}
149
/// Display payload for an [`EvidenceEntry`], kept structured so formatting
/// is deferred to display time.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    /// A fixed message.
    Static(&'static str),
    /// Title length. NOTE(review): callers pass `str::len()` (bytes), but
    /// the Display impl says "chars" — only accurate for ASCII titles.
    TitleLengthChars { len: usize },
    /// Byte position of the first matched character.
    FirstMatchPos { pos: usize },
    /// Number of matches that landed on word boundaries.
    WordBoundaryCount { count: usize },
    /// Total number of skipped characters between fuzzy-match positions.
    GapTotal { total: usize },
    /// Query length as a percentage of title length.
    CoveragePercent { percent: f64 },
}
160
161impl fmt::Display for EvidenceDescription {
162 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163 match self {
164 Self::Static(msg) => write!(f, "{msg}"),
165 Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
166 Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
167 Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
168 Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
169 Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
170 }
171 }
172}
173
/// Accumulates [`EvidenceEntry`] values for one query/title pairing and
/// combines them into a posterior probability (see
/// [`EvidenceLedger::posterior_probability`]).
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    // Insertion-ordered evidence; the first MatchType entry is treated as
    // the prior, everything else as likelihood factors.
    entries: Vec<EvidenceEntry>,
}
186
187impl EvidenceLedger {
188 pub fn new() -> Self {
190 Self::default()
191 }
192
193 pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
195 self.entries.push(EvidenceEntry {
196 kind,
197 bayes_factor,
198 description,
199 });
200 }
201
202 pub fn entries(&self) -> &[EvidenceEntry] {
204 &self.entries
205 }
206
207 pub fn combined_bayes_factor(&self) -> f64 {
209 self.entries.iter().map(|e| e.bayes_factor).product()
210 }
211
212 pub fn prior_odds(&self) -> Option<f64> {
214 self.entries
215 .iter()
216 .find(|e| e.kind == EvidenceKind::MatchType)
217 .map(|e| e.bayes_factor)
218 }
219
220 pub fn posterior_probability(&self) -> f64 {
225 let prior = self.prior_odds().unwrap_or(1.0);
226 let bf: f64 = self
228 .entries
229 .iter()
230 .filter(|e| e.kind != EvidenceKind::MatchType)
231 .map(|e| e.bayes_factor)
232 .product();
233
234 let posterior_odds = prior * bf;
235 if posterior_odds.is_infinite() {
236 1.0
237 } else {
238 posterior_odds / (1.0 + posterior_odds)
239 }
240 }
241}
242
243impl fmt::Display for EvidenceLedger {
244 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
245 writeln!(f, "Evidence Ledger:")?;
246 for entry in &self.entries {
247 writeln!(f, " {}", entry)?;
248 }
249 writeln!(f, " Combined BF: {:.3}", self.combined_bayes_factor())?;
250 writeln!(f, " Posterior P: {:.3}", self.posterior_probability())?;
251 Ok(())
252 }
253}
254
/// Outcome of scoring one title against one query.
#[derive(Debug, Clone)]
pub struct MatchResult {
    /// Posterior probability in [0, 1]; higher is better.
    pub score: f64,
    /// The strongest match category that applied.
    pub match_type: MatchType,
    /// Byte indices into the title where query characters matched.
    /// NOTE(review): for non-ASCII exact/prefix/substring matches these are
    /// plain byte ranges and may include non-char-boundary bytes — confirm
    /// how consumers (e.g. highlighting) use them.
    pub match_positions: Vec<usize>,
    /// Ledger of the factors that produced `score`; empty when the scorer
    /// was built with evidence tracking disabled.
    pub evidence: EvidenceLedger,
}
271
272impl MatchResult {
273 pub fn no_match() -> Self {
275 let mut evidence = EvidenceLedger::new();
276 evidence.add(
277 EvidenceKind::MatchType,
278 0.0,
279 EvidenceDescription::Static("no matching characters found"),
280 );
281 Self {
282 score: 0.0,
283 match_type: MatchType::NoMatch,
284 match_positions: Vec::new(),
285 evidence,
286 }
287 }
288}
289
/// Title scorer that combines match-type prior odds with per-feature Bayes
/// factors (position, word boundaries, gaps, coverage) into a posterior
/// probability.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    /// When true, every factor is recorded in the result's
    /// [`EvidenceLedger`]; when false, factors are multiplied inline and the
    /// result carries an empty ledger (the `Default` derive yields `false`,
    /// matching [`BayesianScorer::fast`]).
    pub track_evidence: bool,
}
303
impl BayesianScorer {
    /// Scorer with evidence tracking enabled (full ledger per result).
    pub fn new() -> Self {
        Self {
            track_evidence: true,
        }
    }

    /// Scorer with evidence tracking disabled: same scores, empty ledgers,
    /// no per-factor allocations.
    pub fn fast() -> Self {
        Self {
            track_evidence: false,
        }
    }

    /// Scores `query` against `title`, lowercasing both internally.
    ///
    /// Returns [`MatchResult::no_match`] when the query is longer than the
    /// title or no match type applies; an empty query matches everything
    /// weakly (see `score_empty_query`).
    pub fn score(&self, query: &str, title: &str) -> MatchResult {
        // Byte-length fast path. NOTE(review): lowercasing can change byte
        // lengths for some Unicode text, so this is a heuristic prefilter,
        // not an exact character-count comparison.
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        if query.is_empty() {
            return self.score_empty_query(title);
        }

        let query_lower = query.to_lowercase();
        let title_lower = title.to_lowercase();

        let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        self.compute_score(match_type, positions, &query_lower, title)
    }

    /// Variant of [`Self::score`] for callers that already lowercased the
    /// query (e.g. when scoring one query against many titles).
    pub fn score_with_query_lower(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
    ) -> MatchResult {
        let title_lower = title.to_lowercase();
        self.score_with_lowered_title(query, query_lower, title, &title_lower)
    }

    /// Variant for callers that pre-lowercased both query and title.
    pub fn score_with_lowered_title(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
    ) -> MatchResult {
        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
    }

    /// Fully pre-processed entry point: lowercased strings plus optional
    /// precomputed word-start byte offsets (used by the ASCII fast path).
    pub fn score_with_lowered_title_and_words(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> MatchResult {
        // Same byte-length fast path as `score`.
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        if query.is_empty() {
            return self.score_empty_query(title);
        }

        let (match_type, positions) =
            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        self.compute_score(match_type, positions, query_lower, title)
    }

    /// Scores the title, then applies a fixed 3.0 Bayes-factor boost when
    /// any tag contains the query (case-insensitive). Titles that did not
    /// match at all are never boosted.
    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
        let mut result = self.score(query, title);

        let query_lower = query.to_lowercase();
        let tag_match = tags
            .iter()
            .any(|tag| tag.to_lowercase().contains(&query_lower));

        if tag_match && result.match_type != MatchType::NoMatch {
            if self.track_evidence {
                result.evidence.add(
                    EvidenceKind::TagMatch,
                    // Fixed tag-match Bayes factor.
                    3.0, EvidenceDescription::Static("query matches tag"),
                );
                result.score = result.evidence.posterior_probability();
            } else if (0.0..1.0).contains(&result.score) {
                // Fast path: probability -> odds, apply the same 3.0 factor,
                // convert back. The range guard skips score == 1.0, avoiding
                // a division by zero.
                let odds = result.score / (1.0 - result.score);
                let boosted = odds * 3.0;
                result.score = boosted / (1.0 + boosted);
            }
        }

        result
    }

    /// An empty query matches everything; shorter titles score slightly
    /// higher so they sort first.
    fn score_empty_query(&self, title: &str) -> MatchResult {
        // 1.0 < factor <= 1.1, shrinking toward 1.0 as the title grows.
        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
        if self.track_evidence {
            let mut evidence = EvidenceLedger::new();
            evidence.add(
                EvidenceKind::MatchType,
                // Neutral prior: an empty query carries no match information.
                1.0, EvidenceDescription::Static("empty query matches all"),
            );
            evidence.add(
                EvidenceKind::TitleLength,
                length_factor,
                EvidenceDescription::TitleLengthChars { len: title.len() },
            );
            let score = evidence.posterior_probability();
            MatchResult {
                score,
                // Weakest non-failing category.
                match_type: MatchType::Fuzzy, match_positions: Vec::new(),
                evidence,
            }
        } else {
            let odds = length_factor;
            let score = odds / (1.0 + odds);
            MatchResult {
                score,
                match_type: MatchType::Fuzzy,
                match_positions: Vec::new(),
                evidence: EvidenceLedger::new(),
            }
        }
    }

    /// Detects the strongest applicable match type (no precomputed word
    /// starts).
    fn detect_match_type(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
    ) -> (MatchType, Vec<usize>) {
        self.detect_match_type_with_words(query_lower, title_lower, title, None)
    }

    /// Tries match types strongest-first: exact, prefix, word-start,
    /// substring, fuzzy. Pure-ASCII inputs take a byte-wise fast path.
    fn detect_match_type_with_words(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        if query_lower.is_ascii() && title_lower.is_ascii() {
            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
        }

        if query_lower == title_lower {
            // NOTE(review): positions are a plain byte range over `title`,
            // which for non-ASCII text includes non-char-boundary bytes —
            // confirm consumers expect that.
            let positions: Vec<usize> = (0..title.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_lower.starts_with(query_lower) {
            // NOTE(review): range is sized by the lowered query, which may
            // not align with `title`'s char boundaries when lowercasing
            // changes byte lengths.
            let positions: Vec<usize> = (0..query_lower.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_lower.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// ASCII fast path: operates on bytes (one byte == one char), optionally
    /// reusing caller-precomputed word-start offsets.
    fn detect_match_type_ascii(
        &self,
        query_lower: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        let query_bytes = query_lower.as_bytes();
        let title_bytes = title_lower.as_bytes();

        if query_bytes == title_bytes {
            let positions: Vec<usize> = (0..title_bytes.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_bytes.starts_with(query_bytes) {
            let positions: Vec<usize> = (0..query_bytes.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
        {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// Matches each query char, in order, against characters that begin a
    /// word in `title` (index 0, or preceded by ' ', '-' or '_'). Returns
    /// the byte positions, or `None` when the query is not fully consumed.
    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        let title_bytes = title.as_bytes();
        for (i, c) in title.char_indices() {
            let is_word_start = i == 0 || {
                // The separators are single-byte ASCII, so inspecting the
                // previous byte is safe even inside multi-byte text.
                let prev = title_bytes
                    .get(i.saturating_sub(1))
                    .copied()
                    .unwrap_or(b' ');
                prev == b' ' || prev == b'-' || prev == b'_'
            };

            if is_word_start
                && let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(i);
                query_chars.next();
            }
        }

        // Success only when every query char found a word start.
        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// ASCII variant of [`Self::word_start_match`]; when `word_starts` is
    /// provided, only those offsets are probed instead of rescanning.
    fn word_start_match_ascii(
        &self,
        query: &[u8],
        title: &[u8],
        word_starts: Option<&[usize]>,
    ) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        if let Some(starts) = word_starts {
            for &pos in starts {
                if pos >= title.len() {
                    // Out-of-range offset (stale precomputation) — skip it.
                    continue;
                }
                if title[pos] == query[query_idx] {
                    positions.push(pos);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        } else {
            for (i, &b) in title.iter().enumerate() {
                let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
                if is_word_start && b == query[query_idx] {
                    positions.push(i);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        }

        // Query not fully consumed.
        None
    }

    /// Greedy leftmost subsequence match: all query chars must appear in
    /// order anywhere in the title. Returns byte positions or `None`.
    fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        for (i, c) in title.char_indices() {
            if let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(i);
                query_chars.next();
            }
        }

        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// Byte-wise version of [`Self::fuzzy_match`] for ASCII input.
    fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        for (i, &b) in title.iter().enumerate() {
            if b == query[query_idx] {
                positions.push(i);
                query_idx += 1;
                if query_idx == query.len() {
                    return Some(positions);
                }
            }
        }

        None
    }

    /// Combines the match-type prior with position, word-boundary, gap and
    /// coverage factors into a posterior score.
    ///
    /// The two branches compute the identical product; the untracked branch
    /// just skips allocating ledger entries. Keep their factor formulas in
    /// sync when editing either one.
    fn compute_score(
        &self,
        match_type: MatchType,
        positions: Vec<usize>,
        query: &str,
        title: &str,
    ) -> MatchResult {
        let positions_ref = positions.as_slice();
        if !self.track_evidence {
            let mut combined_bf = match_type.prior_odds();

            // Earlier first match => larger boost (up to 1.5x at position 0).
            if let Some(&first_pos) = positions_ref.first() {
                let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
                combined_bf *= position_factor;
            }

            // Each boundary-aligned match adds 0.3 to the factor.
            let word_boundary_count = self.count_word_boundaries(positions_ref, title);
            if word_boundary_count > 0 {
                let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
                combined_bf *= boundary_factor;
            }

            // Fuzzy matches are penalized for spread-out positions.
            if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
                let total_gap = self.total_gap(positions_ref);
                let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
                combined_bf *= gap_factor;
            }

            // Higher query/title coverage => small boost; max(1) guards the
            // empty-title division.
            let title_len = title.len().max(1);
            let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
            combined_bf *= length_factor;

            // Odds -> probability.
            let score = combined_bf / (1.0 + combined_bf);
            return MatchResult {
                score,
                match_type,
                match_positions: positions,
                evidence: EvidenceLedger::new(),
            };
        }

        // Evidence-tracking path: the same math, recorded entry by entry.
        let mut evidence = EvidenceLedger::new();

        let prior_odds = match_type.prior_odds();
        evidence.add(
            EvidenceKind::MatchType,
            prior_odds,
            EvidenceDescription::Static(match_type.description()),
        );

        if let Some(&first_pos) = positions_ref.first() {
            let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
            evidence.add(
                EvidenceKind::Position,
                position_factor,
                EvidenceDescription::FirstMatchPos { pos: first_pos },
            );
        }

        let word_boundary_count = self.count_word_boundaries(positions_ref, title);
        if word_boundary_count > 0 {
            let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
            evidence.add(
                EvidenceKind::WordBoundary,
                boundary_factor,
                EvidenceDescription::WordBoundaryCount {
                    count: word_boundary_count,
                },
            );
        }

        if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
            let total_gap = self.total_gap(positions_ref);
            let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
            evidence.add(
                EvidenceKind::GapPenalty,
                gap_factor,
                EvidenceDescription::GapTotal { total: total_gap },
            );
        }

        let title_len = title.len().max(1);
        let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
        evidence.add(
            EvidenceKind::TitleLength,
            length_factor,
            EvidenceDescription::CoveragePercent {
                percent: (query.len() as f64 / title_len as f64) * 100.0,
            },
        );

        let score = evidence.posterior_probability();

        MatchResult {
            score,
            match_type,
            match_positions: positions,
            evidence,
        }
    }

    /// Counts matched positions sitting on a word boundary (index 0, or
    /// preceded by ' ', '-' or '_').
    fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
        let title_bytes = title.as_bytes();
        positions
            .iter()
            .filter(|&&pos| {
                pos == 0 || {
                    let prev = title_bytes
                        .get(pos.saturating_sub(1))
                        .copied()
                        .unwrap_or(b' ');
                    prev == b' ' || prev == b'-' || prev == b'_'
                }
            })
            .count()
    }

    /// Sum of the holes between consecutive match positions; adjacent
    /// positions contribute zero.
    fn total_gap(&self, positions: &[usize]) -> usize {
        if positions.len() < 2 {
            return 0;
        }
        positions
            .windows(2)
            .map(|w| w[1].saturating_sub(w[0]).saturating_sub(1))
            .sum()
    }
}
810
/// Confidence information about one item's position in a ranking.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    /// Fraction of all observed score gaps that are <= this item's own gap
    /// (a conformal-style quantile); higher means better separated.
    pub confidence: f64,

    /// Score difference to the next-ranked item (0.0 for the last item).
    pub gap_to_next: f64,

    /// Discrete stability class derived from `confidence` and tie detection.
    pub stability: RankStability,
}
839
/// How robust an item's rank position is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    /// Clearly separated from its neighbors (high confidence).
    Stable,
    /// Separated, but with middling confidence.
    Marginal,
    /// Tied with a neighbor, or poorly separated.
    Unstable,
}
850
851impl RankStability {
852 pub fn label(self) -> &'static str {
854 match self {
855 Self::Stable => "stable",
856 Self::Marginal => "marginal",
857 Self::Unstable => "unstable",
858 }
859 }
860}
861
/// Output of [`ConformalRanker::rank`]: the sorted items plus aggregate
/// statistics.
#[derive(Debug, Clone)]
pub struct RankedResults {
    /// Items sorted best-first.
    pub items: Vec<RankedItem>,

    /// Aggregate statistics about the ranking.
    pub summary: RankingSummary,
}
871
/// One ranked result together with its provenance and rank confidence.
#[derive(Debug, Clone)]
pub struct RankedItem {
    /// Index of this result in the `Vec` originally passed to `rank`.
    pub original_index: usize,

    /// The underlying match result.
    pub result: MatchResult,

    /// How trustworthy this item's rank position is.
    pub rank_confidence: RankConfidence,
}
884
/// Aggregate statistics for one ranking pass.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    /// Total number of ranked items (before any top-k truncation).
    pub count: usize,

    /// Items classified [`RankStability::Stable`].
    pub stable_count: usize,

    /// Number of contiguous runs of tied items.
    pub tie_group_count: usize,

    /// Median of the score gaps between adjacent ranks (0.0 with < 2 items).
    pub median_gap: f64,
}
900
/// Ranks [`MatchResult`]s and annotates each with stability information
/// derived from the empirical distribution of score gaps.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    /// Gaps smaller than this are treated as exact ties.
    pub tie_epsilon: f64,

    /// Minimum confidence for a rank to be labeled stable.
    pub stable_threshold: f64,

    /// Minimum confidence for a rank to be labeled marginal.
    pub marginal_threshold: f64,
}
939
940impl Default for ConformalRanker {
941 fn default() -> Self {
942 Self {
943 tie_epsilon: 1e-9,
944 stable_threshold: 0.7,
945 marginal_threshold: 0.3,
946 }
947 }
948}
949
impl ConformalRanker {
    /// Ranker with the default thresholds (see [`ConformalRanker::default`]).
    pub fn new() -> Self {
        Self::default()
    }

    /// Sorts results best-first and annotates each with a rank confidence
    /// derived from the distribution of score gaps.
    ///
    /// An item's confidence is the fraction of all gaps that are <= its own
    /// gap-to-next. Items whose gap is below `tie_epsilon` are always
    /// classified `Unstable`.
    pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
        let count = results.len();

        if count == 0 {
            return RankedResults {
                items: Vec::new(),
                summary: RankingSummary {
                    count: 0,
                    stable_count: 0,
                    tie_group_count: 0,
                    median_gap: 0.0,
                },
            };
        }

        // Sort by descending score; equal scores fall back to the stronger
        // match type (MatchType's Ord puts Exact highest). NaN scores
        // compare Equal and keep their relative order (stable sort).
        let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
        indexed.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        // gaps[i] = score difference between rank i and rank i+1, clamped
        // to be non-negative.
        let gaps: Vec<f64> = if count > 1 {
            indexed
                .windows(2)
                .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
                .collect()
        } else {
            Vec::new()
        };

        // Ascending copy for quantile lookups via partition_point.
        let mut sorted_gaps = gaps.clone();
        sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        let mut items: Vec<RankedItem> = Vec::with_capacity(count);
        let mut stable_count = 0;
        let mut tie_group_count = 0;
        let mut in_tie_group = false;

        for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
            // NOTE(review): the last item has no successor, so its gap
            // defaults to 0.0 and it is always treated as a tie / Unstable —
            // a single-result ranking therefore comes out Unstable with one
            // tie group. Confirm this is intended.
            let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);

            let confidence = if sorted_gaps.is_empty() {
                // Single item: no gap distribution to compare against.
                1.0
            } else {
                // Fraction of gaps <= this one; the half-epsilon slack keeps
                // float noise from flipping the boundary comparison.
                let leq_count =
                    sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
                leq_count as f64 / sorted_gaps.len() as f64
            };

            let is_tie = gap_to_next < self.tie_epsilon;
            let stability = if is_tie {
                // Count each contiguous run of ties as one group.
                if !in_tie_group {
                    tie_group_count += 1;
                    in_tie_group = true;
                }
                RankStability::Unstable
            } else {
                in_tie_group = false;
                if confidence >= self.stable_threshold {
                    stable_count += 1;
                    RankStability::Stable
                } else if confidence >= self.marginal_threshold {
                    RankStability::Marginal
                } else {
                    RankStability::Unstable
                }
            };

            items.push(RankedItem {
                original_index: orig_idx,
                result,
                rank_confidence: RankConfidence {
                    confidence,
                    gap_to_next,
                    stability,
                },
            });
        }

        // Median over the sorted gaps (mean of the middle pair when even).
        let median_gap = if sorted_gaps.is_empty() {
            0.0
        } else {
            let mid = sorted_gaps.len() / 2;
            if sorted_gaps.len().is_multiple_of(2) {
                (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
            } else {
                sorted_gaps[mid]
            }
        };

        RankedResults {
            items,
            summary: RankingSummary {
                count,
                stable_count,
                tie_group_count,
                median_gap,
            },
        }
    }

    /// Ranks, keeps the best `k` items, and recomputes `stable_count` over
    /// the truncated list. The other summary fields (count, tie groups,
    /// median gap) still describe the full result set.
    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
        let mut ranked = self.rank(results);
        ranked.items.truncate(k);
        ranked.summary.stable_count = ranked
            .items
            .iter()
            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
            .count();
        ranked
    }
}
1087
1088impl fmt::Display for RankConfidence {
1089 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1090 write!(
1091 f,
1092 "confidence={:.3} gap={:.4} ({})",
1093 self.confidence,
1094 self.gap_to_next,
1095 self.stability.label()
1096 )
1097 }
1098}
1099
1100impl fmt::Display for RankingSummary {
1101 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1102 write!(
1103 f,
1104 "{} items, {} stable, {} tie groups, median gap {:.4}",
1105 self.count, self.stable_count, self.tie_group_count, self.median_gap
1106 )
1107 }
1108}
1109
/// Cache record: the index of a corpus entry that matched the previous
/// query, kept so the next (extended) query only re-scores these entries.
#[derive(Debug, Clone)]
#[allow(dead_code)] // field is read only via Debug today
struct CachedEntry {
    // Index into the corpus slice passed to `score_corpus*`.
    corpus_index: usize,
}
1121
/// Wraps a [`BayesianScorer`] with prefix-extension pruning: when a new
/// query merely extends the previous one, only the previous query's matches
/// are re-scored.
#[derive(Debug)]
pub struct IncrementalScorer {
    // Underlying per-title scorer.
    scorer: BayesianScorer,
    // The query the current cache was built for.
    prev_query: String,
    // Corpus indices that matched `prev_query`.
    cache: Vec<CachedEntry>,
    // Generation stamp of the corpus the cache was built against.
    corpus_generation: u64,
    // Length of that corpus; a length change invalidates the cache.
    corpus_len: usize,
    // Counters for full vs incremental scans and pruning effectiveness.
    stats: IncrementalStats,
}
1164
/// Counters describing how much work incremental pruning saved.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    /// Number of full corpus scans performed.
    pub full_scans: u64,
    /// Number of pruned (incremental) scans performed.
    pub incremental_scans: u64,
    /// Titles actually scored.
    pub total_evaluated: u64,
    /// Titles skipped thanks to pruning.
    pub total_pruned: u64,
}
1177
1178impl IncrementalStats {
1179 pub fn prune_ratio(&self) -> f64 {
1181 let total = self.total_evaluated + self.total_pruned;
1182 if total == 0 {
1183 0.0
1184 } else {
1185 self.total_pruned as f64 / total as f64
1186 }
1187 }
1188}
1189
1190impl IncrementalScorer {
1191 pub fn new() -> Self {
1193 Self {
1194 scorer: BayesianScorer::fast(),
1195 prev_query: String::new(),
1196 cache: Vec::new(),
1197 corpus_generation: 0,
1198 corpus_len: 0,
1199 stats: IncrementalStats::default(),
1200 }
1201 }
1202
1203 pub fn with_scorer(scorer: BayesianScorer) -> Self {
1205 Self {
1206 scorer,
1207 prev_query: String::new(),
1208 cache: Vec::new(),
1209 corpus_generation: 0,
1210 corpus_len: 0,
1211 stats: IncrementalStats::default(),
1212 }
1213 }
1214
1215 pub fn invalidate(&mut self) {
1217 self.prev_query.clear();
1218 self.cache.clear();
1219 self.corpus_generation = self.corpus_generation.wrapping_add(1);
1220 }
1221
1222 pub fn stats(&self) -> &IncrementalStats {
1224 &self.stats
1225 }
1226
1227 pub fn score_corpus(
1239 &mut self,
1240 query: &str,
1241 corpus: &[&str],
1242 generation: Option<u64>,
1243 ) -> Vec<(usize, MatchResult)> {
1244 let generation_val = generation.unwrap_or(self.corpus_generation);
1246 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1247 self.invalidate();
1248 self.corpus_generation = generation_val;
1249 self.corpus_len = corpus.len();
1250 }
1251
1252 let can_prune = !self.prev_query.is_empty()
1254 && query.starts_with(&self.prev_query)
1255 && !self.cache.is_empty();
1256
1257 let query_lower = query.to_lowercase();
1258 let results = if can_prune {
1259 self.score_incremental(query, &query_lower, corpus)
1260 } else {
1261 self.score_full(query, &query_lower, corpus)
1262 };
1263
1264 self.prev_query.clear();
1266 self.prev_query.push_str(query);
1267 self.cache = results
1268 .iter()
1269 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1270 .collect();
1271
1272 results
1273 }
1274
1275 pub fn score_corpus_with_lowered(
1279 &mut self,
1280 query: &str,
1281 corpus: &[&str],
1282 corpus_lower: &[&str],
1283 generation: Option<u64>,
1284 ) -> Vec<(usize, MatchResult)> {
1285 debug_assert_eq!(
1286 corpus.len(),
1287 corpus_lower.len(),
1288 "corpus_lower must match corpus length"
1289 );
1290
1291 let generation_val = generation.unwrap_or(self.corpus_generation);
1293 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1294 self.invalidate();
1295 self.corpus_generation = generation_val;
1296 self.corpus_len = corpus.len();
1297 }
1298
1299 let can_prune = !self.prev_query.is_empty()
1301 && query.starts_with(&self.prev_query)
1302 && !self.cache.is_empty();
1303
1304 let query_lower = query.to_lowercase();
1305 let results = if can_prune {
1306 self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
1307 } else {
1308 self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
1309 };
1310
1311 self.prev_query.clear();
1313 self.prev_query.push_str(query);
1314 self.cache = results
1315 .iter()
1316 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1317 .collect();
1318
1319 results
1320 }
1321
1322 pub fn score_corpus_with_lowered_and_words(
1326 &mut self,
1327 query: &str,
1328 corpus: &[String],
1329 corpus_lower: &[String],
1330 word_starts: &[Vec<usize>],
1331 generation: Option<u64>,
1332 ) -> Vec<(usize, MatchResult)> {
1333 debug_assert_eq!(
1334 corpus.len(),
1335 corpus_lower.len(),
1336 "corpus_lower must match corpus length"
1337 );
1338 debug_assert_eq!(
1339 corpus.len(),
1340 word_starts.len(),
1341 "word_starts must match corpus length"
1342 );
1343
1344 let generation_val = generation.unwrap_or(self.corpus_generation);
1346 if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1347 self.invalidate();
1348 self.corpus_generation = generation_val;
1349 self.corpus_len = corpus.len();
1350 }
1351
1352 let can_prune = !self.prev_query.is_empty()
1354 && query.starts_with(&self.prev_query)
1355 && !self.cache.is_empty();
1356
1357 let query_lower = query.to_lowercase();
1358 let results = if can_prune {
1359 self.score_incremental_lowered_with_words(
1360 query,
1361 &query_lower,
1362 corpus,
1363 corpus_lower,
1364 word_starts,
1365 )
1366 } else {
1367 self.score_full_lowered_with_words(
1368 query,
1369 &query_lower,
1370 corpus,
1371 corpus_lower,
1372 word_starts,
1373 )
1374 };
1375
1376 self.prev_query.clear();
1378 self.prev_query.push_str(query);
1379 self.cache = results
1380 .iter()
1381 .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1382 .collect();
1383
1384 results
1385 }
1386
1387 fn score_full(
1389 &mut self,
1390 query: &str,
1391 query_lower: &str,
1392 corpus: &[&str],
1393 ) -> Vec<(usize, MatchResult)> {
1394 self.stats.full_scans += 1;
1395 self.stats.total_evaluated += corpus.len() as u64;
1396
1397 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1398 for (i, title) in corpus.iter().enumerate() {
1399 let result = self
1400 .scorer
1401 .score_with_query_lower(query, query_lower, title);
1402 if result.score > 0.0 {
1403 results.push((i, result));
1404 }
1405 }
1406
1407 results.sort_by(|a, b| {
1408 b.1.score
1409 .partial_cmp(&a.1.score)
1410 .unwrap_or(std::cmp::Ordering::Equal)
1411 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1412 });
1413
1414 results
1415 }
1416
1417 fn score_full_lowered(
1419 &mut self,
1420 query: &str,
1421 query_lower: &str,
1422 corpus: &[&str],
1423 corpus_lower: &[&str],
1424 ) -> Vec<(usize, MatchResult)> {
1425 self.stats.full_scans += 1;
1426 self.stats.total_evaluated += corpus.len() as u64;
1427
1428 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1429 for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
1430 let result =
1431 self.scorer
1432 .score_with_lowered_title(query, query_lower, title, title_lower);
1433 if result.score > 0.0 {
1434 results.push((i, result));
1435 }
1436 }
1437
1438 results.sort_by(|a, b| {
1439 b.1.score
1440 .partial_cmp(&a.1.score)
1441 .unwrap_or(std::cmp::Ordering::Equal)
1442 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1443 });
1444
1445 results
1446 }
1447
1448 fn score_full_lowered_with_words(
1450 &mut self,
1451 query: &str,
1452 query_lower: &str,
1453 corpus: &[String],
1454 corpus_lower: &[String],
1455 word_starts: &[Vec<usize>],
1456 ) -> Vec<(usize, MatchResult)> {
1457 self.stats.full_scans += 1;
1458 self.stats.total_evaluated += corpus.len() as u64;
1459
1460 let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1461 for (i, ((title, title_lower), starts)) in corpus
1462 .iter()
1463 .zip(corpus_lower.iter())
1464 .zip(word_starts.iter())
1465 .enumerate()
1466 {
1467 let result = self.scorer.score_with_lowered_title_and_words(
1468 query,
1469 query_lower,
1470 title,
1471 title_lower,
1472 Some(starts),
1473 );
1474 if result.score > 0.0 {
1475 results.push((i, result));
1476 }
1477 }
1478
1479 results.sort_by(|a, b| {
1480 b.1.score
1481 .partial_cmp(&a.1.score)
1482 .unwrap_or(std::cmp::Ordering::Equal)
1483 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1484 });
1485
1486 results
1487 }
1488 fn score_incremental(
1490 &mut self,
1491 query: &str,
1492 query_lower: &str,
1493 corpus: &[&str],
1494 ) -> Vec<(usize, MatchResult)> {
1495 self.stats.incremental_scans += 1;
1496
1497 let prev_match_count = self.cache.len();
1498 let pruned = corpus.len().saturating_sub(prev_match_count);
1499 self.stats.total_pruned += pruned as u64;
1500 self.stats.total_evaluated += prev_match_count as u64;
1501
1502 let mut results: Vec<(usize, MatchResult)> =
1503 Vec::with_capacity(self.cache.len().min(corpus.len()));
1504 for entry in &self.cache {
1505 if entry.corpus_index < corpus.len() {
1506 let result = self.scorer.score_with_query_lower(
1507 query,
1508 query_lower,
1509 corpus[entry.corpus_index],
1510 );
1511 if result.score > 0.0 {
1512 results.push((entry.corpus_index, result));
1513 }
1514 }
1515 }
1516
1517 results.sort_by(|a, b| {
1518 b.1.score
1519 .partial_cmp(&a.1.score)
1520 .unwrap_or(std::cmp::Ordering::Equal)
1521 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1522 });
1523
1524 results
1525 }
1526
1527 fn score_incremental_lowered(
1529 &mut self,
1530 query: &str,
1531 query_lower: &str,
1532 corpus: &[&str],
1533 corpus_lower: &[&str],
1534 ) -> Vec<(usize, MatchResult)> {
1535 self.stats.incremental_scans += 1;
1536
1537 let prev_match_count = self.cache.len();
1538 let pruned = corpus.len().saturating_sub(prev_match_count);
1539 self.stats.total_pruned += pruned as u64;
1540 self.stats.total_evaluated += prev_match_count as u64;
1541
1542 let mut results: Vec<(usize, MatchResult)> =
1543 Vec::with_capacity(self.cache.len().min(corpus.len()));
1544 for entry in &self.cache {
1545 if entry.corpus_index < corpus.len() {
1546 let title = corpus[entry.corpus_index];
1547 let title_lower = corpus_lower[entry.corpus_index];
1548 let result =
1549 self.scorer
1550 .score_with_lowered_title(query, query_lower, title, title_lower);
1551 if result.score > 0.0 {
1552 results.push((entry.corpus_index, result));
1553 }
1554 }
1555 }
1556
1557 results.sort_by(|a, b| {
1558 b.1.score
1559 .partial_cmp(&a.1.score)
1560 .unwrap_or(std::cmp::Ordering::Equal)
1561 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1562 });
1563
1564 results
1565 }
1566
1567 fn score_incremental_lowered_with_words(
1569 &mut self,
1570 query: &str,
1571 query_lower: &str,
1572 corpus: &[String],
1573 corpus_lower: &[String],
1574 word_starts: &[Vec<usize>],
1575 ) -> Vec<(usize, MatchResult)> {
1576 self.stats.incremental_scans += 1;
1577
1578 let prev_match_count = self.cache.len();
1579 let pruned = corpus.len().saturating_sub(prev_match_count);
1580 self.stats.total_pruned += pruned as u64;
1581 self.stats.total_evaluated += prev_match_count as u64;
1582
1583 let mut results: Vec<(usize, MatchResult)> =
1584 Vec::with_capacity(self.cache.len().min(corpus.len()));
1585 for entry in &self.cache {
1586 if entry.corpus_index < corpus.len() {
1587 let title = &corpus[entry.corpus_index];
1588 let title_lower = &corpus_lower[entry.corpus_index];
1589 let starts = &word_starts[entry.corpus_index];
1590 let result = self.scorer.score_with_lowered_title_and_words(
1591 query,
1592 query_lower,
1593 title,
1594 title_lower,
1595 Some(starts),
1596 );
1597 if result.score > 0.0 {
1598 results.push((entry.corpus_index, result));
1599 }
1600 }
1601 }
1602
1603 results.sort_by(|a, b| {
1604 b.1.score
1605 .partial_cmp(&a.1.score)
1606 .unwrap_or(std::cmp::Ordering::Equal)
1607 .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1608 });
1609
1610 results
1611 }
1612}
1613
1614impl Default for IncrementalScorer {
1615 fn default() -> Self {
1616 Self::new()
1617 }
1618}
1619
// Unit tests covering the scoring pipeline end to end: BayesianScorer match
// classification, ConformalRanker ordering/confidence, and IncrementalScorer
// caching behaviour. All tests are deterministic and fixture-free.
#[cfg(test)]
mod tests {
    use super::*;

    // --- BayesianScorer: match-type classification and score levels ---

    #[test]
    fn exact_match_highest_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("Settings", "Settings");
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score > 0.95, "Exact match should score > 0.95");
    }

    #[test]
    fn prefix_match_high_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");
        assert_eq!(result.match_type, MatchType::Prefix);
        assert!(result.score > 0.85, "Prefix match should score > 0.85");
    }

    #[test]
    fn word_start_match_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_type, MatchType::WordStart);
        assert!(result.score > 0.75, "Word-start should score > 0.75");
    }

    #[test]
    fn substring_match_moderate_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Asset Manager");
        assert_eq!(result.match_type, MatchType::Substring);
        assert!(result.score > 0.5, "Substring should score > 0.5");
    }

    #[test]
    fn fuzzy_match_low_score() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_type, MatchType::Fuzzy);
        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
    }

    #[test]
    fn no_match_returns_zero() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("xyz", "Settings");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    // Scores must stay within [0, 1] for all input shapes, including empty
    // query, empty-overlap, and fuzzy cases.
    #[test]
    fn score_bounded() {
        let scorer = BayesianScorer::new();
        let test_cases = [
            ("a", "abcdefghijklmnop"),
            ("full", "full"),
            ("", "anything"),
            ("xyz", "abc"),
            ("stg", "Settings"),
        ];

        for (query, title) in test_cases {
            let result = scorer.score(query, title);
            assert!(
                result.score >= 0.0 && result.score <= 1.0,
                "Score for ({}, {}) = {} not in [0, 1]",
                query,
                title,
                result.score
            );
        }
    }

    #[test]
    fn score_deterministic() {
        let scorer = BayesianScorer::new();
        let result1 = scorer.score("nav", "Navigation");
        let result2 = scorer.score("nav", "Navigation");
        assert!(
            (result1.score - result2.score).abs() < f64::EPSILON,
            "Same input should produce identical scores"
        );
    }

    #[test]
    fn tiebreak_shorter_first() {
        let scorer = BayesianScorer::new();
        let short = scorer.score("set", "Set");
        let long = scorer.score("set", "Settings");
        assert!(
            short.score >= long.score,
            "Shorter title should score >= longer: {} vs {}",
            short.score,
            long.score
        );
    }

    #[test]
    fn case_insensitive() {
        let scorer = BayesianScorer::new();
        let lower = scorer.score("set", "Settings");
        let upper = scorer.score("SET", "Settings");
        assert!(
            (lower.score - upper.score).abs() < f64::EPSILON,
            "Case should not affect score"
        );
    }

    // "gd" hits 'G' at byte 0 and 'D' at byte 3 of "Go Dashboard".
    #[test]
    fn match_positions_correct() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        assert_eq!(result.match_positions, vec![0, 3]);
    }

    #[test]
    fn fuzzy_match_positions() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("stg", "Settings");
        assert_eq!(result.match_positions.len(), 3);
        assert_eq!(result.match_positions[0], 0);
    }

    // --- Edge cases: empty inputs and length extremes ---

    #[test]
    fn empty_query_returns_all() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "Any Command");
        assert!(result.score > 0.0, "Empty query should have positive score");
        assert!(result.score < 1.0, "Empty query should not be max score");
    }

    #[test]
    fn empty_title_is_safe() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("x", "");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert!(result.score.is_finite(), "Score should remain finite");
    }

    // Empty-vs-empty is classified as Fuzzy rather than Exact or NoMatch.
    #[test]
    fn empty_query_empty_title_is_finite() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("", "");
        assert!(result.score.is_finite(), "Score should remain finite");
        assert_eq!(result.match_type, MatchType::Fuzzy);
    }

    #[test]
    fn query_longer_than_title() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("verylongquery", "short");
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    #[test]
    fn long_query_exact_match_is_handled() {
        let scorer = BayesianScorer::new();
        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        let result = scorer.score(query, query);
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score.is_finite());
        assert!(result.score > 0.9, "Exact long query should score high");
    }

    #[test]
    fn gap_penalty_prefers_tight_fuzzy_matches() {
        let scorer = BayesianScorer::new();
        let tight = scorer.score("ace", "abcde");
        let gappy = scorer.score("ace", "a...c...e");
        assert_eq!(tight.match_type, MatchType::Fuzzy);
        assert_eq!(gappy.match_type, MatchType::Fuzzy);
        assert!(
            tight.score > gappy.score,
            "Tight fuzzy match should score higher than gappy: {} vs {}",
            tight.score,
            gappy.score
        );
    }

    // Tags here don't contain "set" as a title would, but a tag prefix hit
    // ("setup") must still raise the score above the tag-less baseline.
    #[test]
    fn tag_match_boosts_score() {
        let scorer = BayesianScorer::new();
        let without_tag = scorer.score("set", "Settings");
        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
        assert!(
            with_tag.score > without_tag.score,
            "Tag match should boost score: {} > {}",
            with_tag.score,
            without_tag.score
        );
    }

    #[test]
    fn evidence_ledger_tracks_factors() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("set", "Settings");

        assert!(!result.evidence.entries().is_empty());

        // At minimum the match-type factor must be recorded.
        assert!(
            result
                .evidence
                .entries()
                .iter()
                .any(|e| e.kind == EvidenceKind::MatchType)
        );
    }

    #[test]
    fn evidence_ledger_display() {
        let scorer = BayesianScorer::new();
        let result = scorer.score("gd", "Go Dashboard");
        let display = format!("{}", result.evidence);
        assert!(display.contains("Evidence Ledger"));
        assert!(display.contains("Posterior P:"));
    }

    #[test]
    fn property_ordering_total() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];

        let mut scores: Vec<(f64, &str)> = titles
            .iter()
            .map(|t| (scorer.score("set", t).score, *t))
            .collect();

        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());

        for (score, _) in &scores {
            assert!(score.is_finite());
        }
    }

    #[test]
    fn property_prefix_monotonic() {
        let scorer = BayesianScorer::new();
        let one_char = scorer.score("s", "Settings");
        let three_char = scorer.score("set", "Settings");
        assert!(
            three_char.score >= one_char.score,
            "Longer prefix should score >= shorter"
        );
    }

    // Priors must strictly decrease from Exact down to NoMatch.
    #[test]
    fn match_type_prior_ordering() {
        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
    }

    // --- ConformalRanker: ordering, confidence, and tie handling ---

    // With identical scores, rank order must fall back to match-type
    // strength: Exact > Prefix > WordStart > Substring.
    #[test]
    fn conformal_tie_breaks_by_match_type() {
        let mut exact = MatchResult::no_match();
        exact.score = 0.5;
        exact.match_type = MatchType::Exact;

        let mut prefix = MatchResult::no_match();
        prefix.score = 0.5;
        prefix.match_type = MatchType::Prefix;

        let mut word_start = MatchResult::no_match();
        word_start.score = 0.5;
        word_start.match_type = MatchType::WordStart;

        let mut substring = MatchResult::no_match();
        substring.score = 0.5;
        substring.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
        let order: Vec<MatchType> = ranked
            .items
            .iter()
            .map(|item| item.result.match_type)
            .collect();

        assert_eq!(
            order,
            vec![
                MatchType::Exact,
                MatchType::Prefix,
                MatchType::WordStart,
                MatchType::Substring
            ]
        );
    }

    #[test]
    fn conformal_empty_input() {
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(Vec::new());
        assert_eq!(ranked.items.len(), 0);
        assert_eq!(ranked.summary.count, 0);
        assert_eq!(ranked.summary.stable_count, 0);
        assert_eq!(ranked.summary.tie_group_count, 0);
        assert_eq!(ranked.summary.median_gap, 0.0);
    }

    // A lone item has nothing to be displaced by, so confidence is 1.0.
    #[test]
    fn conformal_single_item() {
        let scorer = BayesianScorer::new();
        let results = vec![scorer.score("set", "Settings")];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert_eq!(ranked.items.len(), 1);
        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
        assert_eq!(ranked.summary.count, 1);
    }

    #[test]
    fn conformal_sorted_descending() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset Manager"),
            scorer.score("set", "Reset Configuration Panel"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for w in ranked.items.windows(2) {
            assert!(
                w[0].result.score >= w[1].result.score,
                "Items should be sorted descending: {} >= {}",
                w[0].result.score,
                w[1].result.score
            );
        }
    }

    #[test]
    fn conformal_confidence_bounded() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for item in &ranked.items {
            assert!(
                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
                "Confidence must be in [0, 1], got {}",
                item.rank_confidence.confidence
            );
            assert!(
                item.rank_confidence.gap_to_next >= 0.0,
                "Gap must be non-negative"
            );
        }
    }

    #[test]
    fn conformal_deterministic() {
        let scorer = BayesianScorer::new();
        let titles = ["Settings", "Set Theme", "Asset", "Reset"];

        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked1 = ranker.rank(results1);
        let ranked2 = ranker.rank(results2);

        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
            assert!(
                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
                "Confidence should be deterministic"
            );
            assert_eq!(
                a.original_index, b.original_index,
                "Rank order should be deterministic"
            );
        }
    }

    // Two items with identical score/type must be flagged as an unstable tie.
    #[test]
    fn conformal_ties_detected() {
        let mut r1 = MatchResult::no_match();
        r1.score = 0.8;
        r1.match_type = MatchType::Prefix;
        let mut r2 = MatchResult::no_match();
        r2.score = 0.8;
        r2.match_type = MatchType::Prefix;
        let mut r3 = MatchResult::no_match();
        r3.score = 0.5;
        r3.match_type = MatchType::Substring;

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(vec![r1, r2, r3]);

        assert_eq!(
            ranked.items[0].rank_confidence.stability,
            RankStability::Unstable,
            "Tied items should be Unstable"
        );
        assert!(
            ranked.summary.tie_group_count >= 1,
            "Should detect at least one tie group"
        );
    }

    #[test]
    fn conformal_all_identical_scores() {
        let mut results = Vec::new();
        for _ in 0..5 {
            let mut r = MatchResult::no_match();
            r.score = 0.5;
            r.match_type = MatchType::Fuzzy;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        for item in &ranked.items {
            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
        }
    }

    #[test]
    fn conformal_well_separated_scores_are_stable() {
        let mut results = Vec::new();
        for &s in &[0.9, 0.6, 0.3, 0.1] {
            let mut r = MatchResult::no_match();
            r.score = s;
            r.match_type = MatchType::Prefix;
            results.push(r);
        }

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.stable_count >= 2,
            "Well-separated scores should yield stable positions, got {}",
            ranked.summary.stable_count
        );
    }

    #[test]
    fn conformal_top_k_truncates() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank_top_k(results, 2);

        assert_eq!(ranked.items.len(), 2);
        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
    }

    #[test]
    fn conformal_original_indices_preserved() {
        let scorer = BayesianScorer::new();
        let titles = ["Zebra Tool", "Settings", "Apple"];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();

        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert_eq!(
            ranked.items[0].original_index, 1,
            "Settings should be first; original_index should be 1"
        );
    }

    #[test]
    fn conformal_summary_display() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Set Theme"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let display = format!("{}", ranked.summary);
        assert!(display.contains("2 items"));
    }

    #[test]
    fn conformal_rank_confidence_display() {
        let rc = RankConfidence {
            confidence: 0.85,
            gap_to_next: 0.1234,
            stability: RankStability::Stable,
        };
        let display = format!("{}", rc);
        assert!(display.contains("0.850"));
        assert!(display.contains("stable"));
    }

    #[test]
    fn conformal_stability_labels() {
        assert_eq!(RankStability::Stable.label(), "stable");
        assert_eq!(RankStability::Marginal.label(), "marginal");
        assert_eq!(RankStability::Unstable.label(), "unstable");
    }

    #[test]
    fn conformal_last_item_gap_zero() {
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset"),
            scorer.score("set", "Reset"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        let last = ranked.items.last().unwrap();
        assert_eq!(
            last.rank_confidence.gap_to_next, 0.0,
            "Last item gap should be 0"
        );
    }

    #[test]
    fn conformal_median_gap_non_negative() {
        let scorer = BayesianScorer::new();
        let titles = [
            "Settings",
            "Set Theme",
            "Asset Manager",
            "Reset View",
            "Offset Tool",
            "Test Suite",
            "System Settings",
            "Reset Defaults",
        ];
        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);

        assert!(
            ranked.summary.median_gap >= 0.0,
            "Median gap must be non-negative"
        );
    }

    // --- IncrementalScorer: caching, pruning, and invalidation ---

    // Shared fixture: a small command-palette style corpus.
    fn test_corpus() -> Vec<&'static str> {
        vec![
            "Open File",
            "Save File",
            "Close Tab",
            "Git: Commit",
            "Git: Push",
            "Git: Pull",
            "Go to Line",
            "Find in Files",
            "Toggle Terminal",
            "Format Document",
        ]
    }

    #[test]
    fn incremental_full_scan_on_first_call() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();
        let results = scorer.score_corpus("git", &corpus, None);

        assert!(!results.is_empty(), "Should find matches for 'git'");
        assert_eq!(scorer.stats().full_scans, 1);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    // Typing one character at a time should use the incremental path and
    // never grow the result set.
    #[test]
    fn incremental_prunes_on_query_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let r1 = scorer.score_corpus("g", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        let r2 = scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 1);
        assert!(r2.len() <= r1.len(), "Extended query should match <= items");

        let r3 = scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().incremental_scans, 2);
        assert!(r3.len() <= r2.len());
    }

    // The incremental path is an optimization only: its output must be
    // indistinguishable from a fresh full scan.
    #[test]
    fn incremental_correctness_matches_full_scan() {
        let corpus = test_corpus();

        let mut inc = IncrementalScorer::new();
        inc.score_corpus("g", &corpus, None);
        let inc_results = inc.score_corpus("git", &corpus, None);

        let mut full = IncrementalScorer::new();
        let full_results = full.score_corpus("git", &corpus, None);

        assert_eq!(
            inc_results.len(),
            full_results.len(),
            "Incremental and full scan should return same count"
        );

        for (a, b) in inc_results.iter().zip(full_results.iter()) {
            assert_eq!(a.0, b.0, "Same corpus indices");
            assert!(
                (a.1.score - b.1.score).abs() < f64::EPSILON,
                "Same scores: {} vs {}",
                a.1.score,
                b.1.score
            );
        }
    }

    // "fi" is not an extension of "git", so the cache cannot be reused.
    #[test]
    fn incremental_falls_back_on_non_extension() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("git", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 1);

        scorer.score_corpus("fi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }

    #[test]
    fn incremental_invalidate_forces_full_scan() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.invalidate();

        scorer.score_corpus("gi", &corpus, None);
        assert_eq!(scorer.stats().full_scans, 2);
        assert_eq!(scorer.stats().incremental_scans, 0);
    }

    // A changed corpus generation token must bypass the cache.
    #[test]
    fn incremental_generation_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, Some(1));

        scorer.score_corpus("gi", &corpus, Some(2));
        assert_eq!(scorer.stats().full_scans, 2);
    }

    #[test]
    fn incremental_corpus_size_change_invalidates() {
        let mut scorer = IncrementalScorer::new();
        let corpus1 = test_corpus();
        let corpus2 = &corpus1[..5];

        scorer.score_corpus("g", &corpus1, None);
        scorer.score_corpus("gi", corpus2, None);
        assert_eq!(scorer.stats().full_scans, 2);
    }

    #[test]
    fn incremental_empty_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("", &corpus, None);
        assert_eq!(results.len(), corpus.len());
    }

    #[test]
    fn incremental_no_match_query() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("zzz", &corpus, None);
        assert!(results.is_empty());
    }

    #[test]
    fn incremental_stats_prune_ratio() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        scorer.score_corpus("g", &corpus, None);
        scorer.score_corpus("gi", &corpus, None);
        scorer.score_corpus("git", &corpus, None);

        let stats = scorer.stats();
        assert!(
            stats.prune_ratio() > 0.0,
            "Prune ratio should be > 0 after incremental scans"
        );
        assert!(stats.total_pruned > 0, "Should have pruned some items");
    }

    #[test]
    fn incremental_results_sorted_descending() {
        let mut scorer = IncrementalScorer::new();
        let corpus = test_corpus();

        let results = scorer.score_corpus("o", &corpus, None);
        for w in results.windows(2) {
            assert!(
                w[0].1.score >= w[1].1.score,
                "Results should be sorted descending: {} >= {}",
                w[0].1.score,
                w[1].1.score
            );
        }
    }

    // The pre-lowered + precomputed-word-starts path must produce exactly
    // the same indices, types, positions, and (near-)identical scores as
    // the plain path, including for non-ASCII titles.
    #[test]
    fn incremental_lowered_matches_full() {
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close".to_string(),
            "Launch 🚀".to_string(),
        ];
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
        let word_starts: Vec<Vec<usize>> = lower
            .iter()
            .map(|title_lower| {
                let bytes = title_lower.as_bytes();
                title_lower
                    .char_indices()
                    .filter_map(|(i, _)| {
                        let is_word_start = i == 0 || {
                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
                            prev == b' ' || prev == b'-' || prev == b'_'
                        };
                        is_word_start.then_some(i)
                    })
                    .collect()
            })
            .collect();

        let mut full = IncrementalScorer::new();
        let mut lowered = IncrementalScorer::new();

        let a = full.score_corpus("fi", &corpus_refs, None);
        let b =
            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);

        assert_eq!(a.len(), b.len());
        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
            assert_eq!(ia, ib);
            assert_eq!(ra.match_type, rb.match_type);
            assert_eq!(ra.match_positions, rb.match_positions);
            assert!(
                (ra.score - rb.score).abs() < 1e-12,
                "score mismatch: {} vs {}",
                ra.score,
                rb.score
            );
        }
    }

    #[test]
    fn incremental_deterministic() {
        let corpus = test_corpus();

        let mut s1 = IncrementalScorer::new();
        let r1 = s1.score_corpus("git", &corpus, None);

        let mut s2 = IncrementalScorer::new();
        let r2 = s2.score_corpus("git", &corpus, None);

        assert_eq!(r1.len(), r2.len());
        for (a, b) in r1.iter().zip(r2.iter()) {
            assert_eq!(a.0, b.0);
            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
        }
    }
}
2480
2481#[cfg(test)]
2486mod perf_tests {
2487 use super::*;
2488 use std::time::Instant;
2489
2490 #[derive(Debug, Clone, Copy)]
2491 struct PerfStats {
2492 p50_us: u64,
2493 p95_us: u64,
2494 p99_us: u64,
2495 mean_us: f64,
2496 variance_us: f64,
2497 samples: usize,
2498 }
2499
2500 const SINGLE_SCORE_BUDGET_US: u64 = 10; const CORPUS_100_BUDGET_US: u64 = 500; const CORPUS_1000_BUDGET_US: u64 = 5_000; const CORPUS_5000_BUDGET_US: u64 = 25_000; const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 2_000; const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000; fn make_corpus(size: usize) -> Vec<String> {
2511 let base = [
2512 "Open File",
2513 "Save File",
2514 "Close Tab",
2515 "Split Editor Right",
2516 "Split Editor Down",
2517 "Toggle Terminal",
2518 "Go to Line",
2519 "Find in Files",
2520 "Replace in Files",
2521 "Git: Commit",
2522 "Git: Push",
2523 "Git: Pull",
2524 "Debug: Start",
2525 "Debug: Stop",
2526 "Debug: Step Over",
2527 "Format Document",
2528 "Rename Symbol",
2529 "Go to Definition",
2530 "Find All References",
2531 "Toggle Sidebar",
2532 ];
2533 base.iter()
2534 .cycle()
2535 .take(size)
2536 .enumerate()
2537 .map(|(i, cmd)| {
2538 if i < base.len() {
2539 (*cmd).to_string()
2540 } else {
2541 format!("{} ({})", cmd, i)
2542 }
2543 })
2544 .collect()
2545 }
2546
2547 fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
2548 let mut times = Vec::with_capacity(iterations);
2549 for _ in 0..3 {
2551 f();
2552 }
2553 for _ in 0..iterations {
2554 let start = Instant::now();
2555 f();
2556 times.push(start.elapsed().as_micros() as u64);
2557 }
2558 times.sort_unstable();
2559 let len = times.len();
2560 let p50 = times[len / 2];
2561 let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
2562 let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
2563 let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
2564 let variance = times
2565 .iter()
2566 .map(|value| {
2567 let delta = *value as f64 - mean;
2568 delta * delta
2569 })
2570 .sum::<f64>()
2571 / len as f64;
2572
2573 PerfStats {
2574 p50_us: p50,
2575 p95_us: p95,
2576 p99_us: p99,
2577 mean_us: mean,
2578 variance_us: variance,
2579 samples: len,
2580 }
2581 }
2582
    // Each perf test measures a scenario, emits one machine-readable JSON
    // line on stderr (scraped by CI for trend tracking), then asserts a
    // latency percentile against its budget constant.

    #[test]
    fn perf_single_score_under_budget() {
        let scorer = BayesianScorer::fast();
        let stats = measure_stats_us(200, || {
            // black_box prevents the optimizer from folding the call away.
            std::hint::black_box(scorer.score("git co", "Git: Commit"));
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p50_us <= SINGLE_SCORE_BUDGET_US,
            "Single score p50 = {}µs exceeds budget {}µs",
            stats.p50_us,
            SINGLE_SCORE_BUDGET_US,
        );
    }

    #[test]
    fn perf_corpus_100_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(100);
        let stats = measure_stats_us(50, || {
            // Full score + filter + sort pass, as the palette would do.
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p95_us <= CORPUS_100_BUDGET_US,
            "100-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            CORPUS_100_BUDGET_US,
        );
    }

    #[test]
    fn perf_corpus_1000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(1_000);
        let stats = measure_stats_us(20, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p95_us <= CORPUS_1000_BUDGET_US,
            "1000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            CORPUS_1000_BUDGET_US,
        );
    }

    #[test]
    fn perf_corpus_5000_under_budget() {
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(5_000);
        let stats = measure_stats_us(10, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p95_us <= CORPUS_5000_BUDGET_US,
            "5000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            CORPUS_5000_BUDGET_US,
        );
    }

    // Simulates a user typing "git com" one keystroke at a time (7 queries),
    // exercising the incremental cache across the sequence.
    #[test]
    fn perf_incremental_7key_100_under_budget() {
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p95_us <= INCREMENTAL_7KEY_100_BUDGET_US,
            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            INCREMENTAL_7KEY_100_BUDGET_US,
        );
    }

    #[test]
    fn perf_incremental_7key_1000_under_budget() {
        let corpus = make_corpus(1_000);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let stats = measure_stats_us(10, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        assert!(
            stats.p95_us <= INCREMENTAL_7KEY_1000_BUDGET_US,
            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            INCREMENTAL_7KEY_1000_BUDGET_US,
        );
    }

    // Compares the incremental path against a naive re-score of the whole
    // corpus per keystroke; the incremental path must not regress badly.
    #[test]
    fn perf_incremental_faster_than_naive() {
        let corpus = make_corpus(100);
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let scorer = BayesianScorer::fast();
        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];

        let naive_stats = measure_stats_us(30, || {
            for query in &queries {
                let mut results: Vec<_> = corpus
                    .iter()
                    .map(|t| scorer.score(query, t))
                    .filter(|r| r.score > 0.0)
                    .collect();
                results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
                std::hint::black_box(&results);
            }
        });

        let inc_stats = measure_stats_us(30, || {
            let mut inc = IncrementalScorer::new();
            for query in &queries {
                let results = inc.score_corpus(query, &corpus_refs, None);
                std::hint::black_box(&results);
            }
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
            naive_stats.samples,
            naive_stats.p50_us,
            naive_stats.p95_us,
            inc_stats.p50_us,
            inc_stats.p95_us
        );

        // 2x multiplier plus a 50µs floor absorbs scheduler/timer jitter at
        // these small absolute timings.
        assert!(
            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50,
            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
            inc_stats.p50_us,
            naive_stats.p50_us,
        );
    }
2800
2801 #[test]
2802 fn perf_scaling_sublinear() {
2803 let scorer = BayesianScorer::fast();
2804 let corpus_100 = make_corpus(100);
2805 let corpus_1000 = make_corpus(1_000);
2806
2807 let stats_100 = measure_stats_us(30, || {
2808 let mut results: Vec<_> = corpus_100
2809 .iter()
2810 .map(|t| scorer.score("git", t))
2811 .filter(|r| r.score > 0.0)
2812 .collect();
2813 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
2814 std::hint::black_box(&results);
2815 });
2816
2817 let stats_1000 = measure_stats_us(20, || {
2818 let mut results: Vec<_> = corpus_1000
2819 .iter()
2820 .map(|t| scorer.score("git", t))
2821 .filter(|r| r.score > 0.0)
2822 .collect();
2823 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
2824 std::hint::black_box(&results);
2825 });
2826 eprintln!(
2827 "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
2828 stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
2829 );
2830
2831 let ratio = if stats_100.p50_us > 0 {
2833 stats_1000.p50_us as f64 / stats_100.p50_us as f64
2834 } else {
2835 0.0
2836 };
2837 assert!(
2838 ratio < 15.0,
2839 "1000/100 scaling ratio = {:.1}x exceeds 15x threshold (100: {}µs, 1000: {}µs)",
2840 ratio,
2841 stats_100.p50_us,
2842 stats_1000.p50_us,
2843 );
2844 }
2845}