Skip to main content

ftui_widgets/command_palette/
scorer.rs

1#![forbid(unsafe_code)]
2
3//! Bayesian Match Scoring for Command Palette.
4//!
5//! This module implements a probabilistic scoring model using Bayes factors
6//! to compute match relevance. An evidence ledger tracks each scoring factor
7//! and its contribution, enabling explainable ranking decisions.
8//!
9//! # Mathematical Model
10//!
11//! We compute the posterior odds ratio:
12//!
13//! ```text
14//! P(relevant | evidence) / P(not_relevant | evidence)
15//!     = [P(relevant) / P(not_relevant)] × Π_i BF_i
16//!
17//! where BF_i = P(evidence_i | relevant) / P(evidence_i | not_relevant)
18//!            = likelihood ratio for evidence type i
19//! ```
20//!
21//! The prior odds depend on match type:
22//! - Exact match: 99:1 (very likely relevant)
23//! - Prefix match: 9:1 (likely relevant)
24//! - Word-start match: 4:1 (probably relevant)
25//! - Substring match: 2:1 (possibly relevant)
26//! - Fuzzy match: 1:3 (unlikely without other evidence)
27//!
28//! The final score is the posterior probability P(relevant | evidence).
29//!
30//! # Evidence Ledger
31//!
32//! Each match includes an evidence ledger that explains the score:
33//! - Match type contribution (prior odds)
34//! - Word boundary bonus (Bayes factor ~2.0)
35//! - Position bonus (earlier = higher factor)
36//! - Gap penalty (gaps reduce factor)
37//! - Tag match bonus (strong positive evidence)
38//!
39//! # Invariants
40//!
41//! 1. Scores are bounded: 0.0 ≤ score ≤ 1.0 (probability)
42//! 2. Determinism: same input → identical score
43//! 3. Monotonicity: longer exact prefixes score ≥ shorter
44//! 4. Transitivity: score ordering is consistent
45
46use std::fmt;
47
48// ---------------------------------------------------------------------------
49// Match Types
50// ---------------------------------------------------------------------------
51
/// Type of match between query and title.
///
/// Variants are declared weakest-to-strongest so the derived `Ord`
/// ranks `Exact` above `Prefix`, `Prefix` above `WordStart`, and so on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    /// No match found.
    NoMatch,
    /// Characters found in order but with gaps.
    Fuzzy,
    /// Query found as contiguous substring.
    Substring,
    /// Query matches start of word boundaries.
    WordStart,
    /// Title starts with query.
    Prefix,
    /// Query equals title exactly.
    Exact,
}

impl MatchType {
    /// Prior odds ratio P(relevant) / P(not_relevant) for this match type.
    ///
    /// These are derived from empirical observations of user intent:
    /// exact matches are almost always what the user wants, prefix
    /// matches are very likely relevant, and fuzzy matches need
    /// additional evidence before they outrank anything.
    pub fn prior_odds(self) -> f64 {
        match self {
            MatchType::NoMatch => 0.0,   // Impossible
            MatchType::Fuzzy => 0.333,   // 1:3 odds → P ≈ 0.25
            MatchType::Substring => 2.0, // 2:1 odds → P ≈ 0.67
            MatchType::WordStart => 4.0, // 4:1 odds → P ≈ 0.80
            MatchType::Prefix => 9.0,    // 9:1 odds → P ≈ 0.90
            MatchType::Exact => 99.0,    // 99:1 odds → P ≈ 0.99
        }
    }

    /// Human-readable description for the evidence ledger.
    pub fn description(self) -> &'static str {
        match self {
            MatchType::NoMatch => "no match",
            MatchType::Fuzzy => "fuzzy match",
            MatchType::Substring => "contiguous substring",
            MatchType::WordStart => "word-start match",
            MatchType::Prefix => "prefix match",
            MatchType::Exact => "exact match",
        }
    }
}
99
100// ---------------------------------------------------------------------------
101// Evidence Entry
102// ---------------------------------------------------------------------------
103
/// A single piece of evidence contributing to the match score.
///
/// Entries accumulate in an `EvidenceLedger`, whose combined Bayes
/// factor is the product of each entry's `bayes_factor`.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    /// Type of evidence.
    pub kind: EvidenceKind,
    /// Bayes factor: likelihood ratio P(evidence | relevant) / P(evidence | ¬relevant).
    /// Values > 1.0 support relevance, < 1.0 oppose it.
    pub bayes_factor: f64,
    /// Human-readable explanation (lazily formatted via `Display`).
    pub description: EvidenceDescription,
}
115
/// Types of evidence that contribute to match scoring.
///
/// The `MatchType` entry plays a special role: the ledger treats its
/// Bayes factor as the prior odds rather than as ordinary evidence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    /// Base match type (prior odds, not an ordinary Bayes factor).
    MatchType,
    /// Match at word boundary.
    WordBoundary,
    /// Match position (earlier is better).
    Position,
    /// Gap between matched characters (fuzzy matches only).
    GapPenalty,
    /// Query also matches a tag.
    TagMatch,
    /// Title length factor (shorter is more specific).
    TitleLength,
}
132
133impl fmt::Display for EvidenceEntry {
134    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
135        let direction = if self.bayes_factor > 1.0 {
136            "supports"
137        } else if self.bayes_factor < 1.0 {
138            "opposes"
139        } else {
140            "neutral"
141        };
142        write!(
143            f,
144            "{:?}: BF={:.2} ({}) - {}",
145            self.kind, self.bayes_factor, direction, self.description
146        )
147    }
148}
149
/// Human-readable evidence description.
///
/// Kept as structured data so formatting happens lazily at display
/// time rather than when the evidence is recorded.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    Static(&'static str),
    TitleLengthChars { len: usize },
    FirstMatchPos { pos: usize },
    WordBoundaryCount { count: usize },
    GapTotal { total: usize },
    CoveragePercent { percent: f64 },
}

impl fmt::Display for EvidenceDescription {
    /// Render the description as a short English phrase.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Static(msg) => write!(f, "{msg}"),
            Self::TitleLengthChars { len } => write!(f, "title length {len} chars"),
            Self::FirstMatchPos { pos } => write!(f, "first match at position {pos}"),
            Self::WordBoundaryCount { count } => write!(f, "{count} word boundary matches"),
            Self::GapTotal { total } => write!(f, "total gap of {total} characters"),
            Self::CoveragePercent { percent } => write!(f, "query covers {percent:.0}% of title"),
        }
    }
}
173
174// ---------------------------------------------------------------------------
175// Evidence Ledger
176// ---------------------------------------------------------------------------
177
/// A ledger of evidence explaining a match score.
///
/// This provides full transparency into why a match received its score,
/// enabling debugging and user explanations. Entries are kept in
/// insertion order.
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    /// Recorded evidence, in the order it was added.
    entries: Vec<EvidenceEntry>,
}
186
187impl EvidenceLedger {
188    /// Create a new empty ledger.
189    pub fn new() -> Self {
190        Self::default()
191    }
192
193    /// Add an evidence entry.
194    pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
195        self.entries.push(EvidenceEntry {
196            kind,
197            bayes_factor,
198            description,
199        });
200    }
201
202    /// Get all entries.
203    pub fn entries(&self) -> &[EvidenceEntry] {
204        &self.entries
205    }
206
207    /// Compute the combined Bayes factor (product of all factors).
208    pub fn combined_bayes_factor(&self) -> f64 {
209        self.entries.iter().map(|e| e.bayes_factor).product()
210    }
211
212    /// Get the prior odds (from MatchType entry, if present).
213    #[must_use = "use the prior odds (if any) for diagnostics"]
214    pub fn prior_odds(&self) -> Option<f64> {
215        self.entries
216            .iter()
217            .find(|e| e.kind == EvidenceKind::MatchType)
218            .map(|e| e.bayes_factor)
219    }
220
221    /// Compute posterior probability from prior odds and evidence.
222    ///
223    /// posterior_prob = posterior_odds / (1 + posterior_odds)
224    /// where posterior_odds = prior_odds × combined_bf
225    pub fn posterior_probability(&self) -> f64 {
226        let prior = self.prior_odds().unwrap_or(1.0);
227        // Exclude prior from BF since it's already the odds
228        let bf: f64 = self
229            .entries
230            .iter()
231            .filter(|e| e.kind != EvidenceKind::MatchType)
232            .map(|e| e.bayes_factor)
233            .product();
234
235        let posterior_odds = prior * bf;
236        if posterior_odds.is_infinite() {
237            1.0
238        } else {
239            posterior_odds / (1.0 + posterior_odds)
240        }
241    }
242}
243
244impl EvidenceLedger {
245    /// Format this ledger as a JSONL line for structured logging.
246    #[must_use]
247    pub fn to_jsonl(&self) -> String {
248        let entries_json: Vec<String> = self
249            .entries
250            .iter()
251            .map(|e| {
252                format!(
253                    r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
254                    e.kind, e.bayes_factor, e.description
255                )
256            })
257            .collect();
258        format!(
259            r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
260            entries_json.join(","),
261            self.combined_bayes_factor(),
262            self.posterior_probability(),
263        )
264    }
265}
266
267impl fmt::Display for EvidenceLedger {
268    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
269        writeln!(f, "Evidence Ledger:")?;
270        for entry in &self.entries {
271            writeln!(f, "  {}", entry)?;
272        }
273        writeln!(f, "  Combined BF: {:.3}", self.combined_bayes_factor())?;
274        writeln!(f, "  Posterior P: {:.3}", self.posterior_probability())?;
275        Ok(())
276    }
277}
278
279// ---------------------------------------------------------------------------
280// Match Result
281// ---------------------------------------------------------------------------
282
/// Result of scoring a query against a title.
///
/// `score` is the posterior probability from `evidence` (or computed
/// directly in the fast path, where `evidence` stays empty).
#[derive(Debug, Clone)]
pub struct MatchResult {
    /// Computed relevance score (posterior probability, in [0, 1]).
    pub score: f64,
    /// Type of match detected.
    pub match_type: MatchType,
    /// Positions of matched characters in the title (byte indices).
    pub match_positions: Vec<usize>,
    /// Evidence ledger explaining the score (empty when tracking is off).
    pub evidence: EvidenceLedger,
}
295
296impl MatchResult {
297    /// Create a no-match result.
298    pub fn no_match() -> Self {
299        let mut evidence = EvidenceLedger::new();
300        evidence.add(
301            EvidenceKind::MatchType,
302            0.0,
303            EvidenceDescription::Static("no matching characters found"),
304        );
305        Self {
306            score: 0.0,
307            match_type: MatchType::NoMatch,
308            match_positions: Vec::new(),
309            evidence,
310        }
311    }
312}
313
314// ---------------------------------------------------------------------------
315// Scorer
316// ---------------------------------------------------------------------------
317
/// Bayesian fuzzy matcher for command palette.
///
/// Computes relevance scores using a probabilistic model with
/// evidence tracking for explainable ranking.
///
/// NOTE(review): the derived `Default` leaves `track_evidence` as
/// `false` (bool's default), so `Self::default()` behaves like
/// `BayesianScorer::fast()` while `new()` enables tracking — confirm
/// this asymmetry is intentional.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    /// Whether to track detailed evidence (slower but explainable).
    pub track_evidence: bool,
}
327
328impl BayesianScorer {
    /// Create a new scorer with evidence tracking enabled.
    ///
    /// NOTE(review): this differs from `Self::default()`, which leaves
    /// `track_evidence` false — confirm callers expect `new() != default()`.
    pub fn new() -> Self {
        Self {
            track_evidence: true,
        }
    }
335
    /// Create a scorer without evidence tracking (faster).
    ///
    /// Scoring applies the same factors, but no ledger entries are
    /// recorded; results carry an empty `EvidenceLedger`.
    pub fn fast() -> Self {
        Self {
            track_evidence: false,
        }
    }
342
343    /// Score a query against a title.
344    ///
345    /// Returns a MatchResult with score, match type, positions, and evidence.
346    pub fn score(&self, query: &str, title: &str) -> MatchResult {
347        // Quick rejection: query longer than title
348        if query.len() > title.len() {
349            return MatchResult::no_match();
350        }
351
352        // Empty query matches everything (show all)
353        if query.is_empty() {
354            return self.score_empty_query(title);
355        }
356
357        // Normalize for case-insensitive matching
358        let query_lower = query.to_lowercase();
359        let title_lower = title.to_lowercase();
360
361        // Determine match type
362        let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);
363
364        if match_type == MatchType::NoMatch {
365            return MatchResult::no_match();
366        }
367
368        // Build evidence ledger and compute score
369        self.compute_score(match_type, positions, &query_lower, title)
370    }
371
372    /// Score a query using a pre-lowercased query string.
373    ///
374    /// This avoids repeated query normalization when scoring against many titles.
375    pub fn score_with_query_lower(
376        &self,
377        query: &str,
378        query_lower: &str,
379        title: &str,
380    ) -> MatchResult {
381        let title_lower = title.to_lowercase();
382        self.score_with_lowered_title(query, query_lower, title, &title_lower)
383    }
384
    /// Score a query with both query and title already lowercased.
    ///
    /// This avoids per-title lowercasing in hot loops. Delegates to
    /// `score_with_lowered_title_and_words` with no word-start cache.
    pub fn score_with_lowered_title(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
    ) -> MatchResult {
        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
    }
397
398    /// Score a query with pre-lowercased title and optional word-start cache.
399    pub fn score_with_lowered_title_and_words(
400        &self,
401        query: &str,
402        query_lower: &str,
403        title: &str,
404        title_lower: &str,
405        word_starts: Option<&[usize]>,
406    ) -> MatchResult {
407        // Quick rejection: query longer than title
408        if query.len() > title.len() {
409            return MatchResult::no_match();
410        }
411
412        // Empty query matches everything (show all)
413        if query.is_empty() {
414            return self.score_empty_query(title);
415        }
416
417        // Determine match type
418        let (match_type, positions) =
419            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);
420
421        if match_type == MatchType::NoMatch {
422            return MatchResult::no_match();
423        }
424
425        // Build evidence ledger and compute score
426        self.compute_score(match_type, positions, query_lower, title)
427    }
428
429    /// Score a query against a title with tags.
430    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
431        let mut result = self.score(query, title);
432
433        // Check if query matches any tag
434        let query_lower = query.to_lowercase();
435        let tag_match = tags
436            .iter()
437            .any(|tag| tag.to_lowercase().contains(&query_lower));
438
439        if tag_match && result.match_type != MatchType::NoMatch {
440            // Strong positive evidence
441            if self.track_evidence {
442                result.evidence.add(
443                    EvidenceKind::TagMatch,
444                    3.0, // 3:1 in favor
445                    EvidenceDescription::Static("query matches tag"),
446                );
447                result.score = result.evidence.posterior_probability();
448            } else if (0.0..1.0).contains(&result.score) {
449                let odds = result.score / (1.0 - result.score);
450                let boosted = odds * 3.0;
451                result.score = boosted / (1.0 + boosted);
452            }
453        }
454
455        result
456    }
457
458    /// Score when query is empty (returns all items with neutral score).
459    fn score_empty_query(&self, title: &str) -> MatchResult {
460        // Shorter titles are more specific, slight preference
461        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
462        if self.track_evidence {
463            let mut evidence = EvidenceLedger::new();
464            evidence.add(
465                EvidenceKind::MatchType,
466                1.0, // Neutral prior
467                EvidenceDescription::Static("empty query matches all"),
468            );
469            evidence.add(
470                EvidenceKind::TitleLength,
471                length_factor,
472                EvidenceDescription::TitleLengthChars { len: title.len() },
473            );
474            let score = evidence.posterior_probability();
475            MatchResult {
476                score,
477                match_type: MatchType::Fuzzy, // Treat as weak match
478                match_positions: Vec::new(),
479                evidence,
480            }
481        } else {
482            let odds = length_factor;
483            let score = odds / (1.0 + odds);
484            MatchResult {
485                score,
486                match_type: MatchType::Fuzzy,
487                match_positions: Vec::new(),
488                evidence: EvidenceLedger::new(),
489            }
490        }
491    }
492
    /// Detect the type of match and positions of matched characters.
    ///
    /// Convenience wrapper over `detect_match_type_with_words` with no
    /// precomputed word-start cache.
    fn detect_match_type(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
    ) -> (MatchType, Vec<usize>) {
        self.detect_match_type_with_words(query_lower, title_lower, title, None)
    }
502
    /// Detect match type with optional precomputed word-start positions.
    ///
    /// Tries match types from strongest (exact) to weakest (fuzzy) and
    /// returns the first that applies, plus matched byte positions.
    fn detect_match_type_with_words(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        // Byte-oriented fast path when both strings are pure ASCII.
        if query_lower.is_ascii() && title_lower.is_ascii() {
            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
        }

        // Check exact match
        // NOTE(review): positions enumerate every byte index 0..title.len();
        // for non-ASCII titles this includes UTF-8 continuation bytes —
        // confirm downstream consumers expect byte (not char) positions.
        if query_lower == title_lower {
            let positions: Vec<usize> = (0..title.len()).collect();
            return (MatchType::Exact, positions);
        }

        // Check prefix match
        // NOTE(review): uses the lowercased query's byte length; Unicode
        // lowercasing can change byte lengths for some characters — verify
        // highlighting tolerates this.
        if title_lower.starts_with(query_lower) {
            let positions: Vec<usize> = (0..query_lower.len()).collect();
            return (MatchType::Prefix, positions);
        }

        // Check word-start match (e.g., "gd" matches "Go Dashboard")
        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
            return (MatchType::WordStart, positions);
        }

        // Check contiguous substring
        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_lower.len()).collect();
            return (MatchType::Substring, positions);
        }

        // Check fuzzy match (all query chars in order, gaps allowed)
        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }
545
546    /// ASCII fast-path match detection.
547    fn detect_match_type_ascii(
548        &self,
549        query_lower: &str,
550        title_lower: &str,
551        word_starts: Option<&[usize]>,
552    ) -> (MatchType, Vec<usize>) {
553        let query_bytes = query_lower.as_bytes();
554        let title_bytes = title_lower.as_bytes();
555
556        if query_bytes == title_bytes {
557            let positions: Vec<usize> = (0..title_bytes.len()).collect();
558            return (MatchType::Exact, positions);
559        }
560
561        if title_bytes.starts_with(query_bytes) {
562            let positions: Vec<usize> = (0..query_bytes.len()).collect();
563            return (MatchType::Prefix, positions);
564        }
565
566        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
567        {
568            return (MatchType::WordStart, positions);
569        }
570
571        if let Some(start) = title_lower.find(query_lower) {
572            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
573            return (MatchType::Substring, positions);
574        }
575
576        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
577            return (MatchType::Fuzzy, positions);
578        }
579
580        (MatchType::NoMatch, Vec::new())
581    }
582
583    /// Check if query matches word starts (e.g., "gd" → "Go Dashboard").
584    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
585        let mut positions = Vec::new();
586        let mut query_chars = query.chars().peekable();
587
588        let title_bytes = title.as_bytes();
589        for (i, c) in title.char_indices() {
590            // Is this a word start?
591            let is_word_start = i == 0 || {
592                let prev = title_bytes
593                    .get(i.saturating_sub(1))
594                    .copied()
595                    .unwrap_or(b' ');
596                prev == b' ' || prev == b'-' || prev == b'_'
597            };
598
599            if is_word_start
600                && let Some(&qc) = query_chars.peek()
601                && c == qc
602            {
603                positions.push(i);
604                query_chars.next();
605            }
606        }
607
608        if query_chars.peek().is_none() {
609            Some(positions)
610        } else {
611            None
612        }
613    }
614
615    /// ASCII word-start match with optional precomputed word-start positions.
616    fn word_start_match_ascii(
617        &self,
618        query: &[u8],
619        title: &[u8],
620        word_starts: Option<&[usize]>,
621    ) -> Option<Vec<usize>> {
622        let mut positions = Vec::new();
623        let mut query_idx = 0;
624        if query.is_empty() {
625            return Some(positions);
626        }
627
628        if let Some(starts) = word_starts {
629            for &pos in starts {
630                if pos >= title.len() {
631                    continue;
632                }
633                if title[pos] == query[query_idx] {
634                    positions.push(pos);
635                    query_idx += 1;
636                    if query_idx == query.len() {
637                        return Some(positions);
638                    }
639                }
640            }
641        } else {
642            for (i, &b) in title.iter().enumerate() {
643                let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
644                if is_word_start && b == query[query_idx] {
645                    positions.push(i);
646                    query_idx += 1;
647                    if query_idx == query.len() {
648                        return Some(positions);
649                    }
650                }
651            }
652        }
653
654        None
655    }
656
657    /// Check fuzzy match (characters in order).
658    fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
659        let mut positions = Vec::new();
660        let mut query_chars = query.chars().peekable();
661
662        for (i, c) in title.char_indices() {
663            if let Some(&qc) = query_chars.peek()
664                && c == qc
665            {
666                positions.push(i);
667                query_chars.next();
668            }
669        }
670
671        if query_chars.peek().is_none() {
672            Some(positions)
673        } else {
674            None
675        }
676    }
677
678    /// ASCII fuzzy match (characters in order).
679    fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
680        let mut positions = Vec::new();
681        let mut query_idx = 0;
682        if query.is_empty() {
683            return Some(positions);
684        }
685
686        for (i, &b) in title.iter().enumerate() {
687            if b == query[query_idx] {
688                positions.push(i);
689                query_idx += 1;
690                if query_idx == query.len() {
691                    return Some(positions);
692                }
693            }
694        }
695
696        None
697    }
698
699    /// Compute score from match type and positions.
700    fn compute_score(
701        &self,
702        match_type: MatchType,
703        positions: Vec<usize>,
704        query: &str,
705        title: &str,
706    ) -> MatchResult {
707        let positions_ref = positions.as_slice();
708        if !self.track_evidence {
709            let mut combined_bf = match_type.prior_odds();
710
711            if let Some(&first_pos) = positions_ref.first() {
712                let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
713                combined_bf *= position_factor;
714            }
715
716            let word_boundary_count = self.count_word_boundaries(positions_ref, title);
717            if word_boundary_count > 0 {
718                let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
719                combined_bf *= boundary_factor;
720            }
721
722            if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
723                let total_gap = self.total_gap(positions_ref);
724                let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
725                combined_bf *= gap_factor;
726            }
727
728            let title_len = title.len().max(1);
729            let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
730            combined_bf *= length_factor;
731
732            let score = combined_bf / (1.0 + combined_bf);
733            return MatchResult {
734                score,
735                match_type,
736                match_positions: positions,
737                evidence: EvidenceLedger::new(),
738            };
739        }
740
741        let mut evidence = EvidenceLedger::new();
742
743        // Prior odds from match type
744        let prior_odds = match_type.prior_odds();
745        evidence.add(
746            EvidenceKind::MatchType,
747            prior_odds,
748            EvidenceDescription::Static(match_type.description()),
749        );
750
751        // Position bonus: matches at start are better
752        if let Some(&first_pos) = positions_ref.first() {
753            let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
754            evidence.add(
755                EvidenceKind::Position,
756                position_factor,
757                EvidenceDescription::FirstMatchPos { pos: first_pos },
758            );
759        }
760
761        // Word boundary bonus
762        let word_boundary_count = self.count_word_boundaries(positions_ref, title);
763        if word_boundary_count > 0 {
764            let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
765            evidence.add(
766                EvidenceKind::WordBoundary,
767                boundary_factor,
768                EvidenceDescription::WordBoundaryCount {
769                    count: word_boundary_count,
770                },
771            );
772        }
773
774        // Gap penalty for fuzzy matches
775        if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
776            let total_gap = self.total_gap(positions_ref);
777            let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
778            evidence.add(
779                EvidenceKind::GapPenalty,
780                gap_factor,
781                EvidenceDescription::GapTotal { total: total_gap },
782            );
783        }
784
785        // Title length: prefer shorter (more specific) titles
786        let title_len = title.len().max(1);
787        let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
788        evidence.add(
789            EvidenceKind::TitleLength,
790            length_factor,
791            EvidenceDescription::CoveragePercent {
792                percent: (query.len() as f64 / title_len as f64) * 100.0,
793            },
794        );
795
796        let score = evidence.posterior_probability();
797
798        MatchResult {
799            score,
800            match_type,
801            match_positions: positions,
802            evidence,
803        }
804    }
805
806    /// Count how many matched positions are at word boundaries.
807    fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
808        let title_bytes = title.as_bytes();
809        positions
810            .iter()
811            .filter(|&&pos| {
812                pos == 0 || {
813                    let prev = title_bytes
814                        .get(pos.saturating_sub(1))
815                        .copied()
816                        .unwrap_or(b' ');
817                    prev == b' ' || prev == b'-' || prev == b'_'
818                }
819            })
820            .count()
821    }
822
823    /// Calculate total gap between matched positions.
824    fn total_gap(&self, positions: &[usize]) -> usize {
825        if positions.len() < 2 {
826            return 0;
827        }
828        positions
829            .windows(2)
830            .map(|w| w[1].saturating_sub(w[0]).saturating_sub(1))
831            .sum()
832    }
833}
834
835// ---------------------------------------------------------------------------
836// Conformal Rank Confidence
837// ---------------------------------------------------------------------------
838
/// Confidence level for a ranking position.
///
/// Derived from distribution-free conformal prediction: we compute
/// nonconformity scores (score gaps) and calibrate them against the
/// empirical distribution of all gaps in the result set.
///
/// # Invariants
///
/// 1. `confidence` is in `[0.0, 1.0]`.
/// 2. A gap of zero always yields `Unstable` stability.
/// 3. Deterministic: same scores → same confidence.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    /// Probability that this item truly belongs at this rank position.
    /// Computed as the fraction of score gaps that are smaller than
    /// the gap between this item and the next.
    pub confidence: f64,

    /// Absolute score gap to the next-ranked item (0.0 if last or tied).
    pub gap_to_next: f64,

    /// Stability classification derived from gap analysis.
    pub stability: RankStability,
}
863
/// Stability classification for a rank position.
///
/// Coarse, human-facing bucketing of how trustworthy a rank is,
/// derived from the score gap to the next item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    /// Score gap is large relative to the distribution — rank is reliable.
    Stable,
    /// Score gap is moderate — rank is plausible but could swap with neighbors.
    Marginal,
    /// Score gap is negligible — rank is essentially a tie.
    Unstable,
}

impl RankStability {
    /// Lowercase human-readable label for display and logging.
    pub fn label(self) -> &'static str {
        match self {
            RankStability::Stable => "stable",
            RankStability::Marginal => "marginal",
            RankStability::Unstable => "unstable",
        }
    }
}
885
/// Result of ranking a set of match results with conformal confidence.
#[derive(Debug, Clone)]
pub struct RankedResults {
    /// Items sorted by descending score, each with rank confidence.
    pub items: Vec<RankedItem>,

    /// Summary statistics about the ranking (counts, ties, median gap).
    pub summary: RankingSummary,
}
895
/// A single item in the ranked results.
#[derive(Debug, Clone)]
pub struct RankedItem {
    /// Index into the original (pre-sort) input slice.
    /// Lets callers map a ranked row back to the item they passed in;
    /// remains valid after `rank_top_k` truncation.
    pub original_index: usize,

    /// The match result.
    pub result: MatchResult,

    /// Conformal confidence for this rank position.
    pub rank_confidence: RankConfidence,
}
908
/// Summary statistics for a ranked result set.
///
/// NOTE(review): after `ConformalRanker::rank_top_k` truncation, only
/// `stable_count` is recomputed; `count`, `tie_group_count`, and
/// `median_gap` still describe the full (untruncated) ranking.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    /// Number of items in the ranking.
    pub count: usize,

    /// Number of items with stable rank positions.
    pub stable_count: usize,

    /// Number of tie groups (sets of items with indistinguishable scores).
    /// A group is a maximal run of adjacent positions whose gaps fall
    /// below the ranker's `tie_epsilon`.
    pub tie_group_count: usize,

    /// Median score gap between adjacent ranked items.
    pub median_gap: f64,
}
924
/// Conformal ranker that assigns distribution-free confidence to rank positions.
///
/// # Method
///
/// Given sorted scores `s_1 ≥ s_2 ≥ ... ≥ s_n`, we define the nonconformity
/// score for position `i` as the gap `g_i = s_i - s_{i+1}`.
///
/// The conformal p-value for position `i` is:
///
/// ```text
/// p_i = |{j : g_j ≤ g_i}| / (n - 1)
/// ```
///
/// This gives the fraction of gaps that are at most as large as this item's
/// gap, which we interpret as confidence that the item is correctly ranked
/// above its successor.
///
/// # Tie Detection
///
/// Two scores are considered tied when their gap is below `tie_epsilon`.
/// The default epsilon is `1e-9`, suitable for f64 posterior probabilities.
///
/// # Failure Modes
///
/// - **All scores identical**: Every position is classified `Unstable`
///   (tie detection fires before the confidence thresholds are consulted),
///   even though each p-value is trivially 1.0 since every gap satisfies
///   `g ≤ g_i`.
/// - **Single item**: Confidence is 1.0 (trivially correct ranking).
/// - **Empty input**: Returns empty results with zeroed summary.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    /// Threshold below which two scores are considered tied.
    /// Tied positions are always classified `Unstable`.
    pub tie_epsilon: f64,

    /// Confidence threshold for `Stable` classification (inclusive: `>=`).
    pub stable_threshold: f64,

    /// Confidence threshold for `Marginal` (inclusive: `>=`; below this is `Unstable`).
    pub marginal_threshold: f64,
}
963
964impl Default for ConformalRanker {
965    fn default() -> Self {
966        Self {
967            tie_epsilon: 1e-9,
968            stable_threshold: 0.7,
969            marginal_threshold: 0.3,
970        }
971    }
972}
973
974impl ConformalRanker {
975    /// Create a ranker with default thresholds.
976    pub fn new() -> Self {
977        Self::default()
978    }
979
980    /// Rank a set of match results and assign conformal confidence.
981    ///
982    /// Results are sorted by descending score. Ties are broken by
983    /// `MatchType` (higher variant first), then by shorter title length.
984    pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
985        let count = results.len();
986
987        if count == 0 {
988            return RankedResults {
989                items: Vec::new(),
990                summary: RankingSummary {
991                    count: 0,
992                    stable_count: 0,
993                    tie_group_count: 0,
994                    median_gap: 0.0,
995                },
996            };
997        }
998
999        // Tag each result with its original index, then sort descending by score.
1000        // Tie-break: higher MatchType variant first, then shorter match_positions
1001        // (proxy for title length).
1002        let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
1003        indexed.sort_by(|a, b| {
1004            b.1.score
1005                .partial_cmp(&a.1.score)
1006                .unwrap_or(std::cmp::Ordering::Equal)
1007                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1008        });
1009
1010        // Compute gaps between adjacent scores.
1011        let gaps: Vec<f64> = if count > 1 {
1012            indexed
1013                .windows(2)
1014                .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
1015                .collect()
1016        } else {
1017            Vec::new()
1018        };
1019
1020        // Sort gaps for computing conformal p-values (fraction of gaps ≤ g_i).
1021        let mut sorted_gaps = gaps.clone();
1022        sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1023
1024        // Compute rank confidence for each position.
1025        let mut items: Vec<RankedItem> = Vec::with_capacity(count);
1026        let mut stable_count = 0;
1027        let mut tie_group_count = 0;
1028        let mut in_tie_group = false;
1029
1030        for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
1031            let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);
1032
1033            // Conformal p-value: fraction of gaps that are ≤ this gap.
1034            let confidence = if sorted_gaps.is_empty() {
1035                // Single item: trivially ranked correctly.
1036                1.0
1037            } else {
1038                let leq_count =
1039                    sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
1040                leq_count as f64 / sorted_gaps.len() as f64
1041            };
1042
1043            let is_tie = gap_to_next < self.tie_epsilon;
1044            let stability = if is_tie {
1045                if !in_tie_group {
1046                    tie_group_count += 1;
1047                    in_tie_group = true;
1048                }
1049                RankStability::Unstable
1050            } else {
1051                in_tie_group = false;
1052                if confidence >= self.stable_threshold {
1053                    stable_count += 1;
1054                    RankStability::Stable
1055                } else if confidence >= self.marginal_threshold {
1056                    RankStability::Marginal
1057                } else {
1058                    RankStability::Unstable
1059                }
1060            };
1061
1062            items.push(RankedItem {
1063                original_index: orig_idx,
1064                result,
1065                rank_confidence: RankConfidence {
1066                    confidence,
1067                    gap_to_next,
1068                    stability,
1069                },
1070            });
1071        }
1072
1073        let median_gap = if sorted_gaps.is_empty() {
1074            0.0
1075        } else {
1076            let mid = sorted_gaps.len() / 2;
1077            if sorted_gaps.len().is_multiple_of(2) {
1078                (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
1079            } else {
1080                sorted_gaps[mid]
1081            }
1082        };
1083
1084        RankedResults {
1085            items,
1086            summary: RankingSummary {
1087                count,
1088                stable_count,
1089                tie_group_count,
1090                median_gap,
1091            },
1092        }
1093    }
1094
1095    /// Convenience: rank the top-k items only.
1096    ///
1097    /// All items are still scored and sorted, but only the top `k` are
1098    /// returned (with correct confidence relative to the full set).
1099    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
1100        let mut ranked = self.rank(results);
1101        ranked.items.truncate(k);
1102        // Stable count may decrease after truncation.
1103        ranked.summary.stable_count = ranked
1104            .items
1105            .iter()
1106            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
1107            .count();
1108        ranked
1109    }
1110}
1111
1112impl fmt::Display for RankConfidence {
1113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1114        write!(
1115            f,
1116            "confidence={:.3} gap={:.4} ({})",
1117            self.confidence,
1118            self.gap_to_next,
1119            self.stability.label()
1120        )
1121    }
1122}
1123
1124impl fmt::Display for RankingSummary {
1125    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126        write!(
1127            f,
1128            "{} items, {} stable, {} tie groups, median gap {:.4}",
1129            self.count, self.stable_count, self.tie_group_count, self.median_gap
1130        )
1131    }
1132}
1133
1134// ---------------------------------------------------------------------------
1135// Incremental Scorer (bd-39y4.13)
1136// ---------------------------------------------------------------------------
1137
/// Cached entry from a previous scoring pass.
///
/// Only the corpus index is stored: incremental passes re-score the item
/// against the new query, so caching the old score would be pointless.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CachedEntry {
    /// Index into the corpus.
    corpus_index: usize,
}
1145
/// Incremental scorer that caches results across keystrokes.
///
/// When the user types one more character, the new query is a prefix-extension
/// of the old query. Items that didn't match the shorter query can't match the
/// longer one (monotonicity of substring/prefix/fuzzy matching), so we skip them.
///
/// # Invariants
///
/// 1. **Correctness**: Incremental results are identical to a full rescan.
///    Adding characters can only reduce or maintain the match set, never expand it.
/// 2. **Determinism**: Same (query, corpus) → identical results.
/// 3. **Cache coherence**: Cache is invalidated when corpus changes or query
///    does not extend the previous query.
///
/// # Performance Model
///
/// Let `N` = corpus size, `M` = number of matches for the previous query.
/// - Full scan: O(N × L) where L is average title length.
/// - Incremental (query extends previous): O(M × L) where M ≤ N.
/// - Typical command palettes have M ≪ N after 2-3 characters.
///
/// # Failure Modes
///
/// - **Corpus mutation**: If the corpus changes between calls, results may be
///   stale. Call `invalidate()` or let the generation counter detect it.
/// - **Non-extending query** (e.g., backspace): Falls back to full scan.
///   This is correct but loses the incremental speedup.
#[derive(Debug)]
pub struct IncrementalScorer {
    /// Underlying scorer.
    scorer: BayesianScorer,
    /// Previous query string; a new query prunes only if it starts with this.
    prev_query: String,
    /// Cached matching entries from the previous pass.
    cache: Vec<CachedEntry>,
    /// Generation counter for corpus change detection.
    corpus_generation: u64,
    /// Number of items in the corpus at cache time (length mismatch
    /// also triggers invalidation).
    corpus_len: usize,
    /// Statistics for diagnostics.
    stats: IncrementalStats,
}
1188
/// Diagnostics for incremental scoring performance.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    /// Number of full rescans performed.
    pub full_scans: u64,
    /// Number of incremental (pruned) scans performed.
    pub incremental_scans: u64,
    /// Total items evaluated across all scans.
    pub total_evaluated: u64,
    /// Total items pruned (skipped) across incremental scans.
    pub total_pruned: u64,
}

impl IncrementalStats {
    /// Fraction of evaluations saved by incremental scoring.
    ///
    /// Returns 0.0 when nothing has been scanned yet (avoids 0/0).
    pub fn prune_ratio(&self) -> f64 {
        match self.total_evaluated + self.total_pruned {
            0 => 0.0,
            total => self.total_pruned as f64 / total as f64,
        }
    }
}
1213
1214impl IncrementalScorer {
1215    /// Create a new incremental scorer (evidence tracking disabled for speed).
1216    pub fn new() -> Self {
1217        Self {
1218            scorer: BayesianScorer::fast(),
1219            prev_query: String::new(),
1220            cache: Vec::new(),
1221            corpus_generation: 0,
1222            corpus_len: 0,
1223            stats: IncrementalStats::default(),
1224        }
1225    }
1226
1227    /// Create with explicit scorer configuration.
1228    pub fn with_scorer(scorer: BayesianScorer) -> Self {
1229        Self {
1230            scorer,
1231            prev_query: String::new(),
1232            cache: Vec::new(),
1233            corpus_generation: 0,
1234            corpus_len: 0,
1235            stats: IncrementalStats::default(),
1236        }
1237    }
1238
1239    /// Invalidate the cache (e.g., when corpus changes).
1240    pub fn invalidate(&mut self) {
1241        self.prev_query.clear();
1242        self.cache.clear();
1243        self.corpus_generation = self.corpus_generation.wrapping_add(1);
1244    }
1245
1246    /// Get diagnostic statistics.
1247    pub fn stats(&self) -> &IncrementalStats {
1248        &self.stats
1249    }
1250
1251    /// Score a query against a corpus, using cached results when possible.
1252    ///
1253    /// Returns indices into the corpus and their match results, sorted by
1254    /// descending score. Only items with score > 0 are returned.
1255    ///
1256    /// # Arguments
1257    ///
1258    /// * `query` - The current search query.
1259    /// * `corpus` - Slice of title strings to search.
1260    /// * `generation` - Optional generation counter; if it differs from the
1261    ///   cached value, the cache is invalidated.
1262    pub fn score_corpus(
1263        &mut self,
1264        query: &str,
1265        corpus: &[&str],
1266        generation: Option<u64>,
1267    ) -> Vec<(usize, MatchResult)> {
1268        // Detect corpus changes.
1269        let generation_val = generation.unwrap_or(self.corpus_generation);
1270        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1271            self.invalidate();
1272            self.corpus_generation = generation_val;
1273            self.corpus_len = corpus.len();
1274        }
1275
1276        // Determine if we can use incremental scoring.
1277        let can_prune = !self.prev_query.is_empty()
1278            && query.starts_with(&self.prev_query)
1279            && !self.cache.is_empty();
1280
1281        let query_lower = query.to_lowercase();
1282        let results = if can_prune {
1283            self.score_incremental(query, &query_lower, corpus)
1284        } else {
1285            self.score_full(query, &query_lower, corpus)
1286        };
1287
1288        // Update cache state.
1289        self.prev_query.clear();
1290        self.prev_query.push_str(query);
1291        self.cache = results
1292            .iter()
1293            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1294            .collect();
1295
1296        results
1297    }
1298
1299    /// Score a query against a corpus with pre-lowercased titles.
1300    ///
1301    /// `corpus_lower` must align 1:1 with `corpus`.
1302    pub fn score_corpus_with_lowered(
1303        &mut self,
1304        query: &str,
1305        corpus: &[&str],
1306        corpus_lower: &[&str],
1307        generation: Option<u64>,
1308    ) -> Vec<(usize, MatchResult)> {
1309        debug_assert_eq!(
1310            corpus.len(),
1311            corpus_lower.len(),
1312            "corpus_lower must match corpus length"
1313        );
1314
1315        // Detect corpus changes.
1316        let generation_val = generation.unwrap_or(self.corpus_generation);
1317        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1318            self.invalidate();
1319            self.corpus_generation = generation_val;
1320            self.corpus_len = corpus.len();
1321        }
1322
1323        // Determine if we can use incremental scoring.
1324        let can_prune = !self.prev_query.is_empty()
1325            && query.starts_with(&self.prev_query)
1326            && !self.cache.is_empty();
1327
1328        let query_lower = query.to_lowercase();
1329        let results = if can_prune {
1330            self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
1331        } else {
1332            self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
1333        };
1334
1335        // Update cache state.
1336        self.prev_query.clear();
1337        self.prev_query.push_str(query);
1338        self.cache = results
1339            .iter()
1340            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1341            .collect();
1342
1343        results
1344    }
1345
1346    /// Score a query against a corpus with pre-lowercased titles and word-start cache.
1347    ///
1348    /// `corpus_lower` and `word_starts` must align 1:1 with `corpus`.
1349    pub fn score_corpus_with_lowered_and_words(
1350        &mut self,
1351        query: &str,
1352        corpus: &[String],
1353        corpus_lower: &[String],
1354        word_starts: &[Vec<usize>],
1355        generation: Option<u64>,
1356    ) -> Vec<(usize, MatchResult)> {
1357        debug_assert_eq!(
1358            corpus.len(),
1359            corpus_lower.len(),
1360            "corpus_lower must match corpus length"
1361        );
1362        debug_assert_eq!(
1363            corpus.len(),
1364            word_starts.len(),
1365            "word_starts must match corpus length"
1366        );
1367
1368        // Detect corpus changes.
1369        let generation_val = generation.unwrap_or(self.corpus_generation);
1370        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1371            self.invalidate();
1372            self.corpus_generation = generation_val;
1373            self.corpus_len = corpus.len();
1374        }
1375
1376        // Determine if we can use incremental scoring.
1377        let can_prune = !self.prev_query.is_empty()
1378            && query.starts_with(&self.prev_query)
1379            && !self.cache.is_empty();
1380
1381        let query_lower = query.to_lowercase();
1382        let results = if can_prune {
1383            self.score_incremental_lowered_with_words(
1384                query,
1385                &query_lower,
1386                corpus,
1387                corpus_lower,
1388                word_starts,
1389            )
1390        } else {
1391            self.score_full_lowered_with_words(
1392                query,
1393                &query_lower,
1394                corpus,
1395                corpus_lower,
1396                word_starts,
1397            )
1398        };
1399
1400        // Update cache state.
1401        self.prev_query.clear();
1402        self.prev_query.push_str(query);
1403        self.cache = results
1404            .iter()
1405            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1406            .collect();
1407
1408        results
1409    }
1410
1411    /// Full scan: score every item in the corpus.
1412    fn score_full(
1413        &mut self,
1414        query: &str,
1415        query_lower: &str,
1416        corpus: &[&str],
1417    ) -> Vec<(usize, MatchResult)> {
1418        self.stats.full_scans += 1;
1419        self.stats.total_evaluated += corpus.len() as u64;
1420
1421        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1422        for (i, title) in corpus.iter().enumerate() {
1423            let result = self
1424                .scorer
1425                .score_with_query_lower(query, query_lower, title);
1426            if result.score > 0.0 {
1427                results.push((i, result));
1428            }
1429        }
1430
1431        results.sort_by(|a, b| {
1432            b.1.score
1433                .partial_cmp(&a.1.score)
1434                .unwrap_or(std::cmp::Ordering::Equal)
1435                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1436        });
1437
1438        results
1439    }
1440
1441    /// Full scan with pre-lowercased titles.
1442    fn score_full_lowered(
1443        &mut self,
1444        query: &str,
1445        query_lower: &str,
1446        corpus: &[&str],
1447        corpus_lower: &[&str],
1448    ) -> Vec<(usize, MatchResult)> {
1449        self.stats.full_scans += 1;
1450        self.stats.total_evaluated += corpus.len() as u64;
1451
1452        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1453        for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
1454            let result =
1455                self.scorer
1456                    .score_with_lowered_title(query, query_lower, title, title_lower);
1457            if result.score > 0.0 {
1458                results.push((i, result));
1459            }
1460        }
1461
1462        results.sort_by(|a, b| {
1463            b.1.score
1464                .partial_cmp(&a.1.score)
1465                .unwrap_or(std::cmp::Ordering::Equal)
1466                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1467        });
1468
1469        results
1470    }
1471
1472    /// Full scan with pre-lowercased titles and word-start cache.
1473    fn score_full_lowered_with_words(
1474        &mut self,
1475        query: &str,
1476        query_lower: &str,
1477        corpus: &[String],
1478        corpus_lower: &[String],
1479        word_starts: &[Vec<usize>],
1480    ) -> Vec<(usize, MatchResult)> {
1481        self.stats.full_scans += 1;
1482        self.stats.total_evaluated += corpus.len() as u64;
1483
1484        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1485        for (i, ((title, title_lower), starts)) in corpus
1486            .iter()
1487            .zip(corpus_lower.iter())
1488            .zip(word_starts.iter())
1489            .enumerate()
1490        {
1491            let result = self.scorer.score_with_lowered_title_and_words(
1492                query,
1493                query_lower,
1494                title,
1495                title_lower,
1496                Some(starts),
1497            );
1498            if result.score > 0.0 {
1499                results.push((i, result));
1500            }
1501        }
1502
1503        results.sort_by(|a, b| {
1504            b.1.score
1505                .partial_cmp(&a.1.score)
1506                .unwrap_or(std::cmp::Ordering::Equal)
1507                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1508        });
1509
1510        results
1511    }
1512    /// Incremental scan: only re-score items that previously matched.
1513    fn score_incremental(
1514        &mut self,
1515        query: &str,
1516        query_lower: &str,
1517        corpus: &[&str],
1518    ) -> Vec<(usize, MatchResult)> {
1519        self.stats.incremental_scans += 1;
1520
1521        let prev_match_count = self.cache.len();
1522        let pruned = corpus.len().saturating_sub(prev_match_count);
1523        self.stats.total_pruned += pruned as u64;
1524        self.stats.total_evaluated += prev_match_count as u64;
1525
1526        let mut results: Vec<(usize, MatchResult)> =
1527            Vec::with_capacity(self.cache.len().min(corpus.len()));
1528        for entry in &self.cache {
1529            if entry.corpus_index < corpus.len() {
1530                let result = self.scorer.score_with_query_lower(
1531                    query,
1532                    query_lower,
1533                    corpus[entry.corpus_index],
1534                );
1535                if result.score > 0.0 {
1536                    results.push((entry.corpus_index, result));
1537                }
1538            }
1539        }
1540
1541        results.sort_by(|a, b| {
1542            b.1.score
1543                .partial_cmp(&a.1.score)
1544                .unwrap_or(std::cmp::Ordering::Equal)
1545                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1546        });
1547
1548        results
1549    }
1550
1551    /// Incremental scan with pre-lowercased titles.
1552    fn score_incremental_lowered(
1553        &mut self,
1554        query: &str,
1555        query_lower: &str,
1556        corpus: &[&str],
1557        corpus_lower: &[&str],
1558    ) -> Vec<(usize, MatchResult)> {
1559        self.stats.incremental_scans += 1;
1560
1561        let prev_match_count = self.cache.len();
1562        let pruned = corpus.len().saturating_sub(prev_match_count);
1563        self.stats.total_pruned += pruned as u64;
1564        self.stats.total_evaluated += prev_match_count as u64;
1565
1566        let mut results: Vec<(usize, MatchResult)> =
1567            Vec::with_capacity(self.cache.len().min(corpus.len()));
1568        for entry in &self.cache {
1569            if entry.corpus_index < corpus.len() {
1570                let title = corpus[entry.corpus_index];
1571                let title_lower = corpus_lower[entry.corpus_index];
1572                let result =
1573                    self.scorer
1574                        .score_with_lowered_title(query, query_lower, title, title_lower);
1575                if result.score > 0.0 {
1576                    results.push((entry.corpus_index, result));
1577                }
1578            }
1579        }
1580
1581        results.sort_by(|a, b| {
1582            b.1.score
1583                .partial_cmp(&a.1.score)
1584                .unwrap_or(std::cmp::Ordering::Equal)
1585                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1586        });
1587
1588        results
1589    }
1590
1591    /// Incremental scan with pre-lowercased titles and word-start cache.
1592    fn score_incremental_lowered_with_words(
1593        &mut self,
1594        query: &str,
1595        query_lower: &str,
1596        corpus: &[String],
1597        corpus_lower: &[String],
1598        word_starts: &[Vec<usize>],
1599    ) -> Vec<(usize, MatchResult)> {
1600        self.stats.incremental_scans += 1;
1601
1602        let prev_match_count = self.cache.len();
1603        let pruned = corpus.len().saturating_sub(prev_match_count);
1604        self.stats.total_pruned += pruned as u64;
1605        self.stats.total_evaluated += prev_match_count as u64;
1606
1607        let mut results: Vec<(usize, MatchResult)> =
1608            Vec::with_capacity(self.cache.len().min(corpus.len()));
1609        for entry in &self.cache {
1610            if entry.corpus_index < corpus.len() {
1611                let title = &corpus[entry.corpus_index];
1612                let title_lower = &corpus_lower[entry.corpus_index];
1613                let starts = &word_starts[entry.corpus_index];
1614                let result = self.scorer.score_with_lowered_title_and_words(
1615                    query,
1616                    query_lower,
1617                    title,
1618                    title_lower,
1619                    Some(starts),
1620                );
1621                if result.score > 0.0 {
1622                    results.push((entry.corpus_index, result));
1623                }
1624            }
1625        }
1626
1627        results.sort_by(|a, b| {
1628            b.1.score
1629                .partial_cmp(&a.1.score)
1630                .unwrap_or(std::cmp::Ordering::Equal)
1631                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1632        });
1633
1634        results
1635    }
1636}
1637
1638impl Default for IncrementalScorer {
1639    fn default() -> Self {
1640        Self::new()
1641    }
1642}
1643
1644// ---------------------------------------------------------------------------
1645// Tests
1646// ---------------------------------------------------------------------------
1647
1648#[cfg(test)]
1649mod tests {
1650    use super::*;
1651
1652    // --- Match Type Tests ---
1653
1654    #[test]
1655    fn exact_match_highest_score() {
1656        let scorer = BayesianScorer::new();
1657        let result = scorer.score("Settings", "Settings");
1658        assert_eq!(result.match_type, MatchType::Exact);
1659        assert!(result.score > 0.95, "Exact match should score > 0.95");
1660    }
1661
1662    #[test]
1663    fn prefix_match_high_score() {
1664        let scorer = BayesianScorer::new();
1665        let result = scorer.score("set", "Settings");
1666        assert_eq!(result.match_type, MatchType::Prefix);
1667        assert!(result.score > 0.85, "Prefix match should score > 0.85");
1668    }
1669
1670    #[test]
1671    fn word_start_match_score() {
1672        let scorer = BayesianScorer::new();
1673        let result = scorer.score("gd", "Go Dashboard");
1674        assert_eq!(result.match_type, MatchType::WordStart);
1675        assert!(result.score > 0.75, "Word-start should score > 0.75");
1676    }
1677
1678    #[test]
1679    fn substring_match_moderate_score() {
1680        let scorer = BayesianScorer::new();
1681        let result = scorer.score("set", "Asset Manager");
1682        assert_eq!(result.match_type, MatchType::Substring);
1683        assert!(result.score > 0.5, "Substring should score > 0.5");
1684    }
1685
1686    #[test]
1687    fn fuzzy_match_low_score() {
1688        let scorer = BayesianScorer::new();
1689        let result = scorer.score("stg", "Settings");
1690        assert_eq!(result.match_type, MatchType::Fuzzy);
1691        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
1692        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
1693    }
1694
1695    #[test]
1696    fn no_match_returns_zero() {
1697        let scorer = BayesianScorer::new();
1698        let result = scorer.score("xyz", "Settings");
1699        assert_eq!(result.match_type, MatchType::NoMatch);
1700        assert_eq!(result.score, 0.0);
1701    }
1702
1703    // --- Internal ASCII Matcher Edge Cases ---
1704
1705    #[test]
1706    fn word_start_match_ascii_empty_query_matches() {
1707        let scorer = BayesianScorer::new();
1708        let positions = scorer
1709            .word_start_match_ascii(b"", b"go dashboard", None)
1710            .expect("empty query should match");
1711        assert!(positions.is_empty());
1712    }
1713
1714    #[test]
1715    fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
1716        let scorer = BayesianScorer::new();
1717        let starts = [999, 0, 3];
1718        let positions = scorer
1719            .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
1720            .expect("should still match despite out-of-bounds precomputed starts");
1721        assert_eq!(positions, vec![0, 3]);
1722    }
1723
1724    #[test]
1725    fn fuzzy_match_ascii_empty_query_matches() {
1726        let scorer = BayesianScorer::new();
1727        let positions = scorer
1728            .fuzzy_match_ascii(b"", b"settings")
1729            .expect("empty query should match");
1730        assert!(positions.is_empty());
1731    }
1732
1733    // --- Score Invariants ---
1734
1735    #[test]
1736    fn score_bounded() {
1737        let scorer = BayesianScorer::new();
1738        let test_cases = [
1739            ("a", "abcdefghijklmnop"),
1740            ("full", "full"),
1741            ("", "anything"),
1742            ("xyz", "abc"),
1743            ("stg", "Settings"),
1744        ];
1745
1746        for (query, title) in test_cases {
1747            let result = scorer.score(query, title);
1748            assert!(
1749                result.score >= 0.0 && result.score <= 1.0,
1750                "Score for ({}, {}) = {} not in [0, 1]",
1751                query,
1752                title,
1753                result.score
1754            );
1755        }
1756    }
1757
1758    #[test]
1759    fn score_deterministic() {
1760        let scorer = BayesianScorer::new();
1761        let result1 = scorer.score("nav", "Navigation");
1762        let result2 = scorer.score("nav", "Navigation");
1763        assert!(
1764            (result1.score - result2.score).abs() < f64::EPSILON,
1765            "Same input should produce identical scores"
1766        );
1767    }
1768
1769    #[test]
1770    fn tiebreak_shorter_first() {
1771        let scorer = BayesianScorer::new();
1772        let short = scorer.score("set", "Set");
1773        let long = scorer.score("set", "Settings");
1774        assert!(
1775            short.score >= long.score,
1776            "Shorter title should score >= longer: {} vs {}",
1777            short.score,
1778            long.score
1779        );
1780    }
1781
1782    // --- Case Insensitivity ---
1783
1784    #[test]
1785    fn case_insensitive() {
1786        let scorer = BayesianScorer::new();
1787        let lower = scorer.score("set", "Settings");
1788        let upper = scorer.score("SET", "Settings");
1789        assert!(
1790            (lower.score - upper.score).abs() < f64::EPSILON,
1791            "Case should not affect score"
1792        );
1793    }
1794
1795    // --- Match Positions ---
1796
1797    #[test]
1798    fn match_positions_correct() {
1799        let scorer = BayesianScorer::new();
1800        let result = scorer.score("gd", "Go Dashboard");
1801        assert_eq!(result.match_positions, vec![0, 3]);
1802    }
1803
1804    #[test]
1805    fn fuzzy_match_positions() {
1806        let scorer = BayesianScorer::new();
1807        let result = scorer.score("stg", "Settings");
1808        // s(0), t(3), g(6)
1809        assert_eq!(result.match_positions.len(), 3);
1810        assert_eq!(result.match_positions[0], 0); // 's'
1811    }
1812
1813    // --- Empty Query ---
1814
1815    #[test]
1816    fn empty_query_returns_all() {
1817        let scorer = BayesianScorer::new();
1818        let result = scorer.score("", "Any Command");
1819        assert!(result.score > 0.0, "Empty query should have positive score");
1820        assert!(result.score < 1.0, "Empty query should not be max score");
1821    }
1822
1823    #[test]
1824    fn empty_title_is_safe() {
1825        let scorer = BayesianScorer::new();
1826        let result = scorer.score("x", "");
1827        assert_eq!(result.match_type, MatchType::NoMatch);
1828        assert!(result.score.is_finite(), "Score should remain finite");
1829    }
1830
1831    #[test]
1832    fn empty_query_empty_title_is_finite() {
1833        let scorer = BayesianScorer::new();
1834        let result = scorer.score("", "");
1835        assert!(result.score.is_finite(), "Score should remain finite");
1836        assert_eq!(result.match_type, MatchType::Fuzzy);
1837    }
1838
1839    // --- Query Longer Than Title ---
1840
1841    #[test]
1842    fn query_longer_than_title() {
1843        let scorer = BayesianScorer::new();
1844        let result = scorer.score("verylongquery", "short");
1845        assert_eq!(result.match_type, MatchType::NoMatch);
1846        assert_eq!(result.score, 0.0);
1847    }
1848
1849    #[test]
1850    fn long_query_exact_match_is_handled() {
1851        let scorer = BayesianScorer::new();
1852        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
1853        let result = scorer.score(query, query);
1854        assert_eq!(result.match_type, MatchType::Exact);
1855        assert!(result.score.is_finite());
1856        assert!(result.score > 0.9, "Exact long query should score high");
1857    }
1858
1859    #[test]
1860    fn gap_penalty_prefers_tight_fuzzy_matches() {
1861        let scorer = BayesianScorer::new();
1862        let tight = scorer.score("ace", "abcde");
1863        let gappy = scorer.score("ace", "a...c...e");
1864        assert_eq!(tight.match_type, MatchType::Fuzzy);
1865        assert_eq!(gappy.match_type, MatchType::Fuzzy);
1866        assert!(
1867            tight.score > gappy.score,
1868            "Tight fuzzy match should score higher than gappy: {} vs {}",
1869            tight.score,
1870            gappy.score
1871        );
1872    }
1873
1874    // --- Tag Matching ---
1875
1876    #[test]
1877    fn tag_match_boosts_score() {
1878        let scorer = BayesianScorer::new();
1879        // Use a query that matches the title (fuzzy)
1880        let without_tag = scorer.score("set", "Settings");
1881        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
1882        // Tag "setup" contains "set", so it should boost the score
1883        assert!(
1884            with_tag.score > without_tag.score,
1885            "Tag match should boost score: {} > {}",
1886            with_tag.score,
1887            without_tag.score
1888        );
1889    }
1890
1891    // --- Evidence Ledger ---
1892
1893    #[test]
1894    fn evidence_ledger_tracks_factors() {
1895        let scorer = BayesianScorer::new();
1896        let result = scorer.score("set", "Settings");
1897
1898        assert!(!result.evidence.entries().is_empty());
1899
1900        // Should have match type entry
1901        assert!(
1902            result
1903                .evidence
1904                .entries()
1905                .iter()
1906                .any(|e| e.kind == EvidenceKind::MatchType)
1907        );
1908    }
1909
1910    #[test]
1911    fn evidence_ledger_display() {
1912        let scorer = BayesianScorer::new();
1913        let result = scorer.score("gd", "Go Dashboard");
1914        let display = format!("{}", result.evidence);
1915        assert!(display.contains("Evidence Ledger"));
1916        assert!(display.contains("Posterior P:"));
1917    }
1918
1919    // --- Property Tests ---
1920
1921    #[test]
1922    fn property_ordering_total() {
1923        let scorer = BayesianScorer::new();
1924        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];
1925
1926        let mut scores: Vec<(f64, &str)> = titles
1927            .iter()
1928            .map(|t| (scorer.score("set", t).score, *t))
1929            .collect();
1930
1931        // Sort should be stable and total
1932        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
1933
1934        // Verify no NaN or infinite scores
1935        for (score, _) in &scores {
1936            assert!(score.is_finite());
1937        }
1938    }
1939
1940    #[test]
1941    fn property_prefix_monotonic() {
1942        let scorer = BayesianScorer::new();
1943        // Longer exact prefix match should score higher
1944        let one_char = scorer.score("s", "Settings");
1945        let three_char = scorer.score("set", "Settings");
1946        // Both are prefix matches, longer should be better
1947        assert!(
1948            three_char.score >= one_char.score,
1949            "Longer prefix should score >= shorter"
1950        );
1951    }
1952
1953    // --- Match Type Prior Odds ---
1954
1955    #[test]
1956    fn match_type_prior_ordering() {
1957        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
1958        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
1959        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
1960        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
1961        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
1962    }
1963
1964    // -----------------------------------------------------------------------
1965    // Conformal Ranker Tests
1966    // -----------------------------------------------------------------------
1967
1968    #[test]
1969    fn conformal_tie_breaks_by_match_type() {
1970        let mut exact = MatchResult::no_match();
1971        exact.score = 0.5;
1972        exact.match_type = MatchType::Exact;
1973
1974        let mut prefix = MatchResult::no_match();
1975        prefix.score = 0.5;
1976        prefix.match_type = MatchType::Prefix;
1977
1978        let mut word_start = MatchResult::no_match();
1979        word_start.score = 0.5;
1980        word_start.match_type = MatchType::WordStart;
1981
1982        let mut substring = MatchResult::no_match();
1983        substring.score = 0.5;
1984        substring.match_type = MatchType::Substring;
1985
1986        let ranker = ConformalRanker::new();
1987        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
1988        let order: Vec<MatchType> = ranked
1989            .items
1990            .iter()
1991            .map(|item| item.result.match_type)
1992            .collect();
1993
1994        assert_eq!(
1995            order,
1996            vec![
1997                MatchType::Exact,
1998                MatchType::Prefix,
1999                MatchType::WordStart,
2000                MatchType::Substring
2001            ]
2002        );
2003    }
2004
2005    #[test]
2006    fn conformal_empty_input() {
2007        let ranker = ConformalRanker::new();
2008        let ranked = ranker.rank(Vec::new());
2009        assert_eq!(ranked.items.len(), 0);
2010        assert_eq!(ranked.summary.count, 0);
2011        assert_eq!(ranked.summary.stable_count, 0);
2012        assert_eq!(ranked.summary.tie_group_count, 0);
2013        assert_eq!(ranked.summary.median_gap, 0.0);
2014    }
2015
2016    #[test]
2017    fn conformal_single_item() {
2018        let scorer = BayesianScorer::new();
2019        let results = vec![scorer.score("set", "Settings")];
2020        let ranker = ConformalRanker::new();
2021        let ranked = ranker.rank(results);
2022
2023        assert_eq!(ranked.items.len(), 1);
2024        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
2025        assert_eq!(ranked.summary.count, 1);
2026    }
2027
2028    #[test]
2029    fn conformal_sorted_descending() {
2030        let scorer = BayesianScorer::new();
2031        let results = vec![
2032            scorer.score("set", "Settings"),
2033            scorer.score("set", "Asset Manager"),
2034            scorer.score("set", "Reset Configuration Panel"),
2035        ];
2036        let ranker = ConformalRanker::new();
2037        let ranked = ranker.rank(results);
2038
2039        // Verify descending score order.
2040        for w in ranked.items.windows(2) {
2041            let a = w[0].result.score;
2042            let b = w[1].result.score;
2043            assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
2044        }
2045    }
2046
2047    #[test]
2048    fn conformal_confidence_bounded() {
2049        let scorer = BayesianScorer::new();
2050        let titles = [
2051            "Settings",
2052            "Set Theme",
2053            "Asset Manager",
2054            "Reset View",
2055            "Offset Tool",
2056            "Test Suite",
2057        ];
2058        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2059        let ranker = ConformalRanker::new();
2060        let ranked = ranker.rank(results);
2061
2062        for item in &ranked.items {
2063            assert!(
2064                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
2065                "Confidence must be in [0, 1], got {}",
2066                item.rank_confidence.confidence
2067            );
2068            assert!(
2069                item.rank_confidence.gap_to_next >= 0.0,
2070                "Gap must be non-negative"
2071            );
2072        }
2073    }
2074
2075    #[test]
2076    fn conformal_deterministic() {
2077        let scorer = BayesianScorer::new();
2078        let titles = ["Settings", "Set Theme", "Asset", "Reset"];
2079
2080        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2081        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2082
2083        let ranker = ConformalRanker::new();
2084        let ranked1 = ranker.rank(results1);
2085        let ranked2 = ranker.rank(results2);
2086
2087        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
2088            assert!(
2089                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
2090                "Confidence should be deterministic"
2091            );
2092            assert_eq!(
2093                a.original_index, b.original_index,
2094                "Rank order should be deterministic"
2095            );
2096        }
2097    }
2098
2099    #[test]
2100    fn conformal_ties_detected() {
2101        // Create items with identical scores.
2102        let mut r1 = MatchResult::no_match();
2103        r1.score = 0.8;
2104        r1.match_type = MatchType::Prefix;
2105        let mut r2 = MatchResult::no_match();
2106        r2.score = 0.8;
2107        r2.match_type = MatchType::Prefix;
2108        let mut r3 = MatchResult::no_match();
2109        r3.score = 0.5;
2110        r3.match_type = MatchType::Substring;
2111
2112        let ranker = ConformalRanker::new();
2113        let ranked = ranker.rank(vec![r1, r2, r3]);
2114
2115        // The first two have identical scores — their gap is 0 → unstable.
2116        assert_eq!(
2117            ranked.items[0].rank_confidence.stability,
2118            RankStability::Unstable,
2119            "Tied items should be Unstable"
2120        );
2121        assert!(
2122            ranked.summary.tie_group_count >= 1,
2123            "Should detect at least one tie group"
2124        );
2125    }
2126
2127    #[test]
2128    fn conformal_all_identical_scores() {
2129        let mut results = Vec::new();
2130        for _ in 0..5 {
2131            let mut r = MatchResult::no_match();
2132            r.score = 0.5;
2133            r.match_type = MatchType::Fuzzy;
2134            results.push(r);
2135        }
2136
2137        let ranker = ConformalRanker::new();
2138        let ranked = ranker.rank(results);
2139
2140        // All gaps are zero → all unstable.
2141        for item in &ranked.items {
2142            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
2143        }
2144    }
2145
2146    #[test]
2147    fn conformal_well_separated_scores_are_stable() {
2148        let mut results = Vec::new();
2149        // Scores well spread out: 0.9, 0.6, 0.3, 0.1.
2150        for &s in &[0.9, 0.6, 0.3, 0.1] {
2151            let mut r = MatchResult::no_match();
2152            r.score = s;
2153            r.match_type = MatchType::Prefix;
2154            results.push(r);
2155        }
2156
2157        let ranker = ConformalRanker::new();
2158        let ranked = ranker.rank(results);
2159
2160        // With well-separated scores, most should be stable.
2161        assert!(
2162            ranked.summary.stable_count >= 2,
2163            "Well-separated scores should yield stable positions, got {}",
2164            ranked.summary.stable_count
2165        );
2166    }
2167
2168    #[test]
2169    fn conformal_top_k_truncates() {
2170        let scorer = BayesianScorer::new();
2171        let titles = [
2172            "Settings",
2173            "Set Theme",
2174            "Asset Manager",
2175            "Reset View",
2176            "Offset Tool",
2177        ];
2178        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2179
2180        let ranker = ConformalRanker::new();
2181        let ranked = ranker.rank_top_k(results, 2);
2182
2183        assert_eq!(ranked.items.len(), 2);
2184        // Top-k items should still have confidence from the full ranking.
2185        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
2186    }
2187
2188    #[test]
2189    fn conformal_original_indices_preserved() {
2190        let scorer = BayesianScorer::new();
2191        let titles = ["Zebra Tool", "Settings", "Apple"];
2192        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2193
2194        let ranker = ConformalRanker::new();
2195        let ranked = ranker.rank(results);
2196
2197        // "Settings" (index 1) should be ranked first (prefix match).
2198        assert_eq!(
2199            ranked.items[0].original_index, 1,
2200            "Settings should be first; original_index should be 1"
2201        );
2202    }
2203
2204    #[test]
2205    fn conformal_summary_display() {
2206        let scorer = BayesianScorer::new();
2207        let results = vec![
2208            scorer.score("set", "Settings"),
2209            scorer.score("set", "Set Theme"),
2210        ];
2211        let ranker = ConformalRanker::new();
2212        let ranked = ranker.rank(results);
2213
2214        let display = format!("{}", ranked.summary);
2215        assert!(display.contains("2 items"));
2216    }
2217
2218    #[test]
2219    fn conformal_rank_confidence_display() {
2220        let rc = RankConfidence {
2221            confidence: 0.85,
2222            gap_to_next: 0.1234,
2223            stability: RankStability::Stable,
2224        };
2225        let display = format!("{}", rc);
2226        assert!(display.contains("0.850"));
2227        assert!(display.contains("stable"));
2228    }
2229
2230    #[test]
2231    fn conformal_stability_labels() {
2232        assert_eq!(RankStability::Stable.label(), "stable");
2233        assert_eq!(RankStability::Marginal.label(), "marginal");
2234        assert_eq!(RankStability::Unstable.label(), "unstable");
2235    }
2236
2237    // --- Property: gap_to_next of last item is always 0 ---
2238
2239    #[test]
2240    fn conformal_last_item_gap_zero() {
2241        let scorer = BayesianScorer::new();
2242        let results = vec![
2243            scorer.score("set", "Settings"),
2244            scorer.score("set", "Asset"),
2245            scorer.score("set", "Reset"),
2246        ];
2247        let ranker = ConformalRanker::new();
2248        let ranked = ranker.rank(results);
2249
2250        let last = ranked.items.last().unwrap();
2251        assert_eq!(
2252            last.rank_confidence.gap_to_next, 0.0,
2253            "Last item gap should be 0"
2254        );
2255    }
2256
2257    // --- Property: median_gap is non-negative ---
2258
2259    #[test]
2260    fn conformal_median_gap_non_negative() {
2261        let scorer = BayesianScorer::new();
2262        let titles = [
2263            "Settings",
2264            "Set Theme",
2265            "Asset Manager",
2266            "Reset View",
2267            "Offset Tool",
2268            "Test Suite",
2269            "System Settings",
2270            "Reset Defaults",
2271        ];
2272        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2273        let ranker = ConformalRanker::new();
2274        let ranked = ranker.rank(results);
2275
2276        assert!(
2277            ranked.summary.median_gap >= 0.0,
2278            "Median gap must be non-negative"
2279        );
2280    }
2281
2282    // -----------------------------------------------------------------------
2283    // IncrementalScorer Tests (bd-39y4.13)
2284    // -----------------------------------------------------------------------
2285
2286    fn test_corpus() -> Vec<&'static str> {
2287        vec![
2288            "Open File",
2289            "Save File",
2290            "Close Tab",
2291            "Git: Commit",
2292            "Git: Push",
2293            "Git: Pull",
2294            "Go to Line",
2295            "Find in Files",
2296            "Toggle Terminal",
2297            "Format Document",
2298        ]
2299    }
2300
2301    #[test]
2302    fn incremental_full_scan_on_first_call() {
2303        let mut scorer = IncrementalScorer::new();
2304        let corpus = test_corpus();
2305        let results = scorer.score_corpus("git", &corpus, None);
2306
2307        assert!(!results.is_empty(), "Should find matches for 'git'");
2308        assert_eq!(scorer.stats().full_scans, 1);
2309        assert_eq!(scorer.stats().incremental_scans, 0);
2310    }
2311
2312    #[test]
2313    fn incremental_prunes_on_query_extension() {
2314        let mut scorer = IncrementalScorer::new();
2315        let corpus = test_corpus();
2316
2317        // First query: "g" matches many items
2318        let r1 = scorer.score_corpus("g", &corpus, None);
2319        assert_eq!(scorer.stats().full_scans, 1);
2320
2321        // Extended query: "gi" — should use incremental path
2322        let r2 = scorer.score_corpus("gi", &corpus, None);
2323        assert_eq!(scorer.stats().incremental_scans, 1);
2324        assert!(r2.len() <= r1.len(), "Extended query should match <= items");
2325
2326        // Further extension: "git" — still incremental
2327        let r3 = scorer.score_corpus("git", &corpus, None);
2328        assert_eq!(scorer.stats().incremental_scans, 2);
2329        assert!(r3.len() <= r2.len());
2330    }
2331
2332    #[test]
2333    fn incremental_correctness_matches_full_scan() {
2334        let corpus = test_corpus();
2335
2336        // Incremental path
2337        let mut inc = IncrementalScorer::new();
2338        inc.score_corpus("g", &corpus, None);
2339        let inc_results = inc.score_corpus("git", &corpus, None);
2340
2341        // Full scan path (fresh scorer, no cache)
2342        let mut full = IncrementalScorer::new();
2343        let full_results = full.score_corpus("git", &corpus, None);
2344
2345        // Results should be identical.
2346        assert_eq!(
2347            inc_results.len(),
2348            full_results.len(),
2349            "Incremental and full scan should return same count"
2350        );
2351
2352        for (a, b) in inc_results.iter().zip(full_results.iter()) {
2353            assert_eq!(a.0, b.0, "Same corpus indices");
2354            assert!(
2355                (a.1.score - b.1.score).abs() < f64::EPSILON,
2356                "Same scores: {} vs {}",
2357                a.1.score,
2358                b.1.score
2359            );
2360        }
2361    }
2362
2363    #[test]
2364    fn incremental_falls_back_on_non_extension() {
2365        let mut scorer = IncrementalScorer::new();
2366        let corpus = test_corpus();
2367
2368        scorer.score_corpus("git", &corpus, None);
2369        assert_eq!(scorer.stats().full_scans, 1);
2370
2371        // "fi" doesn't extend "git" — must full scan
2372        scorer.score_corpus("fi", &corpus, None);
2373        assert_eq!(scorer.stats().full_scans, 2);
2374    }
2375
2376    #[test]
2377    fn incremental_invalidate_forces_full_scan() {
2378        let mut scorer = IncrementalScorer::new();
2379        let corpus = test_corpus();
2380
2381        scorer.score_corpus("g", &corpus, None);
2382        scorer.invalidate();
2383
2384        // Even though "gi" extends "g", cache was cleared
2385        scorer.score_corpus("gi", &corpus, None);
2386        assert_eq!(scorer.stats().full_scans, 2);
2387        assert_eq!(scorer.stats().incremental_scans, 0);
2388    }
2389
2390    #[test]
2391    fn incremental_generation_change_invalidates() {
2392        let mut scorer = IncrementalScorer::new();
2393        let corpus = test_corpus();
2394
2395        scorer.score_corpus("g", &corpus, Some(1));
2396
2397        // Generation changed — cache invalid
2398        scorer.score_corpus("gi", &corpus, Some(2));
2399        assert_eq!(scorer.stats().full_scans, 2);
2400    }
2401
2402    #[test]
2403    fn incremental_corpus_size_change_invalidates() {
2404        let mut scorer = IncrementalScorer::new();
2405        let corpus1 = test_corpus();
2406        let corpus2 = &corpus1[..5];
2407
2408        scorer.score_corpus("g", &corpus1, None);
2409        scorer.score_corpus("gi", corpus2, None);
2410        // Corpus size changed → full scan
2411        assert_eq!(scorer.stats().full_scans, 2);
2412    }
2413
2414    #[test]
2415    fn incremental_skips_out_of_bounds_cache_entries() {
2416        let mut scorer = IncrementalScorer::new();
2417
2418        // Simulate a cache produced for a larger corpus. The scorer should safely
2419        // skip entries that are out of bounds for the current corpus.
2420        scorer.cache = vec![
2421            CachedEntry { corpus_index: 0 },
2422            CachedEntry { corpus_index: 999 },
2423        ];
2424
2425        let corpus = ["a"];
2426        let results = scorer.score_incremental("a", "a", &corpus);
2427        assert_eq!(results.len(), 1);
2428        assert_eq!(results[0].0, 0);
2429
2430        let corpus_lower = ["a"];
2431        let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
2432        assert_eq!(results.len(), 1);
2433        assert_eq!(results[0].0, 0);
2434
2435        let corpus_s = vec!["a".to_string()];
2436        let corpus_lower_s = vec!["a".to_string()];
2437        let word_starts = vec![vec![0]];
2438        let results = scorer.score_incremental_lowered_with_words(
2439            "a",
2440            "a",
2441            &corpus_s,
2442            &corpus_lower_s,
2443            &word_starts,
2444        );
2445        assert_eq!(results.len(), 1);
2446        assert_eq!(results[0].0, 0);
2447    }
2448
2449    #[test]
2450    fn incremental_empty_query() {
2451        let mut scorer = IncrementalScorer::new();
2452        let corpus = test_corpus();
2453
2454        let results = scorer.score_corpus("", &corpus, None);
2455        // Empty query matches everything (with weak scores)
2456        assert_eq!(results.len(), corpus.len());
2457    }
2458
2459    #[test]
2460    fn incremental_no_match_query() {
2461        let mut scorer = IncrementalScorer::new();
2462        let corpus = test_corpus();
2463
2464        let results = scorer.score_corpus("zzz", &corpus, None);
2465        assert!(results.is_empty());
2466    }
2467
2468    #[test]
2469    fn incremental_stats_prune_ratio() {
2470        let mut scorer = IncrementalScorer::new();
2471        let corpus = test_corpus();
2472
2473        scorer.score_corpus("g", &corpus, None);
2474        scorer.score_corpus("gi", &corpus, None);
2475        scorer.score_corpus("git", &corpus, None);
2476
2477        let stats = scorer.stats();
2478        assert!(
2479            stats.prune_ratio() > 0.0,
2480            "Prune ratio should be > 0 after incremental scans"
2481        );
2482        assert!(stats.total_pruned > 0, "Should have pruned some items");
2483    }
2484
2485    #[test]
2486    fn incremental_results_sorted_descending() {
2487        let mut scorer = IncrementalScorer::new();
2488        let corpus = test_corpus();
2489
2490        let results = scorer.score_corpus("o", &corpus, None);
2491        for w in results.windows(2) {
2492            let a = w[0].1.score;
2493            let b = w[1].1.score;
2494            assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
2495        }
2496    }
2497
    #[test]
    fn incremental_lowered_matches_full() {
        // The precomputed-lowercase + precomputed-word-starts fast path must
        // agree exactly with the plain full-scan path — including for a title
        // containing a multi-byte (emoji) character.
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close".to_string(),
            "Launch 🚀".to_string(),
        ];
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
        // Build word starts the way a caller would: a char offset counts as a
        // word start when it is 0 or its preceding byte is ' ', '-', or '_'.
        // NOTE(review): this presumably mirrors the scorer's internal
        // word-start rule — the equality assertions below are what verify it.
        let word_starts: Vec<Vec<usize>> = lower
            .iter()
            .map(|title_lower| {
                let bytes = title_lower.as_bytes();
                title_lower
                    .char_indices()
                    .filter_map(|(i, _)| {
                        let is_word_start = i == 0 || {
                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
                            prev == b' ' || prev == b'-' || prev == b'_'
                        };
                        is_word_start.then_some(i)
                    })
                    .collect()
            })
            .collect();

        let mut full = IncrementalScorer::new();
        let mut lowered = IncrementalScorer::new();

        let a = full.score_corpus("fi", &corpus_refs, None);
        let b =
            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);

        // Indices, match types, highlight positions, and (to tight tolerance)
        // scores must all line up pairwise between the two paths.
        assert_eq!(a.len(), b.len());
        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
            assert_eq!(ia, ib);
            assert_eq!(ra.match_type, rb.match_type);
            assert_eq!(ra.match_positions, rb.match_positions);
            assert!(
                (ra.score - rb.score).abs() < 1e-12,
                "score mismatch: {} vs {}",
                ra.score,
                rb.score
            );
        }
    }
2545
2546    #[test]
2547    fn incremental_deterministic() {
2548        let corpus = test_corpus();
2549
2550        let mut s1 = IncrementalScorer::new();
2551        let r1 = s1.score_corpus("git", &corpus, None);
2552
2553        let mut s2 = IncrementalScorer::new();
2554        let r2 = s2.score_corpus("git", &corpus, None);
2555
2556        assert_eq!(r1.len(), r2.len());
2557        for (a, b) in r1.iter().zip(r2.iter()) {
2558            assert_eq!(a.0, b.0);
2559            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
2560        }
2561    }
2562
2563    // -----------------------------------------------------------------------
2564    // MatchType::description coverage (bd-z1c65)
2565    // -----------------------------------------------------------------------
2566
2567    #[test]
2568    fn match_type_descriptions() {
2569        assert_eq!(MatchType::Exact.description(), "exact match");
2570        assert_eq!(MatchType::Prefix.description(), "prefix match");
2571        assert_eq!(MatchType::WordStart.description(), "word-start match");
2572        assert_eq!(MatchType::Substring.description(), "contiguous substring");
2573        assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
2574        assert_eq!(MatchType::NoMatch.description(), "no match");
2575    }
2576
2577    #[test]
2578    fn match_type_no_match_prior_is_zero() {
2579        assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
2580    }
2581
2582    // -----------------------------------------------------------------------
2583    // EvidenceDescription Display variants (bd-z1c65)
2584    // -----------------------------------------------------------------------
2585
2586    #[test]
2587    fn evidence_description_static_display() {
2588        let d = EvidenceDescription::Static("test message");
2589        assert_eq!(format!("{d}"), "test message");
2590    }
2591
2592    #[test]
2593    fn evidence_description_title_length_display() {
2594        let d = EvidenceDescription::TitleLengthChars { len: 42 };
2595        assert_eq!(format!("{d}"), "title length 42 chars");
2596    }
2597
2598    #[test]
2599    fn evidence_description_first_match_pos_display() {
2600        let d = EvidenceDescription::FirstMatchPos { pos: 5 };
2601        assert_eq!(format!("{d}"), "first match at position 5");
2602    }
2603
2604    #[test]
2605    fn evidence_description_word_boundary_count_display() {
2606        let d = EvidenceDescription::WordBoundaryCount { count: 3 };
2607        assert_eq!(format!("{d}"), "3 word boundary matches");
2608    }
2609
2610    #[test]
2611    fn evidence_description_gap_total_display() {
2612        let d = EvidenceDescription::GapTotal { total: 7 };
2613        assert_eq!(format!("{d}"), "total gap of 7 characters");
2614    }
2615
2616    #[test]
2617    fn evidence_description_coverage_percent_display() {
2618        let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
2619        let s = format!("{d}");
2620        assert!(s.contains("84%"), "Should round: {s}");
2621    }
2622
2623    // -----------------------------------------------------------------------
2624    // EvidenceEntry Display (bd-z1c65)
2625    // -----------------------------------------------------------------------
2626
    #[test]
    fn evidence_entry_display_supports() {
        // A Bayes factor > 1.0 is evidence in favour of relevance: the
        // Display output says "supports" and names the evidence kind.
        let entry = EvidenceEntry {
            kind: EvidenceKind::Position,
            bayes_factor: 1.5,
            description: EvidenceDescription::FirstMatchPos { pos: 0 },
        };
        let s = format!("{entry}");
        assert!(s.contains("supports"));
        assert!(s.contains("Position"));
    }

    #[test]
    fn evidence_entry_display_opposes() {
        // A Bayes factor < 1.0 is evidence against relevance → "opposes".
        let entry = EvidenceEntry {
            kind: EvidenceKind::GapPenalty,
            bayes_factor: 0.5,
            description: EvidenceDescription::GapTotal { total: 10 },
        };
        let s = format!("{entry}");
        assert!(s.contains("opposes"));
    }

    #[test]
    fn evidence_entry_display_neutral() {
        // A Bayes factor of exactly 1.0 carries no information → "neutral".
        let entry = EvidenceEntry {
            kind: EvidenceKind::TitleLength,
            bayes_factor: 1.0,
            description: EvidenceDescription::Static("neutral factor"),
        };
        let s = format!("{entry}");
        assert!(s.contains("neutral"));
    }
2660
2661    // -----------------------------------------------------------------------
2662    // EvidenceLedger direct method tests (bd-z1c65)
2663    // -----------------------------------------------------------------------
2664
    #[test]
    fn ledger_combined_bayes_factor() {
        // Independent evidence multiplies: BF 2.0 × BF 3.0 = 6.0.
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("a"),
        );
        ledger.add(
            EvidenceKind::WordBoundary,
            3.0,
            EvidenceDescription::Static("b"),
        );
        assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
    }

    #[test]
    fn ledger_combined_bayes_factor_empty() {
        // An empty product is the multiplicative identity, 1.0.
        let ledger = EvidenceLedger::new();
        assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn ledger_prior_odds_present() {
        // The MatchType entry's Bayes factor doubles as the prior odds.
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            9.0,
            EvidenceDescription::Static("prefix"),
        );
        assert_eq!(ledger.prior_odds(), Some(9.0));
    }

    #[test]
    fn ledger_prior_odds_absent() {
        // Without a MatchType entry there are no prior odds to report.
        let ledger = EvidenceLedger::new();
        assert_eq!(ledger.prior_odds(), None);
    }

    #[test]
    fn ledger_posterior_probability_no_prior() {
        // Conversion odds → probability: p = odds / (1 + odds).
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::Position,
            2.0,
            EvidenceDescription::Static("pos"),
        );
        // No MatchType entry → prior defaults to 1.0
        // posterior_odds = 1.0 * 2.0 = 2.0 → prob = 2/3
        let prob = ledger.posterior_probability();
        assert!((prob - 2.0 / 3.0).abs() < 1e-9);
    }

    #[test]
    fn ledger_posterior_probability_infinite_odds() {
        // Infinite odds must saturate at probability 1.0, not NaN.
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            f64::INFINITY,
            EvidenceDescription::Static("inf"),
        );
        assert_eq!(ledger.posterior_probability(), 1.0);
    }
2728
2729    // -----------------------------------------------------------------------
2730    // BayesianScorer::fast() path (no evidence tracking) (bd-z1c65)
2731    // -----------------------------------------------------------------------
2732
2733    #[test]
2734    fn fast_scorer_no_evidence() {
2735        let scorer = BayesianScorer::fast();
2736        let result = scorer.score("set", "Settings");
2737        assert!(result.score > 0.0);
2738        assert!(result.evidence.entries().is_empty());
2739    }
2740
2741    #[test]
2742    fn fast_scorer_score_matches_probability_range() {
2743        let scorer = BayesianScorer::fast();
2744        let result = scorer.score("set", "Settings");
2745        assert!(result.score >= 0.0 && result.score <= 1.0);
2746    }
2747
2748    #[test]
2749    fn fast_scorer_empty_query() {
2750        let scorer = BayesianScorer::fast();
2751        let result = scorer.score("", "Test");
2752        assert!(result.score > 0.0);
2753        assert!(result.evidence.entries().is_empty());
2754    }
2755
2756    #[test]
2757    fn fast_scorer_fuzzy_with_gap_penalty() {
2758        let scorer = BayesianScorer::fast();
2759        let result = scorer.score("ace", "a...b...c...d...e");
2760        assert_eq!(result.match_type, MatchType::Fuzzy);
2761        assert!(result.score > 0.0);
2762    }
2763
2764    #[test]
2765    fn fast_scorer_tag_boost() {
2766        let scorer = BayesianScorer::fast();
2767        let without = scorer.score("set", "Settings");
2768        let with = scorer.score_with_tags("set", "Settings", &["setup"]);
2769        assert!(with.score > without.score);
2770    }
2771
2772    #[test]
2773    fn fast_scorer_tag_no_boost_at_score_one() {
2774        let scorer = BayesianScorer::fast();
2775        // score_with_tags fast path only boosts when score is in (0, 1)
2776        let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
2777        assert!(result.score <= 1.0);
2778    }
2779
2780    // -----------------------------------------------------------------------
2781    // Unicode (non-ASCII) matching paths (bd-z1c65)
2782    // -----------------------------------------------------------------------
2783
    #[test]
    fn unicode_exact_match() {
        // Multi-byte characters must compare as full chars, not bytes.
        let scorer = BayesianScorer::new();
        let result = scorer.score("日本語", "日本語");
        assert_eq!(result.match_type, MatchType::Exact);
        assert!(result.score > 0.9);
    }

    #[test]
    fn unicode_prefix_match() {
        // Prefix detection over non-ASCII titles.
        let scorer = BayesianScorer::new();
        let result = scorer.score("日本", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Prefix);
    }

    #[test]
    fn unicode_substring_match() {
        // Interior substring of a non-ASCII title.
        let scorer = BayesianScorer::new();
        let result = scorer.score("本語", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Substring);
    }

    #[test]
    fn unicode_fuzzy_match() {
        // Non-contiguous characters from a non-ASCII title → fuzzy.
        let scorer = BayesianScorer::new();
        let result = scorer.score("日テ", "日本語テスト");
        assert_eq!(result.match_type, MatchType::Fuzzy);
    }

    #[test]
    fn unicode_no_match() {
        // Disjoint non-ASCII alphabets must yield NoMatch.
        let scorer = BayesianScorer::new();
        let result = scorer.score("ωψξ", "αβγ");
        assert_eq!(result.match_type, MatchType::NoMatch);
    }

    #[test]
    fn unicode_word_start_match() {
        // NOTE(review): despite the section title this case uses ASCII input;
        // it exercises word-start acronym matching across a space separator.
        let scorer = BayesianScorer::new();
        // Word boundaries at space/dash/underscore
        let result = scorer.score("gd", "go dashboard");
        assert_eq!(result.match_type, MatchType::WordStart);
    }
2827
2828    // -----------------------------------------------------------------------
2829    // score_with_query_lower / score_with_lowered_title (bd-z1c65)
2830    // -----------------------------------------------------------------------
2831
    #[test]
    fn score_with_query_lower_matches_score() {
        // Supplying a pre-lowercased query must be a pure optimization:
        // identical score and match type to the plain `score` path.
        let scorer = BayesianScorer::new();
        let direct = scorer.score("Set", "Settings");
        let pre = scorer.score_with_query_lower("Set", "set", "Settings");
        assert!((direct.score - pre.score).abs() < f64::EPSILON);
        assert_eq!(direct.match_type, pre.match_type);
    }

    #[test]
    fn score_with_lowered_title_matches_score() {
        // Pre-lowercased title variant must also be score-equivalent.
        let scorer = BayesianScorer::new();
        let direct = scorer.score("set", "Settings");
        let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
        assert!((direct.score - pre.score).abs() < f64::EPSILON);
    }

    #[test]
    fn score_with_lowered_title_and_words_matches_score() {
        // Passing precomputed word-start offsets ([0, 3] for "go dashboard")
        // must not change the result either.
        let scorer = BayesianScorer::new();
        let direct = scorer.score("gd", "Go Dashboard");
        let pre = scorer.score_with_lowered_title_and_words(
            "gd",
            "gd",
            "Go Dashboard",
            "go dashboard",
            Some(&[0, 3]),
        );
        assert_eq!(direct.match_type, pre.match_type);
        assert!((direct.score - pre.score).abs() < f64::EPSILON);
    }
2863
2864    // -----------------------------------------------------------------------
2865    // Tag matching edge cases (bd-z1c65)
2866    // -----------------------------------------------------------------------
2867
    #[test]
    fn tag_match_no_title_match_returns_nomatch() {
        // Tags alone cannot rescue a query that misses the title entirely.
        let scorer = BayesianScorer::new();
        let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
        // Tag matches "xyz" but title doesn't match → no boost applied
        assert_eq!(result.match_type, MatchType::NoMatch);
        assert_eq!(result.score, 0.0);
    }

    #[test]
    fn tag_match_no_matching_tag() {
        // Irrelevant tags must not perturb the title-only score.
        let scorer = BayesianScorer::new();
        let without = scorer.score("set", "Settings");
        let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
        // No tag matches → score unchanged
        assert!((without.score - with.score).abs() < f64::EPSILON);
    }

    #[test]
    fn tag_match_case_insensitive() {
        // An upper-case tag still matches a lower-case query prefix.
        let scorer = BayesianScorer::new();
        let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
        let without = scorer.score("set", "Settings");
        assert!(result.score > without.score);
    }
2893
2894    // -----------------------------------------------------------------------
2895    // MatchResult::no_match() (bd-z1c65)
2896    // -----------------------------------------------------------------------
2897
    #[test]
    fn no_match_result_has_evidence() {
        // The canonical NoMatch result is zero-scored with no positions, but
        // still carries one explanatory MatchType entry (BF 0.0) so that the
        // evidence ledger can explain why nothing matched.
        let r = MatchResult::no_match();
        assert_eq!(r.score, 0.0);
        assert_eq!(r.match_type, MatchType::NoMatch);
        assert!(r.match_positions.is_empty());
        assert_eq!(r.evidence.entries().len(), 1);
        assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
        assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
    }
2908
2909    // -----------------------------------------------------------------------
2910    // count_word_boundaries / total_gap edge cases (bd-z1c65)
2911    // -----------------------------------------------------------------------
2912
2913    #[test]
2914    fn count_word_boundaries_at_start() {
2915        let scorer = BayesianScorer::new();
2916        // Position 0 is always a word boundary
2917        let count = scorer.count_word_boundaries(&[0], "hello");
2918        assert_eq!(count, 1);
2919    }
2920
2921    #[test]
2922    fn count_word_boundaries_after_separators() {
2923        let scorer = BayesianScorer::new();
2924        // "a-b_c d" → positions 0, 2, 4, 6 are word starts
2925        let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2926        assert_eq!(count, 4);
2927    }
2928
2929    #[test]
2930    fn count_word_boundaries_mid_word() {
2931        let scorer = BayesianScorer::new();
2932        // Position 2 in "hello" is mid-word
2933        let count = scorer.count_word_boundaries(&[2], "hello");
2934        assert_eq!(count, 0);
2935    }
2936
2937    #[test]
2938    fn total_gap_empty() {
2939        let scorer = BayesianScorer::new();
2940        assert_eq!(scorer.total_gap(&[]), 0);
2941    }
2942
2943    #[test]
2944    fn total_gap_single() {
2945        let scorer = BayesianScorer::new();
2946        assert_eq!(scorer.total_gap(&[5]), 0);
2947    }
2948
2949    #[test]
2950    fn total_gap_contiguous() {
2951        let scorer = BayesianScorer::new();
2952        // [0,1,2] → gaps are 0,0 → total 0
2953        assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
2954    }
2955
2956    #[test]
2957    fn total_gap_with_gaps() {
2958        let scorer = BayesianScorer::new();
2959        // [0, 3, 7] → gaps: (3-0-1)=2, (7-3-1)=3 → total 5
2960        assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
2961    }
2962
2963    // -----------------------------------------------------------------------
2964    // ConformalRanker edge cases (bd-z1c65)
2965    // -----------------------------------------------------------------------
2966
    #[test]
    fn conformal_custom_thresholds() {
        // Ranker built directly (not via `new`) so custom thresholds apply.
        let ranker = ConformalRanker {
            tie_epsilon: 0.1,
            stable_threshold: 0.9,
            marginal_threshold: 0.5,
        };
        // With tie_epsilon=0.1, scores within 0.1 are ties
        // Fixtures start from no_match() and overwrite score/match_type to
        // get deterministic inputs without running the scorer.
        let mut r1 = MatchResult::no_match();
        r1.score = 0.5;
        r1.match_type = MatchType::Prefix;
        let mut r2 = MatchResult::no_match();
        r2.score = 0.45;
        r2.match_type = MatchType::Prefix;

        let ranked = ranker.rank(vec![r1, r2]);
        assert_eq!(
            ranked.items[0].rank_confidence.stability,
            RankStability::Unstable,
            "Gap 0.05 < epsilon 0.1 → tie"
        );
    }

    #[test]
    fn conformal_rank_top_k_larger_than_count() {
        // Requesting more items than exist returns everything, no panic.
        let scorer = BayesianScorer::new();
        let results = vec![scorer.score("set", "Settings")];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank_top_k(results, 100);
        assert_eq!(ranked.items.len(), 1);
    }

    #[test]
    fn conformal_rank_top_k_zero() {
        // k = 0 yields an empty ranking and a zeroed summary.
        let scorer = BayesianScorer::new();
        let results = vec![
            scorer.score("set", "Settings"),
            scorer.score("set", "Asset"),
        ];
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank_top_k(results, 0);
        assert!(ranked.items.is_empty());
        assert_eq!(ranked.summary.stable_count, 0);
    }

    #[test]
    fn conformal_rank_confidence_display_marginal() {
        // Display output must name the Marginal stability tier.
        let rc = RankConfidence {
            confidence: 0.5,
            gap_to_next: 0.05,
            stability: RankStability::Marginal,
        };
        let s = format!("{rc}");
        assert!(s.contains("marginal"));
    }

    #[test]
    fn conformal_rank_confidence_display_unstable() {
        // Display output must name the Unstable stability tier.
        let rc = RankConfidence {
            confidence: 0.1,
            gap_to_next: 0.001,
            stability: RankStability::Unstable,
        };
        let s = format!("{rc}");
        assert!(s.contains("unstable"));
    }

    #[test]
    fn conformal_median_gap_even_count() {
        // 4 items → 3 gaps → odd, but let's use 5 items → 4 gaps → even
        let mut results = Vec::new();
        for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
            let mut r = MatchResult::no_match();
            r.score = s;
            r.match_type = MatchType::Prefix;
            results.push(r);
        }
        let ranker = ConformalRanker::new();
        let ranked = ranker.rank(results);
        // 4 gaps, all 0.2 → median = 0.2
        assert!(
            (ranked.summary.median_gap - 0.2).abs() < 0.01,
            "median_gap={}, expected ~0.2",
            ranked.summary.median_gap
        );
    }
3053
3054    // -----------------------------------------------------------------------
3055    // IncrementalScorer additional coverage (bd-z1c65)
3056    // -----------------------------------------------------------------------
3057
    #[test]
    fn incremental_with_scorer_uses_provided_scorer() {
        // `with_scorer` must honour the injected scorer's configuration —
        // here, evidence tracking stays enabled end-to-end.
        let scorer = BayesianScorer::new(); // evidence tracking ON
        let mut inc = IncrementalScorer::with_scorer(scorer);
        let corpus = test_corpus();
        let results = inc.score_corpus("git", &corpus, None);
        assert!(!results.is_empty());
        // Evidence tracking is on, so evidence should be populated
        assert!(!results[0].1.evidence.entries().is_empty());
    }

    #[test]
    fn incremental_default_trait() {
        // A default-constructed scorer starts with zeroed scan counters.
        let inc = IncrementalScorer::default();
        assert_eq!(inc.stats().full_scans, 0);
        assert_eq!(inc.stats().incremental_scans, 0);
    }

    #[test]
    fn incremental_stats_prune_ratio_zero_when_empty() {
        // prune_ratio must not divide by zero on fresh stats.
        let stats = IncrementalStats::default();
        assert_eq!(stats.prune_ratio(), 0.0);
    }

    #[test]
    fn incremental_score_corpus_with_lowered() {
        // Caller-supplied lowercase titles: results must be non-empty and
        // every returned index must point back into the corpus.
        let corpus = vec!["Open File", "Save File", "Close Tab"];
        let lower = vec!["open file", "save file", "close tab"];
        let mut inc = IncrementalScorer::new();
        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
        assert!(!results.is_empty());
        // All results should reference valid corpus indices
        for (idx, _) in &results {
            assert!(*idx < corpus.len());
        }
    }

    #[test]
    fn incremental_score_corpus_with_lowered_incremental_path() {
        // Extending the query "f" → "fi" should take the pruned incremental
        // path rather than a second full scan.
        let corpus = vec!["Open File", "Save File", "Close Tab"];
        let lower = vec!["open file", "save file", "close tab"];
        let mut inc = IncrementalScorer::new();
        inc.score_corpus_with_lowered("f", &corpus, &lower, None);
        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
        assert_eq!(inc.stats().incremental_scans, 1);
        assert!(!results.is_empty());
    }

    #[test]
    fn incremental_score_corpus_with_lowered_and_words() {
        // Variant that also takes precomputed word-start offsets per title.
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close Tab".to_string(),
        ];
        let lower = vec![
            "open file".to_string(),
            "save file".to_string(),
            "close tab".to_string(),
        ];
        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];

        let mut inc = IncrementalScorer::new();
        let results =
            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
        assert!(!results.is_empty());
    }

    #[test]
    fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
        // Same query-extension pruning check for the word-starts variant.
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close Tab".to_string(),
        ];
        let lower = vec![
            "open file".to_string(),
            "save file".to_string(),
            "close tab".to_string(),
        ];
        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];

        let mut inc = IncrementalScorer::new();
        inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
        let results =
            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
        assert_eq!(inc.stats().incremental_scans, 1);
        assert!(!results.is_empty());
    }
3147
3148    // -----------------------------------------------------------------------
3149    // BayesianScorer::Default (bd-z1c65)
3150    // -----------------------------------------------------------------------
3151
3152    #[test]
3153    fn bayesian_scorer_default_has_evidence_off() {
3154        let scorer = BayesianScorer::default();
3155        assert!(!scorer.track_evidence);
3156    }
3157
3158    // -----------------------------------------------------------------------
3159    // Word boundary separator: hyphen and underscore (bd-z1c65)
3160    // -----------------------------------------------------------------------
3161
3162    #[test]
3163    fn word_start_match_with_hyphens() {
3164        let scorer = BayesianScorer::new();
3165        let result = scorer.score("fc", "file-commander");
3166        assert_eq!(result.match_type, MatchType::WordStart);
3167        assert_eq!(result.match_positions, vec![0, 5]);
3168    }
3169
3170    #[test]
3171    fn word_start_match_with_underscores() {
3172        let scorer = BayesianScorer::new();
3173        let result = scorer.score("fc", "file_commander");
3174        assert_eq!(result.match_type, MatchType::WordStart);
3175        assert_eq!(result.match_positions, vec![0, 5]);
3176    }
3177
3178    // -----------------------------------------------------------------------
3179    // MatchType Ord derivation (bd-z1c65)
3180    // -----------------------------------------------------------------------
3181
    #[test]
    fn match_type_ord() {
        // The derived Ord must follow declaration order (weakest match type
        // first), since ranking relies on it for tie-breaking.
        let mut types = vec![
            MatchType::Exact,
            MatchType::NoMatch,
            MatchType::Fuzzy,
            MatchType::Prefix,
            MatchType::Substring,
            MatchType::WordStart,
        ];
        types.sort();
        assert_eq!(
            types,
            vec![
                MatchType::NoMatch,
                MatchType::Fuzzy,
                MatchType::Substring,
                MatchType::WordStart,
                MatchType::Prefix,
                MatchType::Exact,
            ]
        );
    }
3205
3206    // -----------------------------------------------------------------------
3207    // EvidenceKind equality (bd-z1c65)
3208    // -----------------------------------------------------------------------
3209
    #[test]
    fn evidence_kind_equality() {
        // Smoke test for the derived PartialEq on EvidenceKind.
        assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
        assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
    }
3215
3216    // -----------------------------------------------------------------------
3217    // Scoring position bonus (bd-z1c65)
3218    // -----------------------------------------------------------------------
3219
    #[test]
    fn earlier_substring_scores_higher() {
        // Position evidence: an earlier first-match index contributes a
        // larger Bayes factor, so the same substring scores higher at the
        // front of the title.
        let scorer = BayesianScorer::new();
        let early = scorer.score("set", "set in stone");
        let late = scorer.score("set", "the asset");
        // "set" at position 0 vs "set" at position 5
        assert!(
            early.score > late.score,
            "Earlier match should score higher: {} vs {}",
            early.score,
            late.score
        );
    }
3233}
3234
3235// ===========================================================================
3236// Performance regression tests (bd-39y4.5)
3237// ===========================================================================
3238
3239#[cfg(test)]
3240mod perf_tests {
3241    use super::*;
3242    use std::time::Instant;
3243
    /// Latency statistics over a batch of timed iterations.
    ///
    /// All timing fields are in microseconds.
    #[derive(Debug, Clone, Copy)]
    struct PerfStats {
        // Median latency.
        p50_us: u64,
        // 95th-percentile latency.
        p95_us: u64,
        // 99th-percentile latency.
        p99_us: u64,
        // Arithmetic mean latency.
        mean_us: f64,
        // Population variance of the latencies (µs²).
        variance_us: f64,
        // Number of measured iterations (warmup excluded).
        samples: usize,
    }
3253
    /// Budget thresholds for single-query scoring.
    /// These are generous to avoid CI flakes but catch >2x regressions.
    const SINGLE_SCORE_BUDGET_US: u64 = 10; // 10µs per score call
    const CORPUS_100_BUDGET_US: u64 = 500; // 500µs for 100-item full scan
    const CORPUS_1000_BUDGET_US: u64 = 5_000; // 5ms for 1000-item full scan
    const CORPUS_5000_BUDGET_US: u64 = 25_000; // 25ms for 5000-item full scan
    const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 2_000; // 2ms for 7 keystrokes on 100 items
    const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000; // 15ms for 7 keystrokes on 1000 items

    /// Factor applied to all budgets when a coverage-instrumented run is
    /// detected, since instrumentation substantially slows execution.
    const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;
3264
    /// Detect a coverage-instrumented run via the environment variables set
    /// by LLVM profiling / `cargo llvm-cov`.
    fn is_coverage_run() -> bool {
        std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
    }
3268
3269    fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
3270        // Coverage instrumentation adds substantial overhead (and noise) to these
3271        // microbench-style regression tests, so we relax budgets enough to keep
3272        // `cargo llvm-cov` stable while still logging the timings.
3273        if coverage_run {
3274            base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
3275        } else {
3276            base
3277        }
3278    }
3279
3280    fn coverage_budget_us(base: u64) -> u64 {
3281        coverage_budget_us_with_mode(base, is_coverage_run())
3282    }
3283
    #[test]
    fn coverage_budget_us_with_mode_respects_flag() {
        // Budget is untouched in normal mode, multiplied under coverage.
        assert_eq!(coverage_budget_us_with_mode(123, false), 123);
        assert_eq!(
            coverage_budget_us_with_mode(123, true),
            123 * COVERAGE_BUDGET_MULTIPLIER
        );
    }
3292
3293    /// Generate a command corpus of the specified size with realistic variety.
3294    fn make_corpus(size: usize) -> Vec<String> {
3295        let base = [
3296            "Open File",
3297            "Save File",
3298            "Close Tab",
3299            "Split Editor Right",
3300            "Split Editor Down",
3301            "Toggle Terminal",
3302            "Go to Line",
3303            "Find in Files",
3304            "Replace in Files",
3305            "Git: Commit",
3306            "Git: Push",
3307            "Git: Pull",
3308            "Debug: Start",
3309            "Debug: Stop",
3310            "Debug: Step Over",
3311            "Format Document",
3312            "Rename Symbol",
3313            "Go to Definition",
3314            "Find All References",
3315            "Toggle Sidebar",
3316        ];
3317        base.iter()
3318            .cycle()
3319            .take(size)
3320            .enumerate()
3321            .map(|(i, cmd)| {
3322                if i < base.len() {
3323                    (*cmd).to_string()
3324                } else {
3325                    format!("{} ({})", cmd, i)
3326                }
3327            })
3328            .collect()
3329    }
3330
    /// Run `f` for `iterations` timed runs (after 3 untimed warmup calls)
    /// and summarize the per-run wall-clock latencies in microseconds.
    ///
    /// Percentiles use a simple index-based pick on the sorted samples
    /// (`len * q`, clamped to the last element), and the variance is the
    /// population variance — adequate precision for budget guards.
    fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
        let mut times = Vec::with_capacity(iterations);
        // Warmup
        for _ in 0..3 {
            f();
        }
        for _ in 0..iterations {
            let start = Instant::now();
            f();
            times.push(start.elapsed().as_micros() as u64);
        }
        times.sort_unstable();
        let len = times.len();
        let p50 = times[len / 2];
        let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
        let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
        let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
        let variance = times
            .iter()
            .map(|value| {
                let delta = *value as f64 - mean;
                delta * delta
            })
            .sum::<f64>()
            / len as f64;

        PerfStats {
            p50_us: p50,
            p95_us: p95,
            p99_us: p99,
            mean_us: mean,
            variance_us: variance,
            samples: len,
        }
    }
3366
    #[test]
    fn perf_single_score_under_budget() {
        // Guards the p50 of a single fast-path `score` call; timings are
        // logged as a JSON line for external trend tracking.
        let scorer = BayesianScorer::fast();
        let stats = measure_stats_us(200, || {
            std::hint::black_box(scorer.score("git co", "Git: Commit"));
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
        assert!(
            stats.p50_us <= budget,
            "Single score p50 = {}µs exceeds budget {}µs",
            stats.p50_us,
            budget,
        );
    }
3390
    #[test]
    fn perf_corpus_100_under_budget() {
        // Full scan + filter + sort over a 100-item corpus; guards p95.
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(100);
        let stats = measure_stats_us(50, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "100-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }
3421
    #[test]
    fn perf_corpus_1000_under_budget() {
        // Full scan + filter + sort over a 1000-item corpus; guards p95.
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(1_000);
        let stats = measure_stats_us(20, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "1000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }
3452
    #[test]
    fn perf_corpus_5000_under_budget() {
        // Full scan + filter + sort over a 5000-item corpus; guards p95.
        let scorer = BayesianScorer::fast();
        let corpus = make_corpus(5_000);
        let stats = measure_stats_us(10, || {
            let mut results: Vec<_> = corpus
                .iter()
                .map(|t| scorer.score("git co", t))
                .filter(|r| r.score > 0.0)
                .collect();
            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
            std::hint::black_box(&results);
        });
        eprintln!(
            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
            stats.samples,
            stats.p50_us,
            stats.p95_us,
            stats.p99_us,
            stats.mean_us,
            stats.variance_us
        );
        let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
        assert!(
            stats.p95_us <= budget,
            "5000-item corpus p95 = {}µs exceeds budget {}µs",
            stats.p95_us,
            budget,
        );
    }
3483
3484    #[test]
3485    fn perf_incremental_7key_100_under_budget() {
3486        let corpus = make_corpus(100);
3487        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3488        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3489
3490        let stats = measure_stats_us(30, || {
3491            let mut inc = IncrementalScorer::new();
3492            for query in &queries {
3493                let results = inc.score_corpus(query, &corpus_refs, None);
3494                std::hint::black_box(&results);
3495            }
3496        });
3497        eprintln!(
3498            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3499            stats.samples,
3500            stats.p50_us,
3501            stats.p95_us,
3502            stats.p99_us,
3503            stats.mean_us,
3504            stats.variance_us
3505        );
3506        let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
3507        assert!(
3508            stats.p95_us <= budget,
3509            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
3510            stats.p95_us,
3511            budget,
3512        );
3513    }
3514
3515    #[test]
3516    fn perf_incremental_7key_1000_under_budget() {
3517        let corpus = make_corpus(1_000);
3518        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3519        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3520
3521        let stats = measure_stats_us(10, || {
3522            let mut inc = IncrementalScorer::new();
3523            for query in &queries {
3524                let results = inc.score_corpus(query, &corpus_refs, None);
3525                std::hint::black_box(&results);
3526            }
3527        });
3528        eprintln!(
3529            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3530            stats.samples,
3531            stats.p50_us,
3532            stats.p95_us,
3533            stats.p99_us,
3534            stats.mean_us,
3535            stats.variance_us
3536        );
3537        let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
3538        assert!(
3539            stats.p95_us <= budget,
3540            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
3541            stats.p95_us,
3542            budget,
3543        );
3544    }
3545
3546    #[test]
3547    fn perf_incremental_faster_than_naive() {
3548        let corpus = make_corpus(100);
3549        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3550        let scorer = BayesianScorer::fast();
3551        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3552
3553        let naive_stats = measure_stats_us(30, || {
3554            for query in &queries {
3555                let mut results: Vec<_> = corpus
3556                    .iter()
3557                    .map(|t| scorer.score(query, t))
3558                    .filter(|r| r.score > 0.0)
3559                    .collect();
3560                results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3561                std::hint::black_box(&results);
3562            }
3563        });
3564
3565        let inc_stats = measure_stats_us(30, || {
3566            let mut inc = IncrementalScorer::new();
3567            for query in &queries {
3568                let results = inc.score_corpus(query, &corpus_refs, None);
3569                std::hint::black_box(&results);
3570            }
3571        });
3572        eprintln!(
3573            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
3574            naive_stats.samples,
3575            naive_stats.p50_us,
3576            naive_stats.p95_us,
3577            inc_stats.p50_us,
3578            inc_stats.p95_us
3579        );
3580
3581        // Incremental should not be more than 2x slower than naive
3582        // (in practice it's faster, but we set a relaxed threshold)
3583        assert!(
3584            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50, // +50µs tolerance for measurement noise
3585            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
3586            inc_stats.p50_us,
3587            naive_stats.p50_us,
3588        );
3589    }
3590
3591    fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
3592        if p50_small_us > 0 {
3593            p50_large_us as f64 / p50_small_us as f64
3594        } else {
3595            0.0
3596        }
3597    }
3598
3599    #[test]
3600    fn scaling_ratio_handles_zero_divisor() {
3601        assert_eq!(scaling_ratio_us(0, 123), 0.0);
3602        assert_eq!(scaling_ratio_us(10, 25), 2.5);
3603    }
3604
3605    #[test]
3606    fn perf_scaling_sublinear() {
3607        let scorer = BayesianScorer::fast();
3608        let corpus_100 = make_corpus(100);
3609        let corpus_1000 = make_corpus(1_000);
3610
3611        let stats_100 = measure_stats_us(30, || {
3612            let mut results: Vec<_> = corpus_100
3613                .iter()
3614                .map(|t| scorer.score("git", t))
3615                .filter(|r| r.score > 0.0)
3616                .collect();
3617            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3618            std::hint::black_box(&results);
3619        });
3620
3621        let stats_1000 = measure_stats_us(20, || {
3622            let mut results: Vec<_> = corpus_1000
3623                .iter()
3624                .map(|t| scorer.score("git", t))
3625                .filter(|r| r.score > 0.0)
3626                .collect();
3627            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3628            std::hint::black_box(&results);
3629        });
3630        eprintln!(
3631            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
3632            stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
3633        );
3634
3635        // 10x corpus ⇒ O(n log n) theoretical ratio ≈ 15x.  Use 25x to absorb
3636        // system noise when tests run in parallel (still catches O(n²) = 100x).
3637        let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
3638        assert!(
3639            ratio < 25.0,
3640            "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
3641            ratio,
3642            stats_100.p50_us,
3643            stats_1000.p50_us,
3644        );
3645    }
3646}