Skip to main content

ftui_widgets/command_palette/
scorer.rs

1#![forbid(unsafe_code)]
2
3//! Bayesian Match Scoring for Command Palette.
4//!
5//! This module implements a probabilistic scoring model using Bayes factors
6//! to compute match relevance. An evidence ledger tracks each scoring factor
7//! and its contribution, enabling explainable ranking decisions.
8//!
9//! # Mathematical Model
10//!
11//! We compute the posterior odds ratio:
12//!
13//! ```text
14//! P(relevant | evidence) / P(not_relevant | evidence)
15//!     = [P(relevant) / P(not_relevant)] × Π_i BF_i
16//!
17//! where BF_i = P(evidence_i | relevant) / P(evidence_i | not_relevant)
18//!            = likelihood ratio for evidence type i
19//! ```
20//!
21//! The prior odds depend on match type:
22//! - Exact match: 99:1 (very likely relevant)
23//! - Prefix match: 9:1 (likely relevant)
24//! - Word-start match: 4:1 (probably relevant)
25//! - Substring match: 2:1 (possibly relevant)
26//! - Fuzzy match: 1:3 (unlikely without other evidence)
27//!
28//! The final score is the posterior probability P(relevant | evidence).
29//!
30//! # Evidence Ledger
31//!
32//! Each match includes an evidence ledger that explains the score:
33//! - Match type contribution (prior odds)
34//! - Word boundary bonus (Bayes factor ~2.0)
35//! - Position bonus (earlier = higher factor)
36//! - Gap penalty (gaps reduce factor)
37//! - Tag match bonus (strong positive evidence)
38//!
39//! # Invariants
40//!
41//! 1. Scores are bounded: 0.0 ≤ score ≤ 1.0 (probability)
42//! 2. Determinism: same input → identical score
43//! 3. Monotonicity: longer exact prefixes score ≥ shorter
44//! 4. Transitivity: score ordering is consistent
45
46use std::cmp::Ordering;
47use std::fmt;
48
49// ---------------------------------------------------------------------------
50// Match Types
51// ---------------------------------------------------------------------------
52
53/// Type of match between query and title.
54#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
55pub enum MatchType {
56    /// No match found.
57    NoMatch,
58    /// Characters found in order but with gaps.
59    Fuzzy,
60    /// Query found as contiguous substring.
61    Substring,
62    /// Query matches start of word boundaries.
63    WordStart,
64    /// Title starts with query.
65    Prefix,
66    /// Query equals title exactly.
67    Exact,
68}
69
70impl MatchType {
71    /// Prior odds ratio P(relevant) / P(not_relevant) for this match type.
72    ///
73    /// These are derived from empirical observations of user intent:
74    /// - Exact matches are almost always what the user wants
75    /// - Prefix matches are very likely relevant
76    /// - Fuzzy matches need additional evidence
77    pub fn prior_odds(self) -> f64 {
78        match self {
79            Self::Exact => 99.0,    // 99:1 odds → P ≈ 0.99
80            Self::Prefix => 9.0,    // 9:1 odds → P ≈ 0.90
81            Self::WordStart => 4.0, // 4:1 odds → P ≈ 0.80
82            Self::Substring => 2.0, // 2:1 odds → P ≈ 0.67
83            Self::Fuzzy => 0.333,   // 1:3 odds → P ≈ 0.25
84            Self::NoMatch => 0.0,   // Impossible
85        }
86    }
87
88    /// Human-readable description for the evidence ledger.
89    pub fn description(self) -> &'static str {
90        match self {
91            Self::Exact => "exact match",
92            Self::Prefix => "prefix match",
93            Self::WordStart => "word-start match",
94            Self::Substring => "contiguous substring",
95            Self::Fuzzy => "fuzzy match",
96            Self::NoMatch => "no match",
97        }
98    }
99}
100
101fn compare_ranked_match_results(
102    left: &(usize, MatchResult),
103    right: &(usize, MatchResult),
104) -> Ordering {
105    right
106        .1
107        .score
108        .partial_cmp(&left.1.score)
109        .unwrap_or(Ordering::Equal)
110        .then_with(|| right.1.match_type.cmp(&left.1.match_type))
111}
112
113// ---------------------------------------------------------------------------
114// Evidence Entry
115// ---------------------------------------------------------------------------
116
117/// A single piece of evidence contributing to the match score.
118#[derive(Debug, Clone)]
119pub struct EvidenceEntry {
120    /// Type of evidence.
121    pub kind: EvidenceKind,
122    /// Bayes factor: likelihood ratio P(evidence | relevant) / P(evidence | ¬relevant).
123    /// Values > 1.0 support relevance, < 1.0 oppose it.
124    pub bayes_factor: f64,
125    /// Human-readable explanation.
126    pub description: EvidenceDescription,
127}
128
129/// Types of evidence that contribute to match scoring.
130#[derive(Debug, Clone, Copy, PartialEq, Eq)]
131pub enum EvidenceKind {
132    /// Base match type (prior).
133    MatchType,
134    /// Match at word boundary.
135    WordBoundary,
136    /// Match position (earlier is better).
137    Position,
138    /// Gap between matched characters.
139    GapPenalty,
140    /// Query also matches a tag.
141    TagMatch,
142    /// Title length factor (shorter is more specific).
143    TitleLength,
144}
145
146impl fmt::Display for EvidenceEntry {
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        let direction = if self.bayes_factor > 1.0 {
149            "supports"
150        } else if self.bayes_factor < 1.0 {
151            "opposes"
152        } else {
153            "neutral"
154        };
155        write!(
156            f,
157            "{:?}: BF={:.2} ({}) - {}",
158            self.kind, self.bayes_factor, direction, self.description
159        )
160    }
161}
162
163/// Human-readable evidence description (lazy formatting).
164#[derive(Debug, Clone)]
165pub enum EvidenceDescription {
166    Static(&'static str),
167    TitleLengthChars { len: usize },
168    FirstMatchPos { pos: usize },
169    WordBoundaryCount { count: usize },
170    GapTotal { total: usize },
171    CoveragePercent { percent: f64 },
172}
173
174impl fmt::Display for EvidenceDescription {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            Self::Static(msg) => write!(f, "{msg}"),
178            Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
179            Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
180            Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
181            Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
182            Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
183        }
184    }
185}
186
187// ---------------------------------------------------------------------------
188// Evidence Ledger
189// ---------------------------------------------------------------------------
190
191/// A ledger of evidence explaining a match score.
192///
193/// This provides full transparency into why a match received its score,
194/// enabling debugging and user explanations.
195#[derive(Debug, Clone, Default)]
196pub struct EvidenceLedger {
197    entries: Vec<EvidenceEntry>,
198}
199
200impl EvidenceLedger {
201    /// Create a new empty ledger.
202    pub fn new() -> Self {
203        Self::default()
204    }
205
206    /// Add an evidence entry.
207    pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
208        self.entries.push(EvidenceEntry {
209            kind,
210            bayes_factor,
211            description,
212        });
213    }
214
215    /// Get all entries.
216    pub fn entries(&self) -> &[EvidenceEntry] {
217        &self.entries
218    }
219
220    /// Compute the combined Bayes factor (product of all factors).
221    pub fn combined_bayes_factor(&self) -> f64 {
222        self.entries.iter().map(|e| e.bayes_factor).product()
223    }
224
225    /// Get the prior odds (from MatchType entry, if present).
226    #[must_use = "use the prior odds (if any) for diagnostics"]
227    pub fn prior_odds(&self) -> Option<f64> {
228        self.entries
229            .iter()
230            .find(|e| e.kind == EvidenceKind::MatchType)
231            .map(|e| e.bayes_factor)
232    }
233
234    /// Compute posterior probability from prior odds and evidence.
235    ///
236    /// posterior_prob = posterior_odds / (1 + posterior_odds)
237    /// where posterior_odds = prior_odds × combined_bf
238    pub fn posterior_probability(&self) -> f64 {
239        let prior = self.prior_odds().unwrap_or(1.0);
240        // Exclude prior from BF since it's already the odds
241        let bf: f64 = self
242            .entries
243            .iter()
244            .filter(|e| e.kind != EvidenceKind::MatchType)
245            .map(|e| e.bayes_factor)
246            .product();
247
248        let posterior_odds = prior * bf;
249        if posterior_odds.is_infinite() {
250            1.0
251        } else {
252            posterior_odds / (1.0 + posterior_odds)
253        }
254    }
255}
256
257impl EvidenceLedger {
258    /// Format this ledger as a JSONL line for structured logging.
259    #[must_use]
260    pub fn to_jsonl(&self) -> String {
261        let entries_json: Vec<String> = self
262            .entries
263            .iter()
264            .map(|e| {
265                format!(
266                    r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
267                    e.kind, e.bayes_factor, e.description
268                )
269            })
270            .collect();
271        format!(
272            r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
273            entries_json.join(","),
274            self.combined_bayes_factor(),
275            self.posterior_probability(),
276        )
277    }
278}
279
280impl fmt::Display for EvidenceLedger {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        writeln!(f, "Evidence Ledger:")?;
283        for entry in &self.entries {
284            writeln!(f, "  {}", entry)?;
285        }
286        writeln!(f, "  Combined BF: {:.3}", self.combined_bayes_factor())?;
287        writeln!(f, "  Posterior P: {:.3}", self.posterior_probability())?;
288        Ok(())
289    }
290}
291
292// ---------------------------------------------------------------------------
293// Match Result
294// ---------------------------------------------------------------------------
295
296/// Result of scoring a query against a title.
297#[derive(Debug, Clone)]
298pub struct MatchResult {
299    /// Computed relevance score (posterior probability).
300    pub score: f64,
301    /// Type of match detected.
302    pub match_type: MatchType,
303    /// Positions of matched characters in the title.
304    pub match_positions: Vec<usize>,
305    /// Evidence ledger explaining the score.
306    pub evidence: EvidenceLedger,
307}
308
309impl MatchResult {
310    /// Create a no-match result.
311    pub fn no_match() -> Self {
312        let mut evidence = EvidenceLedger::new();
313        evidence.add(
314            EvidenceKind::MatchType,
315            0.0,
316            EvidenceDescription::Static("no matching characters found"),
317        );
318        Self {
319            score: 0.0,
320            match_type: MatchType::NoMatch,
321            match_positions: Vec::new(),
322            evidence,
323        }
324    }
325}
326
327// ---------------------------------------------------------------------------
328// Scorer
329// ---------------------------------------------------------------------------
330
331/// Bayesian fuzzy matcher for command palette.
332///
333/// Computes relevance scores using a probabilistic model with
334/// evidence tracking for explainable ranking.
335#[derive(Debug, Clone, Default)]
336pub struct BayesianScorer {
337    /// Whether to track detailed evidence (slower but explainable).
338    pub track_evidence: bool,
339}
340
341impl BayesianScorer {
342    /// Create a new scorer with evidence tracking enabled.
343    pub fn new() -> Self {
344        Self {
345            track_evidence: true,
346        }
347    }
348
349    /// Create a scorer without evidence tracking (faster).
350    pub fn fast() -> Self {
351        Self {
352            track_evidence: false,
353        }
354    }
355
356    /// Score a query against a title.
357    ///
358    /// Returns a MatchResult with score, match type, positions, and evidence.
359    pub fn score(&self, query: &str, title: &str) -> MatchResult {
360        // Quick rejection: query longer than title
361        if query.len() > title.len() {
362            return MatchResult::no_match();
363        }
364
365        // Empty query matches everything (show all)
366        if query.is_empty() {
367            return self.score_empty_query(title);
368        }
369
370        // Normalize for case-insensitive matching
371        let query_lower = query.to_lowercase();
372        let title_lower = title.to_lowercase();
373
374        // Determine match type
375        let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);
376
377        if match_type == MatchType::NoMatch {
378            return MatchResult::no_match();
379        }
380
381        // Build evidence ledger and compute score
382        self.compute_score(match_type, positions, &query_lower, title)
383    }
384
385    /// Score a query using a pre-lowercased query string.
386    ///
387    /// This avoids repeated query normalization when scoring against many titles.
388    pub fn score_with_query_lower(
389        &self,
390        query: &str,
391        query_lower: &str,
392        title: &str,
393    ) -> MatchResult {
394        let title_lower = title.to_lowercase();
395        self.score_with_lowered_title(query, query_lower, title, &title_lower)
396    }
397
398    /// Score a query with both query and title already lowercased.
399    ///
400    /// This avoids per-title lowercasing in hot loops.
401    pub fn score_with_lowered_title(
402        &self,
403        query: &str,
404        query_lower: &str,
405        title: &str,
406        title_lower: &str,
407    ) -> MatchResult {
408        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
409    }
410
411    /// Score a query with pre-lowercased title and optional word-start cache.
412    pub fn score_with_lowered_title_and_words(
413        &self,
414        query: &str,
415        query_lower: &str,
416        title: &str,
417        title_lower: &str,
418        word_starts: Option<&[usize]>,
419    ) -> MatchResult {
420        // Quick rejection: query longer than title
421        if query.len() > title.len() {
422            return MatchResult::no_match();
423        }
424
425        // Empty query matches everything (show all)
426        if query.is_empty() {
427            return self.score_empty_query(title);
428        }
429
430        // Determine match type
431        let (match_type, positions) =
432            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);
433
434        if match_type == MatchType::NoMatch {
435            return MatchResult::no_match();
436        }
437
438        // Build evidence ledger and compute score
439        self.compute_score(match_type, positions, query_lower, title)
440    }
441
442    /// Score a query against a title with tags.
443    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
444        let mut result = self.score(query, title);
445
446        // Check if query matches any tag
447        let query_lower = query.to_lowercase();
448        let tag_match = tags
449            .iter()
450            .any(|tag| crate::contains_ignore_case(tag, &query_lower));
451
452        if tag_match && result.match_type != MatchType::NoMatch {
453            // Strong positive evidence
454            if self.track_evidence {
455                result.evidence.add(
456                    EvidenceKind::TagMatch,
457                    3.0, // 3:1 in favor
458                    EvidenceDescription::Static("query matches tag"),
459                );
460                result.score = result.evidence.posterior_probability();
461            } else if (0.0..1.0).contains(&result.score) {
462                let odds = result.score / (1.0 - result.score);
463                let boosted = odds * 3.0;
464                result.score = boosted / (1.0 + boosted);
465            }
466        }
467
468        result
469    }
470
471    /// Score when query is empty (returns all items with neutral score).
472    fn score_empty_query(&self, title: &str) -> MatchResult {
473        // Shorter titles are more specific, slight preference
474        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
475        if self.track_evidence {
476            let mut evidence = EvidenceLedger::new();
477            evidence.add(
478                EvidenceKind::MatchType,
479                1.0, // Neutral prior
480                EvidenceDescription::Static("empty query matches all"),
481            );
482            evidence.add(
483                EvidenceKind::TitleLength,
484                length_factor,
485                EvidenceDescription::TitleLengthChars { len: title.len() },
486            );
487            let score = evidence.posterior_probability();
488            MatchResult {
489                score,
490                match_type: MatchType::Fuzzy, // Treat as weak match
491                match_positions: Vec::new(),
492                evidence,
493            }
494        } else {
495            let odds = length_factor;
496            let score = odds / (1.0 + odds);
497            MatchResult {
498                score,
499                match_type: MatchType::Fuzzy,
500                match_positions: Vec::new(),
501                evidence: EvidenceLedger::new(),
502            }
503        }
504    }
505
506    /// Detect the type of match and positions of matched characters.
507    fn detect_match_type(
508        &self,
509        query_lower: &str,
510        title_lower: &str,
511        title: &str,
512    ) -> (MatchType, Vec<usize>) {
513        self.detect_match_type_with_words(query_lower, title_lower, title, None)
514    }
515
516    /// Detect match type with optional precomputed word-start positions.
517    fn detect_match_type_with_words(
518        &self,
519        query_lower: &str,
520        title_lower: &str,
521        title: &str,
522        word_starts: Option<&[usize]>,
523    ) -> (MatchType, Vec<usize>) {
524        if query_lower.is_ascii() && title_lower.is_ascii() {
525            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
526        }
527
528        // Check exact match
529        if query_lower == title_lower {
530            let positions: Vec<usize> = (0..title.chars().count()).collect();
531            return (MatchType::Exact, positions);
532        }
533
534        // Check prefix match
535        if title_lower.starts_with(query_lower) {
536            let positions: Vec<usize> = (0..query_lower.chars().count()).collect();
537            return (MatchType::Prefix, positions);
538        }
539
540        // Check word-start match (e.g., "gd" matches "Go Dashboard")
541        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
542            return (MatchType::WordStart, positions);
543        }
544
545        // Check contiguous substring
546        if let Some(byte_start) = title_lower.find(query_lower) {
547            let char_start = title_lower[..byte_start].chars().count();
548            let char_len = query_lower.chars().count();
549            let positions: Vec<usize> = (char_start..char_start + char_len).collect();
550            return (MatchType::Substring, positions);
551        }
552
553        // Check fuzzy match
554        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
555            return (MatchType::Fuzzy, positions);
556        }
557
558        (MatchType::NoMatch, Vec::new())
559    }
560
561    /// ASCII fast-path match detection.
562    fn detect_match_type_ascii(
563        &self,
564        query_lower: &str,
565        title_lower: &str,
566        word_starts: Option<&[usize]>,
567    ) -> (MatchType, Vec<usize>) {
568        let query_bytes = query_lower.as_bytes();
569        let title_bytes = title_lower.as_bytes();
570
571        if query_bytes == title_bytes {
572            let positions: Vec<usize> = (0..title_bytes.len()).collect();
573            return (MatchType::Exact, positions);
574        }
575
576        if title_bytes.starts_with(query_bytes) {
577            let positions: Vec<usize> = (0..query_bytes.len()).collect();
578            return (MatchType::Prefix, positions);
579        }
580
581        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
582        {
583            return (MatchType::WordStart, positions);
584        }
585
586        if let Some(start) = title_lower.find(query_lower) {
587            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
588            return (MatchType::Substring, positions);
589        }
590
591        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
592            return (MatchType::Fuzzy, positions);
593        }
594
595        (MatchType::NoMatch, Vec::new())
596    }
597
598    /// Check if query matches word starts (e.g., "gd" → "Go Dashboard").
599    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
600        let mut positions = Vec::new();
601        let mut query_chars = query.chars().peekable();
602
603        let title_bytes = title.as_bytes();
604        for (char_index, (i, c)) in title.char_indices().enumerate() {
605            // Is this a word start?
606            let is_word_start = i == 0 || {
607                let prev = title_bytes
608                    .get(i.saturating_sub(1))
609                    .copied()
610                    .unwrap_or(b' ');
611                prev == b' ' || prev == b'-' || prev == b'_'
612            };
613
614            if is_word_start
615                && let Some(&qc) = query_chars.peek()
616                && c == qc
617            {
618                positions.push(char_index);
619                query_chars.next();
620            }
621        }
622
623        if query_chars.peek().is_none() {
624            Some(positions)
625        } else {
626            None
627        }
628    }
629
630    /// ASCII word-start match with optional precomputed word-start positions.
631    fn word_start_match_ascii(
632        &self,
633        query: &[u8],
634        title: &[u8],
635        word_starts: Option<&[usize]>,
636    ) -> Option<Vec<usize>> {
637        let mut positions = Vec::new();
638        let mut query_idx = 0;
639        if query.is_empty() {
640            return Some(positions);
641        }
642
643        if let Some(starts) = word_starts {
644            for &pos in starts {
645                if pos >= title.len() {
646                    continue;
647                }
648                if title[pos] == query[query_idx] {
649                    positions.push(pos);
650                    query_idx += 1;
651                    if query_idx == query.len() {
652                        return Some(positions);
653                    }
654                }
655            }
656        } else {
657            for (i, &b) in title.iter().enumerate() {
658                let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
659                if is_word_start && b == query[query_idx] {
660                    positions.push(i);
661                    query_idx += 1;
662                    if query_idx == query.len() {
663                        return Some(positions);
664                    }
665                }
666            }
667        }
668
669        None
670    }
671
672    /// Check fuzzy match (characters in order).
673    fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
674        let mut positions = Vec::new();
675        let mut query_chars = query.chars().peekable();
676
677        for (char_index, c) in title.chars().enumerate() {
678            if let Some(&qc) = query_chars.peek()
679                && c == qc
680            {
681                positions.push(char_index);
682                query_chars.next();
683            }
684        }
685
686        if query_chars.peek().is_none() {
687            Some(positions)
688        } else {
689            None
690        }
691    }
692
693    /// ASCII fuzzy match (characters in order).
694    fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
695        let mut positions = Vec::new();
696        let mut query_idx = 0;
697        if query.is_empty() {
698            return Some(positions);
699        }
700
701        for (i, &b) in title.iter().enumerate() {
702            if b == query[query_idx] {
703                positions.push(i);
704                query_idx += 1;
705                if query_idx == query.len() {
706                    return Some(positions);
707                }
708            }
709        }
710
711        None
712    }
713
714    /// Compute score from match type and positions.
715    fn compute_score(
716        &self,
717        match_type: MatchType,
718        positions: Vec<usize>,
719        query: &str,
720        title: &str,
721    ) -> MatchResult {
722        let positions_ref = positions.as_slice();
723        if !self.track_evidence {
724            let mut combined_bf = match_type.prior_odds();
725
726            if let Some(&first_pos) = positions_ref.first() {
727                let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
728                combined_bf *= position_factor;
729            }
730
731            let word_boundary_count = self.count_word_boundaries(positions_ref, title);
732            if word_boundary_count > 0 {
733                let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
734                combined_bf *= boundary_factor;
735            }
736
737            if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
738                let total_gap = self.total_gap(positions_ref);
739                let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
740                combined_bf *= gap_factor;
741            }
742
743            let title_len = title.len().max(1);
744            let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
745            combined_bf *= length_factor;
746
747            let score = combined_bf / (1.0 + combined_bf);
748            return MatchResult {
749                score,
750                match_type,
751                match_positions: positions,
752                evidence: EvidenceLedger::new(),
753            };
754        }
755
756        let mut evidence = EvidenceLedger::new();
757
758        // Prior odds from match type
759        let prior_odds = match_type.prior_odds();
760        evidence.add(
761            EvidenceKind::MatchType,
762            prior_odds,
763            EvidenceDescription::Static(match_type.description()),
764        );
765
766        // Position bonus: matches at start are better
767        if let Some(&first_pos) = positions_ref.first() {
768            let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
769            evidence.add(
770                EvidenceKind::Position,
771                position_factor,
772                EvidenceDescription::FirstMatchPos { pos: first_pos },
773            );
774        }
775
776        // Word boundary bonus
777        let word_boundary_count = self.count_word_boundaries(positions_ref, title);
778        if word_boundary_count > 0 {
779            let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
780            evidence.add(
781                EvidenceKind::WordBoundary,
782                boundary_factor,
783                EvidenceDescription::WordBoundaryCount {
784                    count: word_boundary_count,
785                },
786            );
787        }
788
789        // Gap penalty for fuzzy matches
790        if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
791            let total_gap = self.total_gap(positions_ref);
792            let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
793            evidence.add(
794                EvidenceKind::GapPenalty,
795                gap_factor,
796                EvidenceDescription::GapTotal { total: total_gap },
797            );
798        }
799
800        // Title length: prefer shorter (more specific) titles
801        let title_len = title.len().max(1);
802        let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
803        evidence.add(
804            EvidenceKind::TitleLength,
805            length_factor,
806            EvidenceDescription::CoveragePercent {
807                percent: (query.len() as f64 / title_len as f64) * 100.0,
808            },
809        );
810
811        let score = evidence.posterior_probability();
812
813        MatchResult {
814            score,
815            match_type,
816            match_positions: positions,
817            evidence,
818        }
819    }
820
821    /// Count how many matched positions are at word boundaries.
822    fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
823        let title_bytes = title.as_bytes();
824        let mut count = 0;
825        for &pos in positions {
826            let is_boundary = if pos == 0 {
827                true
828            } else {
829                let prev = title_bytes
830                    .get(pos.saturating_sub(1))
831                    .copied()
832                    .unwrap_or(b' ');
833                prev == b' ' || prev == b'-' || prev == b'_'
834            };
835            if is_boundary {
836                count += 1;
837            }
838        }
839        count
840    }
841
842    /// Calculate total gap between matched positions.
843    fn total_gap(&self, positions: &[usize]) -> usize {
844        if positions.len() < 2 {
845            return 0;
846        }
847        let mut total = 0;
848        for window in positions.windows(2) {
849            total += window[1].saturating_sub(window[0]).saturating_sub(1);
850        }
851        total
852    }
853}
854
855// ---------------------------------------------------------------------------
856// Conformal Rank Confidence
857// ---------------------------------------------------------------------------
858
859/// Confidence level for a ranking position.
860///
861/// Derived from distribution-free conformal prediction: we compute
862/// nonconformity scores (score gaps) and calibrate them against the
863/// empirical distribution of all gaps in the result set.
864///
865/// # Invariants
866///
867/// 1. `confidence` is in `[0.0, 1.0]`.
868/// 2. A gap of zero always yields `Unstable` stability.
869/// 3. Deterministic: same scores → same confidence.
870#[derive(Debug, Clone)]
871pub struct RankConfidence {
872    /// Probability that this item truly belongs at this rank position.
873    /// Computed as the fraction of score gaps that are smaller than
874    /// the gap between this item and the next.
875    pub confidence: f64,
876
877    /// Absolute score gap to the next-ranked item (0.0 if last or tied).
878    pub gap_to_next: f64,
879
880    /// Stability classification derived from gap analysis.
881    pub stability: RankStability,
882}
883
884/// Stability classification for a rank position.
885#[derive(Debug, Clone, Copy, PartialEq, Eq)]
886pub enum RankStability {
887    /// Score gap is large relative to the distribution — rank is reliable.
888    Stable,
889    /// Score gap is moderate — rank is plausible but could swap with neighbors.
890    Marginal,
891    /// Score gap is negligible — rank is essentially a tie.
892    Unstable,
893}
894
895impl RankStability {
896    /// Human-readable label.
897    pub fn label(self) -> &'static str {
898        match self {
899            Self::Stable => "stable",
900            Self::Marginal => "marginal",
901            Self::Unstable => "unstable",
902        }
903    }
904}
905
906/// Result of ranking a set of match results with conformal confidence.
907#[derive(Debug, Clone)]
908pub struct RankedResults {
909    /// Items sorted by descending score, each with rank confidence.
910    pub items: Vec<RankedItem>,
911
912    /// Summary statistics about the ranking.
913    pub summary: RankingSummary,
914}
915
916/// A single item in the ranked results.
917#[derive(Debug, Clone)]
918pub struct RankedItem {
919    /// Index into the original (pre-sort) input slice.
920    pub original_index: usize,
921
922    /// The match result.
923    pub result: MatchResult,
924
925    /// Conformal confidence for this rank position.
926    pub rank_confidence: RankConfidence,
927}
928
929/// Summary statistics for a ranked result set.
930#[derive(Debug, Clone)]
931pub struct RankingSummary {
932    /// Number of items in the ranking.
933    pub count: usize,
934
935    /// Number of items with stable rank positions.
936    pub stable_count: usize,
937
938    /// Number of tie groups (sets of items with indistinguishable scores).
939    pub tie_group_count: usize,
940
941    /// Median score gap between adjacent ranked items.
942    pub median_gap: f64,
943}
944
945/// Conformal ranker that assigns distribution-free confidence to rank positions.
946///
947/// # Method
948///
949/// Given sorted scores `s_1 ≥ s_2 ≥ ... ≥ s_n`, we define the nonconformity
950/// score for position `i` as the gap `g_i = s_i - s_{i+1}`.
951///
952/// The conformal p-value for position `i` is:
953///
954/// ```text
955/// p_i = |{j : g_j ≤ g_i}| / (n - 1)
956/// ```
957///
958/// This gives the fraction of gaps that are at most as large as this item's
959/// gap, which we interpret as confidence that the item is correctly ranked
960/// above its successor.
961///
962/// # Tie Detection
963///
964/// Two scores are considered tied when their gap is below `tie_epsilon`.
965/// The default epsilon is `1e-9`, suitable for f64 posterior probabilities.
966///
967/// # Failure Modes
968///
969/// - **All scores identical**: Every position is `Unstable` with confidence 0.
970/// - **Single item**: Confidence is 1.0 (trivially correct ranking).
971/// - **Empty input**: Returns empty results with zeroed summary.
972#[derive(Debug, Clone)]
973pub struct ConformalRanker {
974    /// Threshold below which two scores are considered tied.
975    pub tie_epsilon: f64,
976
977    /// Confidence threshold for `Stable` classification.
978    pub stable_threshold: f64,
979
980    /// Confidence threshold for `Marginal` (below this is `Unstable`).
981    pub marginal_threshold: f64,
982}
983
984impl Default for ConformalRanker {
985    fn default() -> Self {
986        Self {
987            tie_epsilon: 1e-9,
988            stable_threshold: 0.7,
989            marginal_threshold: 0.3,
990        }
991    }
992}
993
994impl ConformalRanker {
995    /// Create a ranker with default thresholds.
996    pub fn new() -> Self {
997        Self::default()
998    }
999
1000    /// Rank a set of match results and assign conformal confidence.
1001    ///
1002    /// Results are sorted by descending score. Ties are broken by
1003    /// `MatchType` (higher variant first), then by shorter title length.
1004    pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
1005        let count = results.len();
1006
1007        if count == 0 {
1008            return RankedResults {
1009                items: Vec::new(),
1010                summary: RankingSummary {
1011                    count: 0,
1012                    stable_count: 0,
1013                    tie_group_count: 0,
1014                    median_gap: 0.0,
1015                },
1016            };
1017        }
1018
1019        // Tag each result with its original index, then sort descending by score.
1020        // Tie-break: higher MatchType variant first, then shorter match_positions
1021        // (proxy for title length).
1022        let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
1023        indexed.sort_by(|a, b| {
1024            b.1.score
1025                .partial_cmp(&a.1.score)
1026                .unwrap_or(std::cmp::Ordering::Equal)
1027                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1028        });
1029
1030        // Compute gaps between adjacent scores.
1031        let gaps: Vec<f64> = if count > 1 {
1032            indexed
1033                .windows(2)
1034                .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
1035                .collect()
1036        } else {
1037            Vec::new()
1038        };
1039
1040        // Sort gaps for computing conformal p-values (fraction of gaps ≤ g_i).
1041        let mut sorted_gaps = gaps.clone();
1042        sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1043
1044        // Compute rank confidence for each position.
1045        let mut items: Vec<RankedItem> = Vec::with_capacity(count);
1046        let mut stable_count = 0;
1047        let mut tie_group_count = 0;
1048        let mut in_tie_group = false;
1049
1050        for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
1051            let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);
1052
1053            // Conformal p-value: fraction of gaps that are ≤ this gap.
1054            let confidence = if sorted_gaps.is_empty() {
1055                // Single item: trivially ranked correctly.
1056                1.0
1057            } else {
1058                let leq_count =
1059                    sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
1060                leq_count as f64 / sorted_gaps.len() as f64
1061            };
1062
1063            let is_tie = gap_to_next < self.tie_epsilon;
1064            let stability = if is_tie {
1065                if !in_tie_group {
1066                    tie_group_count += 1;
1067                    in_tie_group = true;
1068                }
1069                RankStability::Unstable
1070            } else {
1071                in_tie_group = false;
1072                if confidence >= self.stable_threshold {
1073                    stable_count += 1;
1074                    RankStability::Stable
1075                } else if confidence >= self.marginal_threshold {
1076                    RankStability::Marginal
1077                } else {
1078                    RankStability::Unstable
1079                }
1080            };
1081
1082            items.push(RankedItem {
1083                original_index: orig_idx,
1084                result,
1085                rank_confidence: RankConfidence {
1086                    confidence,
1087                    gap_to_next,
1088                    stability,
1089                },
1090            });
1091        }
1092
1093        let median_gap = if sorted_gaps.is_empty() {
1094            0.0
1095        } else {
1096            let mid = sorted_gaps.len() / 2;
1097            if sorted_gaps.len().is_multiple_of(2) {
1098                (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
1099            } else {
1100                sorted_gaps[mid]
1101            }
1102        };
1103
1104        RankedResults {
1105            items,
1106            summary: RankingSummary {
1107                count,
1108                stable_count,
1109                tie_group_count,
1110                median_gap,
1111            },
1112        }
1113    }
1114
1115    /// Convenience: rank the top-k items only.
1116    ///
1117    /// All items are still scored and sorted, but only the top `k` are
1118    /// returned (with correct confidence relative to the full set).
1119    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
1120        let mut ranked = self.rank(results);
1121        ranked.items.truncate(k);
1122        // Stable count may decrease after truncation.
1123        ranked.summary.stable_count = ranked
1124            .items
1125            .iter()
1126            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
1127            .count();
1128        ranked
1129    }
1130}
1131
1132impl fmt::Display for RankConfidence {
1133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1134        write!(
1135            f,
1136            "confidence={:.3} gap={:.4} ({})",
1137            self.confidence,
1138            self.gap_to_next,
1139            self.stability.label()
1140        )
1141    }
1142}
1143
1144impl fmt::Display for RankingSummary {
1145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1146        write!(
1147            f,
1148            "{} items, {} stable, {} tie groups, median gap {:.4}",
1149            self.count, self.stable_count, self.tie_group_count, self.median_gap
1150        )
1151    }
1152}
1153
1154// ---------------------------------------------------------------------------
1155// Incremental Scorer (bd-39y4.13)
1156// ---------------------------------------------------------------------------
1157
1158/// Cached entry from a previous scoring pass.
1159#[derive(Debug, Clone)]
1160#[allow(dead_code)]
1161struct CachedEntry {
1162    /// Index into the corpus.
1163    corpus_index: usize,
1164}
1165
1166/// Incremental scorer that caches results across keystrokes.
1167///
1168/// When the user types one more character, the new query is a prefix-extension
1169/// of the old query. Items that didn't match the shorter query can't match the
1170/// longer one (monotonicity of substring/prefix/fuzzy matching), so we skip them.
1171///
1172/// # Invariants
1173///
1174/// 1. **Correctness**: Incremental results are identical to a full rescan.
1175///    Adding characters can only reduce or maintain the match set, never expand it.
1176/// 2. **Determinism**: Same (query, corpus) → identical results.
1177/// 3. **Cache coherence**: Cache is invalidated when corpus changes or query
1178///    does not extend the previous query.
1179///
1180/// # Performance Model
1181///
1182/// Let `N` = corpus size, `M` = number of matches for the previous query.
1183/// - Full scan: O(N × L) where L is average title length.
1184/// - Incremental (query extends previous): O(M × L) where M ≤ N.
1185/// - Typical command palettes have M ≪ N after 2-3 characters.
1186///
1187/// # Failure Modes
1188///
1189/// - **Corpus mutation**: If the corpus changes between calls, results may be
1190///   stale. Call `invalidate()` or let the generation counter detect it.
1191/// - **Non-extending query** (e.g., backspace): Falls back to full scan.
1192///   This is correct but loses the incremental speedup.
1193#[derive(Debug)]
1194pub struct IncrementalScorer {
1195    /// Underlying scorer.
1196    scorer: BayesianScorer,
1197    /// Previous query string.
1198    prev_query: String,
1199    /// Cached matching entries from the previous pass.
1200    cache: Vec<CachedEntry>,
1201    /// Generation counter for corpus change detection.
1202    corpus_generation: u64,
1203    /// Number of items in the corpus at cache time.
1204    corpus_len: usize,
1205    /// Statistics for diagnostics.
1206    stats: IncrementalStats,
1207}
1208
1209/// Diagnostics for incremental scoring performance.
1210#[derive(Debug, Clone, Default)]
1211pub struct IncrementalStats {
1212    /// Number of full rescans performed.
1213    pub full_scans: u64,
1214    /// Number of incremental (pruned) scans performed.
1215    pub incremental_scans: u64,
1216    /// Total items evaluated across all scans.
1217    pub total_evaluated: u64,
1218    /// Total items pruned (skipped) across incremental scans.
1219    pub total_pruned: u64,
1220}
1221
1222impl IncrementalStats {
1223    /// Fraction of evaluations saved by incremental scoring.
1224    pub fn prune_ratio(&self) -> f64 {
1225        let total = self.total_evaluated + self.total_pruned;
1226        if total == 0 {
1227            0.0
1228        } else {
1229            self.total_pruned as f64 / total as f64
1230        }
1231    }
1232}
1233
1234impl IncrementalScorer {
1235    /// Create a new incremental scorer (evidence tracking disabled for speed).
1236    pub fn new() -> Self {
1237        Self {
1238            scorer: BayesianScorer::fast(),
1239            prev_query: String::new(),
1240            cache: Vec::new(),
1241            corpus_generation: 0,
1242            corpus_len: 0,
1243            stats: IncrementalStats::default(),
1244        }
1245    }
1246
1247    /// Create with explicit scorer configuration.
1248    pub fn with_scorer(scorer: BayesianScorer) -> Self {
1249        Self {
1250            scorer,
1251            prev_query: String::new(),
1252            cache: Vec::new(),
1253            corpus_generation: 0,
1254            corpus_len: 0,
1255            stats: IncrementalStats::default(),
1256        }
1257    }
1258
1259    /// Invalidate the cache (e.g., when corpus changes).
1260    pub fn invalidate(&mut self) {
1261        self.prev_query.clear();
1262        self.cache.clear();
1263        self.corpus_generation = self.corpus_generation.wrapping_add(1);
1264    }
1265
1266    /// Get diagnostic statistics.
1267    pub fn stats(&self) -> &IncrementalStats {
1268        &self.stats
1269    }
1270
1271    /// Score a query against a corpus, using cached results when possible.
1272    ///
1273    /// Returns indices into the corpus and their match results, sorted by
1274    /// descending score. Only items with score > 0 are returned.
1275    ///
1276    /// # Arguments
1277    ///
1278    /// * `query` - The current search query.
1279    /// * `corpus` - Slice of title strings to search.
1280    /// * `generation` - Optional generation counter; if it differs from the
1281    ///   cached value, the cache is invalidated.
1282    pub fn score_corpus(
1283        &mut self,
1284        query: &str,
1285        corpus: &[&str],
1286        generation: Option<u64>,
1287    ) -> Vec<(usize, MatchResult)> {
1288        // Detect corpus changes.
1289        let generation_val = generation.unwrap_or(self.corpus_generation);
1290        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1291            self.invalidate();
1292            self.corpus_generation = generation_val;
1293            self.corpus_len = corpus.len();
1294        }
1295
1296        // Determine if we can use incremental scoring.
1297        let can_prune = !self.prev_query.is_empty()
1298            && query.starts_with(&self.prev_query)
1299            && !self.cache.is_empty();
1300
1301        let query_lower = query.to_lowercase();
1302        let results = if can_prune {
1303            self.score_incremental(query, &query_lower, corpus)
1304        } else {
1305            self.score_full(query, &query_lower, corpus)
1306        };
1307
1308        // Update cache state.
1309        self.prev_query.clear();
1310        self.prev_query.push_str(query);
1311        self.cache = results
1312            .iter()
1313            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1314            .collect();
1315
1316        results
1317    }
1318
1319    /// Score a query against a corpus with pre-lowercased titles.
1320    ///
1321    /// `corpus_lower` must align 1:1 with `corpus`.
1322    pub fn score_corpus_with_lowered(
1323        &mut self,
1324        query: &str,
1325        corpus: &[&str],
1326        corpus_lower: &[&str],
1327        generation: Option<u64>,
1328    ) -> Vec<(usize, MatchResult)> {
1329        debug_assert_eq!(
1330            corpus.len(),
1331            corpus_lower.len(),
1332            "corpus_lower must match corpus length"
1333        );
1334
1335        // Detect corpus changes.
1336        let generation_val = generation.unwrap_or(self.corpus_generation);
1337        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1338            self.invalidate();
1339            self.corpus_generation = generation_val;
1340            self.corpus_len = corpus.len();
1341        }
1342
1343        // Determine if we can use incremental scoring.
1344        let can_prune = !self.prev_query.is_empty()
1345            && query.starts_with(&self.prev_query)
1346            && !self.cache.is_empty();
1347
1348        let query_lower = query.to_lowercase();
1349        let results = if can_prune {
1350            self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
1351        } else {
1352            self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
1353        };
1354
1355        // Update cache state.
1356        self.prev_query.clear();
1357        self.prev_query.push_str(query);
1358        self.cache = results
1359            .iter()
1360            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1361            .collect();
1362
1363        results
1364    }
1365
1366    /// Score a query against a corpus with pre-lowercased titles and word-start cache.
1367    ///
1368    /// `corpus_lower` and `word_starts` must align 1:1 with `corpus`.
1369    pub fn score_corpus_with_lowered_and_words(
1370        &mut self,
1371        query: &str,
1372        corpus: &[String],
1373        corpus_lower: &[String],
1374        word_starts: &[Vec<usize>],
1375        generation: Option<u64>,
1376    ) -> Vec<(usize, MatchResult)> {
1377        debug_assert_eq!(
1378            corpus.len(),
1379            corpus_lower.len(),
1380            "corpus_lower must match corpus length"
1381        );
1382        debug_assert_eq!(
1383            corpus.len(),
1384            word_starts.len(),
1385            "word_starts must match corpus length"
1386        );
1387
1388        // Detect corpus changes.
1389        let generation_val = generation.unwrap_or(self.corpus_generation);
1390        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1391            self.invalidate();
1392            self.corpus_generation = generation_val;
1393            self.corpus_len = corpus.len();
1394        }
1395
1396        // Determine if we can use incremental scoring.
1397        let can_prune = !self.prev_query.is_empty()
1398            && query.starts_with(&self.prev_query)
1399            && !self.cache.is_empty();
1400
1401        let query_lower = query.to_lowercase();
1402        let results = if can_prune {
1403            self.score_incremental_lowered_with_words(
1404                query,
1405                &query_lower,
1406                corpus,
1407                corpus_lower,
1408                word_starts,
1409            )
1410        } else {
1411            self.score_full_lowered_with_words(
1412                query,
1413                &query_lower,
1414                corpus,
1415                corpus_lower,
1416                word_starts,
1417            )
1418        };
1419
1420        // Update cache state.
1421        self.prev_query.clear();
1422        self.prev_query.push_str(query);
1423        self.cache = results
1424            .iter()
1425            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1426            .collect();
1427
1428        results
1429    }
1430
1431    /// Score an explicit subset of corpus indices with precomputed title caches.
1432    ///
1433    /// This is used by indexed callers that have already narrowed the candidate
1434    /// set, for example prefix-only command-palette queries. It deliberately
1435    /// does not update the incremental query cache: a prefix-only candidate set
1436    /// is not equivalent to the full fuzzy match set and must not constrain a
1437    /// later all-match query.
1438    pub fn score_candidate_indices_with_lowered_and_words(
1439        &mut self,
1440        query: &str,
1441        corpus: &[String],
1442        corpus_lower: &[String],
1443        word_starts: &[Vec<usize>],
1444        candidate_indices: &[usize],
1445        generation: Option<u64>,
1446    ) -> Vec<(usize, MatchResult)> {
1447        debug_assert_eq!(
1448            corpus.len(),
1449            corpus_lower.len(),
1450            "corpus_lower must match corpus length"
1451        );
1452        debug_assert_eq!(
1453            corpus.len(),
1454            word_starts.len(),
1455            "word_starts must match corpus length"
1456        );
1457
1458        let generation_val = generation.unwrap_or(self.corpus_generation);
1459        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1460            self.invalidate();
1461            self.corpus_generation = generation_val;
1462            self.corpus_len = corpus.len();
1463        }
1464
1465        let candidate_count = candidate_indices
1466            .iter()
1467            .filter(|idx| **idx < corpus.len())
1468            .count();
1469        self.stats.incremental_scans += 1;
1470        self.stats.total_evaluated += candidate_count as u64;
1471        self.stats.total_pruned += corpus.len().saturating_sub(candidate_count) as u64;
1472
1473        let query_lower = query.to_lowercase();
1474        let mut results = Vec::with_capacity(candidate_count);
1475        for &idx in candidate_indices {
1476            let (Some(title), Some(title_lower), Some(starts)) =
1477                (corpus.get(idx), corpus_lower.get(idx), word_starts.get(idx))
1478            else {
1479                continue;
1480            };
1481
1482            let result = self.scorer.score_with_lowered_title_and_words(
1483                query,
1484                &query_lower,
1485                title,
1486                title_lower,
1487                Some(starts),
1488            );
1489            if result.score > 0.0 {
1490                results.push((idx, result));
1491            }
1492        }
1493
1494        results.sort_unstable_by(compare_ranked_match_results);
1495
1496        results
1497    }
1498
1499    /// Full scan: score every item in the corpus.
1500    fn score_full(
1501        &mut self,
1502        query: &str,
1503        query_lower: &str,
1504        corpus: &[&str],
1505    ) -> Vec<(usize, MatchResult)> {
1506        self.stats.full_scans += 1;
1507        self.stats.total_evaluated += corpus.len() as u64;
1508
1509        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1510        for (i, title) in corpus.iter().enumerate() {
1511            let result = self
1512                .scorer
1513                .score_with_query_lower(query, query_lower, title);
1514            if result.score > 0.0 {
1515                results.push((i, result));
1516            }
1517        }
1518
1519        results.sort_unstable_by(compare_ranked_match_results);
1520
1521        results
1522    }
1523
1524    /// Full scan with pre-lowercased titles.
1525    fn score_full_lowered(
1526        &mut self,
1527        query: &str,
1528        query_lower: &str,
1529        corpus: &[&str],
1530        corpus_lower: &[&str],
1531    ) -> Vec<(usize, MatchResult)> {
1532        self.stats.full_scans += 1;
1533        self.stats.total_evaluated += corpus.len() as u64;
1534
1535        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1536        for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
1537            let result =
1538                self.scorer
1539                    .score_with_lowered_title(query, query_lower, title, title_lower);
1540            if result.score > 0.0 {
1541                results.push((i, result));
1542            }
1543        }
1544
1545        results.sort_unstable_by(compare_ranked_match_results);
1546
1547        results
1548    }
1549
1550    /// Full scan with pre-lowercased titles and word-start cache.
1551    fn score_full_lowered_with_words(
1552        &mut self,
1553        query: &str,
1554        query_lower: &str,
1555        corpus: &[String],
1556        corpus_lower: &[String],
1557        word_starts: &[Vec<usize>],
1558    ) -> Vec<(usize, MatchResult)> {
1559        self.stats.full_scans += 1;
1560        self.stats.total_evaluated += corpus.len() as u64;
1561
1562        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1563        for (i, ((title, title_lower), starts)) in corpus
1564            .iter()
1565            .zip(corpus_lower.iter())
1566            .zip(word_starts.iter())
1567            .enumerate()
1568        {
1569            let result = self.scorer.score_with_lowered_title_and_words(
1570                query,
1571                query_lower,
1572                title,
1573                title_lower,
1574                Some(starts),
1575            );
1576            if result.score > 0.0 {
1577                results.push((i, result));
1578            }
1579        }
1580
1581        results.sort_unstable_by(compare_ranked_match_results);
1582
1583        results
1584    }
1585    /// Incremental scan: only re-score items that previously matched.
1586    fn score_incremental(
1587        &mut self,
1588        query: &str,
1589        query_lower: &str,
1590        corpus: &[&str],
1591    ) -> Vec<(usize, MatchResult)> {
1592        self.stats.incremental_scans += 1;
1593
1594        let prev_match_count = self.cache.len();
1595        let pruned = corpus.len().saturating_sub(prev_match_count);
1596        self.stats.total_pruned += pruned as u64;
1597        self.stats.total_evaluated += prev_match_count as u64;
1598
1599        let mut results: Vec<(usize, MatchResult)> =
1600            Vec::with_capacity(self.cache.len().min(corpus.len()));
1601        for entry in &self.cache {
1602            if entry.corpus_index < corpus.len() {
1603                let result = self.scorer.score_with_query_lower(
1604                    query,
1605                    query_lower,
1606                    corpus[entry.corpus_index],
1607                );
1608                if result.score > 0.0 {
1609                    results.push((entry.corpus_index, result));
1610                }
1611            }
1612        }
1613
1614        results.sort_unstable_by(compare_ranked_match_results);
1615
1616        results
1617    }
1618
1619    /// Incremental scan with pre-lowercased titles.
1620    fn score_incremental_lowered(
1621        &mut self,
1622        query: &str,
1623        query_lower: &str,
1624        corpus: &[&str],
1625        corpus_lower: &[&str],
1626    ) -> Vec<(usize, MatchResult)> {
1627        self.stats.incremental_scans += 1;
1628
1629        let prev_match_count = self.cache.len();
1630        let pruned = corpus.len().saturating_sub(prev_match_count);
1631        self.stats.total_pruned += pruned as u64;
1632        self.stats.total_evaluated += prev_match_count as u64;
1633
1634        let mut results: Vec<(usize, MatchResult)> =
1635            Vec::with_capacity(self.cache.len().min(corpus.len()));
1636        for entry in &self.cache {
1637            if entry.corpus_index < corpus.len() {
1638                let title = corpus[entry.corpus_index];
1639                let title_lower = corpus_lower[entry.corpus_index];
1640                let result =
1641                    self.scorer
1642                        .score_with_lowered_title(query, query_lower, title, title_lower);
1643                if result.score > 0.0 {
1644                    results.push((entry.corpus_index, result));
1645                }
1646            }
1647        }
1648
1649        results.sort_unstable_by(compare_ranked_match_results);
1650
1651        results
1652    }
1653
1654    /// Incremental scan with pre-lowercased titles and word-start cache.
1655    fn score_incremental_lowered_with_words(
1656        &mut self,
1657        query: &str,
1658        query_lower: &str,
1659        corpus: &[String],
1660        corpus_lower: &[String],
1661        word_starts: &[Vec<usize>],
1662    ) -> Vec<(usize, MatchResult)> {
1663        self.stats.incremental_scans += 1;
1664
1665        let prev_match_count = self.cache.len();
1666        let pruned = corpus.len().saturating_sub(prev_match_count);
1667        self.stats.total_pruned += pruned as u64;
1668        self.stats.total_evaluated += prev_match_count as u64;
1669
1670        let mut results: Vec<(usize, MatchResult)> =
1671            Vec::with_capacity(self.cache.len().min(corpus.len()));
1672        for entry in &self.cache {
1673            if entry.corpus_index < corpus.len() {
1674                let title = &corpus[entry.corpus_index];
1675                let title_lower = &corpus_lower[entry.corpus_index];
1676                let starts = &word_starts[entry.corpus_index];
1677                let result = self.scorer.score_with_lowered_title_and_words(
1678                    query,
1679                    query_lower,
1680                    title,
1681                    title_lower,
1682                    Some(starts),
1683                );
1684                if result.score > 0.0 {
1685                    results.push((entry.corpus_index, result));
1686                }
1687            }
1688        }
1689
1690        results.sort_by(|a, b| {
1691            b.1.score
1692                .partial_cmp(&a.1.score)
1693                .unwrap_or(std::cmp::Ordering::Equal)
1694                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1695        });
1696
1697        results
1698    }
1699}
1700
1701impl Default for IncrementalScorer {
1702    fn default() -> Self {
1703        Self::new()
1704    }
1705}
1706
1707// ---------------------------------------------------------------------------
1708// Tests
1709// ---------------------------------------------------------------------------
1710
1711#[cfg(test)]
1712mod tests {
1713    use super::*;
1714
1715    // --- Match Type Tests ---
1716
1717    #[test]
1718    fn exact_match_highest_score() {
1719        let scorer = BayesianScorer::new();
1720        let result = scorer.score("Settings", "Settings");
1721        assert_eq!(result.match_type, MatchType::Exact);
1722        assert!(result.score > 0.95, "Exact match should score > 0.95");
1723    }
1724
1725    #[test]
1726    fn prefix_match_high_score() {
1727        let scorer = BayesianScorer::new();
1728        let result = scorer.score("set", "Settings");
1729        assert_eq!(result.match_type, MatchType::Prefix);
1730        assert!(result.score > 0.85, "Prefix match should score > 0.85");
1731    }
1732
1733    #[test]
1734    fn word_start_match_score() {
1735        let scorer = BayesianScorer::new();
1736        let result = scorer.score("gd", "Go Dashboard");
1737        assert_eq!(result.match_type, MatchType::WordStart);
1738        assert!(result.score > 0.75, "Word-start should score > 0.75");
1739    }
1740
1741    #[test]
1742    fn substring_match_moderate_score() {
1743        let scorer = BayesianScorer::new();
1744        let result = scorer.score("set", "Asset Manager");
1745        assert_eq!(result.match_type, MatchType::Substring);
1746        assert!(result.score > 0.5, "Substring should score > 0.5");
1747    }
1748
1749    #[test]
1750    fn fuzzy_match_low_score() {
1751        let scorer = BayesianScorer::new();
1752        let result = scorer.score("stg", "Settings");
1753        assert_eq!(result.match_type, MatchType::Fuzzy);
1754        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
1755        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
1756    }
1757
1758    #[test]
1759    fn no_match_returns_zero() {
1760        let scorer = BayesianScorer::new();
1761        let result = scorer.score("xyz", "Settings");
1762        assert_eq!(result.match_type, MatchType::NoMatch);
1763        assert_eq!(result.score, 0.0);
1764    }
1765
1766    // --- Internal ASCII Matcher Edge Cases ---
1767
1768    #[test]
1769    fn word_start_match_ascii_empty_query_matches() {
1770        let scorer = BayesianScorer::new();
1771        let positions = scorer
1772            .word_start_match_ascii(b"", b"go dashboard", None)
1773            .expect("empty query should match");
1774        assert!(positions.is_empty());
1775    }
1776
1777    #[test]
1778    fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
1779        let scorer = BayesianScorer::new();
1780        let starts = [999, 0, 3];
1781        let positions = scorer
1782            .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
1783            .expect("should still match despite out-of-bounds precomputed starts");
1784        assert_eq!(positions, vec![0, 3]);
1785    }
1786
1787    #[test]
1788    fn fuzzy_match_ascii_empty_query_matches() {
1789        let scorer = BayesianScorer::new();
1790        let positions = scorer
1791            .fuzzy_match_ascii(b"", b"settings")
1792            .expect("empty query should match");
1793        assert!(positions.is_empty());
1794    }
1795
1796    // --- Score Invariants ---
1797
1798    #[test]
1799    fn score_bounded() {
1800        let scorer = BayesianScorer::new();
1801        let test_cases = [
1802            ("a", "abcdefghijklmnop"),
1803            ("full", "full"),
1804            ("", "anything"),
1805            ("xyz", "abc"),
1806            ("stg", "Settings"),
1807        ];
1808
1809        for (query, title) in test_cases {
1810            let result = scorer.score(query, title);
1811            assert!(
1812                result.score >= 0.0 && result.score <= 1.0,
1813                "Score for ({}, {}) = {} not in [0, 1]",
1814                query,
1815                title,
1816                result.score
1817            );
1818        }
1819    }
1820
1821    #[test]
1822    fn score_deterministic() {
1823        let scorer = BayesianScorer::new();
1824        let result1 = scorer.score("nav", "Navigation");
1825        let result2 = scorer.score("nav", "Navigation");
1826        assert!(
1827            (result1.score - result2.score).abs() < f64::EPSILON,
1828            "Same input should produce identical scores"
1829        );
1830    }
1831
1832    #[test]
1833    fn tiebreak_shorter_first() {
1834        let scorer = BayesianScorer::new();
1835        let short = scorer.score("set", "Set");
1836        let long = scorer.score("set", "Settings");
1837        assert!(
1838            short.score >= long.score,
1839            "Shorter title should score >= longer: {} vs {}",
1840            short.score,
1841            long.score
1842        );
1843    }
1844
1845    // --- Case Insensitivity ---
1846
1847    #[test]
1848    fn case_insensitive() {
1849        let scorer = BayesianScorer::new();
1850        let lower = scorer.score("set", "Settings");
1851        let upper = scorer.score("SET", "Settings");
1852        assert!(
1853            (lower.score - upper.score).abs() < f64::EPSILON,
1854            "Case should not affect score"
1855        );
1856    }
1857
1858    // --- Match Positions ---
1859
1860    #[test]
1861    fn match_positions_correct() {
1862        let scorer = BayesianScorer::new();
1863        let result = scorer.score("gd", "Go Dashboard");
1864        assert_eq!(result.match_positions, vec![0, 3]);
1865    }
1866
1867    #[test]
1868    fn fuzzy_match_positions() {
1869        let scorer = BayesianScorer::new();
1870        let result = scorer.score("stg", "Settings");
1871        // s(0), t(3), g(6)
1872        assert_eq!(result.match_positions.len(), 3);
1873        assert_eq!(result.match_positions[0], 0); // 's'
1874    }
1875
1876    // --- Empty Query ---
1877
1878    #[test]
1879    fn empty_query_returns_all() {
1880        let scorer = BayesianScorer::new();
1881        let result = scorer.score("", "Any Command");
1882        assert!(result.score > 0.0, "Empty query should have positive score");
1883        assert!(result.score < 1.0, "Empty query should not be max score");
1884    }
1885
1886    #[test]
1887    fn empty_title_is_safe() {
1888        let scorer = BayesianScorer::new();
1889        let result = scorer.score("x", "");
1890        assert_eq!(result.match_type, MatchType::NoMatch);
1891        assert!(result.score.is_finite(), "Score should remain finite");
1892    }
1893
1894    #[test]
1895    fn empty_query_empty_title_is_finite() {
1896        let scorer = BayesianScorer::new();
1897        let result = scorer.score("", "");
1898        assert!(result.score.is_finite(), "Score should remain finite");
1899        assert_eq!(result.match_type, MatchType::Fuzzy);
1900    }
1901
1902    // --- Query Longer Than Title ---
1903
1904    #[test]
1905    fn query_longer_than_title() {
1906        let scorer = BayesianScorer::new();
1907        let result = scorer.score("verylongquery", "short");
1908        assert_eq!(result.match_type, MatchType::NoMatch);
1909        assert_eq!(result.score, 0.0);
1910    }
1911
1912    #[test]
1913    fn long_query_exact_match_is_handled() {
1914        let scorer = BayesianScorer::new();
1915        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
1916        let result = scorer.score(query, query);
1917        assert_eq!(result.match_type, MatchType::Exact);
1918        assert!(result.score.is_finite());
1919        assert!(result.score > 0.9, "Exact long query should score high");
1920    }
1921
1922    #[test]
1923    fn gap_penalty_prefers_tight_fuzzy_matches() {
1924        let scorer = BayesianScorer::new();
1925        let tight = scorer.score("ace", "abcde");
1926        let gappy = scorer.score("ace", "a...c...e");
1927        assert_eq!(tight.match_type, MatchType::Fuzzy);
1928        assert_eq!(gappy.match_type, MatchType::Fuzzy);
1929        assert!(
1930            tight.score > gappy.score,
1931            "Tight fuzzy match should score higher than gappy: {} vs {}",
1932            tight.score,
1933            gappy.score
1934        );
1935    }
1936
1937    // --- Tag Matching ---
1938
1939    #[test]
1940    fn tag_match_boosts_score() {
1941        let scorer = BayesianScorer::new();
1942        // Use a query that matches the title (fuzzy)
1943        let without_tag = scorer.score("set", "Settings");
1944        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
1945        // Tag "setup" contains "set", so it should boost the score
1946        assert!(
1947            with_tag.score > without_tag.score,
1948            "Tag match should boost score: {} > {}",
1949            with_tag.score,
1950            without_tag.score
1951        );
1952    }
1953
1954    // --- Evidence Ledger ---
1955
1956    #[test]
1957    fn evidence_ledger_tracks_factors() {
1958        let scorer = BayesianScorer::new();
1959        let result = scorer.score("set", "Settings");
1960
1961        assert!(!result.evidence.entries().is_empty());
1962
1963        // Should have match type entry
1964        assert!(
1965            result
1966                .evidence
1967                .entries()
1968                .iter()
1969                .any(|e| e.kind == EvidenceKind::MatchType)
1970        );
1971    }
1972
1973    #[test]
1974    fn evidence_ledger_display() {
1975        let scorer = BayesianScorer::new();
1976        let result = scorer.score("gd", "Go Dashboard");
1977        let display = format!("{}", result.evidence);
1978        assert!(display.contains("Evidence Ledger"));
1979        assert!(display.contains("Posterior P:"));
1980    }
1981
1982    // --- Property Tests ---
1983
1984    #[test]
1985    fn property_ordering_total() {
1986        let scorer = BayesianScorer::new();
1987        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];
1988
1989        let mut scores: Vec<(f64, &str)> = titles
1990            .iter()
1991            .map(|t| (scorer.score("set", t).score, *t))
1992            .collect();
1993
1994        // Sort should be stable and total
1995        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
1996
1997        // Verify no NaN or infinite scores
1998        for (score, _) in &scores {
1999            assert!(score.is_finite());
2000        }
2001    }
2002
2003    #[test]
2004    fn property_prefix_monotonic() {
2005        let scorer = BayesianScorer::new();
2006        // Longer exact prefix match should score higher
2007        let one_char = scorer.score("s", "Settings");
2008        let three_char = scorer.score("set", "Settings");
2009        // Both are prefix matches, longer should be better
2010        assert!(
2011            three_char.score >= one_char.score,
2012            "Longer prefix should score >= shorter"
2013        );
2014    }
2015
2016    // --- Match Type Prior Odds ---
2017
2018    #[test]
2019    fn match_type_prior_ordering() {
2020        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
2021        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
2022        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
2023        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
2024        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
2025    }
2026
2027    // -----------------------------------------------------------------------
2028    // Conformal Ranker Tests
2029    // -----------------------------------------------------------------------
2030
2031    #[test]
2032    fn conformal_tie_breaks_by_match_type() {
2033        let mut exact = MatchResult::no_match();
2034        exact.score = 0.5;
2035        exact.match_type = MatchType::Exact;
2036
2037        let mut prefix = MatchResult::no_match();
2038        prefix.score = 0.5;
2039        prefix.match_type = MatchType::Prefix;
2040
2041        let mut word_start = MatchResult::no_match();
2042        word_start.score = 0.5;
2043        word_start.match_type = MatchType::WordStart;
2044
2045        let mut substring = MatchResult::no_match();
2046        substring.score = 0.5;
2047        substring.match_type = MatchType::Substring;
2048
2049        let ranker = ConformalRanker::new();
2050        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
2051        let order: Vec<MatchType> = ranked
2052            .items
2053            .iter()
2054            .map(|item| item.result.match_type)
2055            .collect();
2056
2057        assert_eq!(
2058            order,
2059            vec![
2060                MatchType::Exact,
2061                MatchType::Prefix,
2062                MatchType::WordStart,
2063                MatchType::Substring
2064            ]
2065        );
2066    }
2067
2068    #[test]
2069    fn conformal_empty_input() {
2070        let ranker = ConformalRanker::new();
2071        let ranked = ranker.rank(Vec::new());
2072        assert_eq!(ranked.items.len(), 0);
2073        assert_eq!(ranked.summary.count, 0);
2074        assert_eq!(ranked.summary.stable_count, 0);
2075        assert_eq!(ranked.summary.tie_group_count, 0);
2076        assert_eq!(ranked.summary.median_gap, 0.0);
2077    }
2078
2079    #[test]
2080    fn conformal_single_item() {
2081        let scorer = BayesianScorer::new();
2082        let results = vec![scorer.score("set", "Settings")];
2083        let ranker = ConformalRanker::new();
2084        let ranked = ranker.rank(results);
2085
2086        assert_eq!(ranked.items.len(), 1);
2087        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
2088        assert_eq!(ranked.summary.count, 1);
2089    }
2090
2091    #[test]
2092    fn conformal_sorted_descending() {
2093        let scorer = BayesianScorer::new();
2094        let results = vec![
2095            scorer.score("set", "Settings"),
2096            scorer.score("set", "Asset Manager"),
2097            scorer.score("set", "Reset Configuration Panel"),
2098        ];
2099        let ranker = ConformalRanker::new();
2100        let ranked = ranker.rank(results);
2101
2102        // Verify descending score order.
2103        for w in ranked.items.windows(2) {
2104            let a = w[0].result.score;
2105            let b = w[1].result.score;
2106            assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
2107        }
2108    }
2109
2110    #[test]
2111    fn conformal_confidence_bounded() {
2112        let scorer = BayesianScorer::new();
2113        let titles = [
2114            "Settings",
2115            "Set Theme",
2116            "Asset Manager",
2117            "Reset View",
2118            "Offset Tool",
2119            "Test Suite",
2120        ];
2121        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2122        let ranker = ConformalRanker::new();
2123        let ranked = ranker.rank(results);
2124
2125        for item in &ranked.items {
2126            assert!(
2127                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
2128                "Confidence must be in [0, 1], got {}",
2129                item.rank_confidence.confidence
2130            );
2131            assert!(
2132                item.rank_confidence.gap_to_next >= 0.0,
2133                "Gap must be non-negative"
2134            );
2135        }
2136    }
2137
2138    #[test]
2139    fn conformal_deterministic() {
2140        let scorer = BayesianScorer::new();
2141        let titles = ["Settings", "Set Theme", "Asset", "Reset"];
2142
2143        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2144        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2145
2146        let ranker = ConformalRanker::new();
2147        let ranked1 = ranker.rank(results1);
2148        let ranked2 = ranker.rank(results2);
2149
2150        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
2151            assert!(
2152                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
2153                "Confidence should be deterministic"
2154            );
2155            assert_eq!(
2156                a.original_index, b.original_index,
2157                "Rank order should be deterministic"
2158            );
2159        }
2160    }
2161
2162    #[test]
2163    fn conformal_ties_detected() {
2164        // Create items with identical scores.
2165        let mut r1 = MatchResult::no_match();
2166        r1.score = 0.8;
2167        r1.match_type = MatchType::Prefix;
2168        let mut r2 = MatchResult::no_match();
2169        r2.score = 0.8;
2170        r2.match_type = MatchType::Prefix;
2171        let mut r3 = MatchResult::no_match();
2172        r3.score = 0.5;
2173        r3.match_type = MatchType::Substring;
2174
2175        let ranker = ConformalRanker::new();
2176        let ranked = ranker.rank(vec![r1, r2, r3]);
2177
2178        // The first two have identical scores — their gap is 0 → unstable.
2179        assert_eq!(
2180            ranked.items[0].rank_confidence.stability,
2181            RankStability::Unstable,
2182            "Tied items should be Unstable"
2183        );
2184        assert!(
2185            ranked.summary.tie_group_count >= 1,
2186            "Should detect at least one tie group"
2187        );
2188    }
2189
2190    #[test]
2191    fn conformal_all_identical_scores() {
2192        let mut results = Vec::new();
2193        for _ in 0..5 {
2194            let mut r = MatchResult::no_match();
2195            r.score = 0.5;
2196            r.match_type = MatchType::Fuzzy;
2197            results.push(r);
2198        }
2199
2200        let ranker = ConformalRanker::new();
2201        let ranked = ranker.rank(results);
2202
2203        // All gaps are zero → all unstable.
2204        for item in &ranked.items {
2205            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
2206        }
2207    }
2208
2209    #[test]
2210    fn conformal_well_separated_scores_are_stable() {
2211        let mut results = Vec::new();
2212        // Scores well spread out: 0.9, 0.6, 0.3, 0.1.
2213        for &s in &[0.9, 0.6, 0.3, 0.1] {
2214            let mut r = MatchResult::no_match();
2215            r.score = s;
2216            r.match_type = MatchType::Prefix;
2217            results.push(r);
2218        }
2219
2220        let ranker = ConformalRanker::new();
2221        let ranked = ranker.rank(results);
2222
2223        // With well-separated scores, most should be stable.
2224        assert!(
2225            ranked.summary.stable_count >= 2,
2226            "Well-separated scores should yield stable positions, got {}",
2227            ranked.summary.stable_count
2228        );
2229    }
2230
2231    #[test]
2232    fn conformal_top_k_truncates() {
2233        let scorer = BayesianScorer::new();
2234        let titles = [
2235            "Settings",
2236            "Set Theme",
2237            "Asset Manager",
2238            "Reset View",
2239            "Offset Tool",
2240        ];
2241        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2242
2243        let ranker = ConformalRanker::new();
2244        let ranked = ranker.rank_top_k(results, 2);
2245
2246        assert_eq!(ranked.items.len(), 2);
2247        // Top-k items should still have confidence from the full ranking.
2248        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
2249    }
2250
2251    #[test]
2252    fn conformal_original_indices_preserved() {
2253        let scorer = BayesianScorer::new();
2254        let titles = ["Zebra Tool", "Settings", "Apple"];
2255        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2256
2257        let ranker = ConformalRanker::new();
2258        let ranked = ranker.rank(results);
2259
2260        // "Settings" (index 1) should be ranked first (prefix match).
2261        assert_eq!(
2262            ranked.items[0].original_index, 1,
2263            "Settings should be first; original_index should be 1"
2264        );
2265    }
2266
2267    #[test]
2268    fn conformal_summary_display() {
2269        let scorer = BayesianScorer::new();
2270        let results = vec![
2271            scorer.score("set", "Settings"),
2272            scorer.score("set", "Set Theme"),
2273        ];
2274        let ranker = ConformalRanker::new();
2275        let ranked = ranker.rank(results);
2276
2277        let display = format!("{}", ranked.summary);
2278        assert!(display.contains("2 items"));
2279    }
2280
2281    #[test]
2282    fn conformal_rank_confidence_display() {
2283        let rc = RankConfidence {
2284            confidence: 0.85,
2285            gap_to_next: 0.1234,
2286            stability: RankStability::Stable,
2287        };
2288        let display = format!("{}", rc);
2289        assert!(display.contains("0.850"));
2290        assert!(display.contains("stable"));
2291    }
2292
2293    #[test]
2294    fn conformal_stability_labels() {
2295        assert_eq!(RankStability::Stable.label(), "stable");
2296        assert_eq!(RankStability::Marginal.label(), "marginal");
2297        assert_eq!(RankStability::Unstable.label(), "unstable");
2298    }
2299
2300    // --- Property: gap_to_next of last item is always 0 ---
2301
2302    #[test]
2303    fn conformal_last_item_gap_zero() {
2304        let scorer = BayesianScorer::new();
2305        let results = vec![
2306            scorer.score("set", "Settings"),
2307            scorer.score("set", "Asset"),
2308            scorer.score("set", "Reset"),
2309        ];
2310        let ranker = ConformalRanker::new();
2311        let ranked = ranker.rank(results);
2312
2313        let last = ranked.items.last().unwrap();
2314        assert_eq!(
2315            last.rank_confidence.gap_to_next, 0.0,
2316            "Last item gap should be 0"
2317        );
2318    }
2319
2320    // --- Property: median_gap is non-negative ---
2321
2322    #[test]
2323    fn conformal_median_gap_non_negative() {
2324        let scorer = BayesianScorer::new();
2325        let titles = [
2326            "Settings",
2327            "Set Theme",
2328            "Asset Manager",
2329            "Reset View",
2330            "Offset Tool",
2331            "Test Suite",
2332            "System Settings",
2333            "Reset Defaults",
2334        ];
2335        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2336        let ranker = ConformalRanker::new();
2337        let ranked = ranker.rank(results);
2338
2339        assert!(
2340            ranked.summary.median_gap >= 0.0,
2341            "Median gap must be non-negative"
2342        );
2343    }
2344
2345    // -----------------------------------------------------------------------
2346    // IncrementalScorer Tests (bd-39y4.13)
2347    // -----------------------------------------------------------------------
2348
2349    fn test_corpus() -> Vec<&'static str> {
2350        vec![
2351            "Open File",
2352            "Save File",
2353            "Close Tab",
2354            "Git: Commit",
2355            "Git: Push",
2356            "Git: Pull",
2357            "Go to Line",
2358            "Find in Files",
2359            "Toggle Terminal",
2360            "Format Document",
2361        ]
2362    }
2363
2364    #[test]
2365    fn incremental_full_scan_on_first_call() {
2366        let mut scorer = IncrementalScorer::new();
2367        let corpus = test_corpus();
2368        let results = scorer.score_corpus("git", &corpus, None);
2369
2370        assert!(!results.is_empty(), "Should find matches for 'git'");
2371        assert_eq!(scorer.stats().full_scans, 1);
2372        assert_eq!(scorer.stats().incremental_scans, 0);
2373    }
2374
2375    #[test]
2376    fn incremental_prunes_on_query_extension() {
2377        let mut scorer = IncrementalScorer::new();
2378        let corpus = test_corpus();
2379
2380        // First query: "g" matches many items
2381        let r1 = scorer.score_corpus("g", &corpus, None);
2382        assert_eq!(scorer.stats().full_scans, 1);
2383
2384        // Extended query: "gi" — should use incremental path
2385        let r2 = scorer.score_corpus("gi", &corpus, None);
2386        assert_eq!(scorer.stats().incremental_scans, 1);
2387        assert!(r2.len() <= r1.len(), "Extended query should match <= items");
2388
2389        // Further extension: "git" — still incremental
2390        let r3 = scorer.score_corpus("git", &corpus, None);
2391        assert_eq!(scorer.stats().incremental_scans, 2);
2392        assert!(r3.len() <= r2.len());
2393    }
2394
2395    #[test]
2396    fn incremental_correctness_matches_full_scan() {
2397        let corpus = test_corpus();
2398
2399        // Incremental path
2400        let mut inc = IncrementalScorer::new();
2401        inc.score_corpus("g", &corpus, None);
2402        let inc_results = inc.score_corpus("git", &corpus, None);
2403
2404        // Full scan path (fresh scorer, no cache)
2405        let mut full = IncrementalScorer::new();
2406        let full_results = full.score_corpus("git", &corpus, None);
2407
2408        // Results should be identical.
2409        assert_eq!(
2410            inc_results.len(),
2411            full_results.len(),
2412            "Incremental and full scan should return same count"
2413        );
2414
2415        for (a, b) in inc_results.iter().zip(full_results.iter()) {
2416            assert_eq!(a.0, b.0, "Same corpus indices");
2417            assert!(
2418                (a.1.score - b.1.score).abs() < f64::EPSILON,
2419                "Same scores: {} vs {}",
2420                a.1.score,
2421                b.1.score
2422            );
2423        }
2424    }
2425
2426    #[test]
2427    fn incremental_falls_back_on_non_extension() {
2428        let mut scorer = IncrementalScorer::new();
2429        let corpus = test_corpus();
2430
2431        scorer.score_corpus("git", &corpus, None);
2432        assert_eq!(scorer.stats().full_scans, 1);
2433
2434        // "fi" doesn't extend "git" — must full scan
2435        scorer.score_corpus("fi", &corpus, None);
2436        assert_eq!(scorer.stats().full_scans, 2);
2437    }
2438
2439    #[test]
2440    fn incremental_invalidate_forces_full_scan() {
2441        let mut scorer = IncrementalScorer::new();
2442        let corpus = test_corpus();
2443
2444        scorer.score_corpus("g", &corpus, None);
2445        scorer.invalidate();
2446
2447        // Even though "gi" extends "g", cache was cleared
2448        scorer.score_corpus("gi", &corpus, None);
2449        assert_eq!(scorer.stats().full_scans, 2);
2450        assert_eq!(scorer.stats().incremental_scans, 0);
2451    }
2452
2453    #[test]
2454    fn incremental_generation_change_invalidates() {
2455        let mut scorer = IncrementalScorer::new();
2456        let corpus = test_corpus();
2457
2458        scorer.score_corpus("g", &corpus, Some(1));
2459
2460        // Generation changed — cache invalid
2461        scorer.score_corpus("gi", &corpus, Some(2));
2462        assert_eq!(scorer.stats().full_scans, 2);
2463    }
2464
2465    #[test]
2466    fn incremental_corpus_size_change_invalidates() {
2467        let mut scorer = IncrementalScorer::new();
2468        let corpus1 = test_corpus();
2469        let corpus2 = &corpus1[..5];
2470
2471        scorer.score_corpus("g", &corpus1, None);
2472        scorer.score_corpus("gi", corpus2, None);
2473        // Corpus size changed → full scan
2474        assert_eq!(scorer.stats().full_scans, 2);
2475    }
2476
2477    #[test]
2478    fn incremental_skips_out_of_bounds_cache_entries() {
2479        let mut scorer = IncrementalScorer::new();
2480
2481        // Simulate a cache produced for a larger corpus. The scorer should safely
2482        // skip entries that are out of bounds for the current corpus.
2483        scorer.cache = vec![
2484            CachedEntry { corpus_index: 0 },
2485            CachedEntry { corpus_index: 999 },
2486        ];
2487
2488        let corpus = ["a"];
2489        let results = scorer.score_incremental("a", "a", &corpus);
2490        assert_eq!(results.len(), 1);
2491        assert_eq!(results[0].0, 0);
2492
2493        let corpus_lower = ["a"];
2494        let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
2495        assert_eq!(results.len(), 1);
2496        assert_eq!(results[0].0, 0);
2497
2498        let corpus_s = vec!["a".to_string()];
2499        let corpus_lower_s = vec!["a".to_string()];
2500        let word_starts = vec![vec![0]];
2501        let results = scorer.score_incremental_lowered_with_words(
2502            "a",
2503            "a",
2504            &corpus_s,
2505            &corpus_lower_s,
2506            &word_starts,
2507        );
2508        assert_eq!(results.len(), 1);
2509        assert_eq!(results[0].0, 0);
2510    }
2511
2512    #[test]
2513    fn incremental_empty_query() {
2514        let mut scorer = IncrementalScorer::new();
2515        let corpus = test_corpus();
2516
2517        let results = scorer.score_corpus("", &corpus, None);
2518        // Empty query matches everything (with weak scores)
2519        assert_eq!(results.len(), corpus.len());
2520    }
2521
2522    #[test]
2523    fn incremental_no_match_query() {
2524        let mut scorer = IncrementalScorer::new();
2525        let corpus = test_corpus();
2526
2527        let results = scorer.score_corpus("zzz", &corpus, None);
2528        assert!(results.is_empty());
2529    }
2530
2531    #[test]
2532    fn incremental_stats_prune_ratio() {
2533        let mut scorer = IncrementalScorer::new();
2534        let corpus = test_corpus();
2535
2536        scorer.score_corpus("g", &corpus, None);
2537        scorer.score_corpus("gi", &corpus, None);
2538        scorer.score_corpus("git", &corpus, None);
2539
2540        let stats = scorer.stats();
2541        assert!(
2542            stats.prune_ratio() > 0.0,
2543            "Prune ratio should be > 0 after incremental scans"
2544        );
2545        assert!(stats.total_pruned > 0, "Should have pruned some items");
2546    }
2547
2548    #[test]
2549    fn incremental_results_sorted_descending() {
2550        let mut scorer = IncrementalScorer::new();
2551        let corpus = test_corpus();
2552
2553        let results = scorer.score_corpus("o", &corpus, None);
2554        for w in results.windows(2) {
2555            let a = w[0].1.score;
2556            let b = w[1].1.score;
2557            assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
2558        }
2559    }
2560
2561    #[test]
2562    fn incremental_lowered_matches_full() {
2563        let corpus = vec![
2564            "Open File".to_string(),
2565            "Save File".to_string(),
2566            "Close".to_string(),
2567            "Launch 🚀".to_string(),
2568        ];
2569        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
2570        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
2571        let word_starts: Vec<Vec<usize>> = lower
2572            .iter()
2573            .map(|title_lower| {
2574                let bytes = title_lower.as_bytes();
2575                title_lower
2576                    .char_indices()
2577                    .filter_map(|(i, _)| {
2578                        let is_word_start = i == 0 || {
2579                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
2580                            prev == b' ' || prev == b'-' || prev == b'_'
2581                        };
2582                        is_word_start.then_some(i)
2583                    })
2584                    .collect()
2585            })
2586            .collect();
2587
2588        let mut full = IncrementalScorer::new();
2589        let mut lowered = IncrementalScorer::new();
2590
2591        let a = full.score_corpus("fi", &corpus_refs, None);
2592        let b =
2593            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
2594
2595        assert_eq!(a.len(), b.len());
2596        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
2597            assert_eq!(ia, ib);
2598            assert_eq!(ra.match_type, rb.match_type);
2599            assert_eq!(ra.match_positions, rb.match_positions);
2600            assert!(
2601                (ra.score - rb.score).abs() < 1e-12,
2602                "score mismatch: {} vs {}",
2603                ra.score,
2604                rb.score
2605            );
2606        }
2607    }
2608
2609    #[test]
2610    fn incremental_deterministic() {
2611        let corpus = test_corpus();
2612
2613        let mut s1 = IncrementalScorer::new();
2614        let r1 = s1.score_corpus("git", &corpus, None);
2615
2616        let mut s2 = IncrementalScorer::new();
2617        let r2 = s2.score_corpus("git", &corpus, None);
2618
2619        assert_eq!(r1.len(), r2.len());
2620        for (a, b) in r1.iter().zip(r2.iter()) {
2621            assert_eq!(a.0, b.0);
2622            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
2623        }
2624    }
2625
2626    // -----------------------------------------------------------------------
2627    // MatchType::description coverage (bd-z1c65)
2628    // -----------------------------------------------------------------------
2629
2630    #[test]
2631    fn match_type_descriptions() {
2632        assert_eq!(MatchType::Exact.description(), "exact match");
2633        assert_eq!(MatchType::Prefix.description(), "prefix match");
2634        assert_eq!(MatchType::WordStart.description(), "word-start match");
2635        assert_eq!(MatchType::Substring.description(), "contiguous substring");
2636        assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
2637        assert_eq!(MatchType::NoMatch.description(), "no match");
2638    }
2639
2640    #[test]
2641    fn match_type_no_match_prior_is_zero() {
2642        assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
2643    }
2644
2645    // -----------------------------------------------------------------------
2646    // EvidenceDescription Display variants (bd-z1c65)
2647    // -----------------------------------------------------------------------
2648
2649    #[test]
2650    fn evidence_description_static_display() {
2651        let d = EvidenceDescription::Static("test message");
2652        assert_eq!(format!("{d}"), "test message");
2653    }
2654
2655    #[test]
2656    fn evidence_description_title_length_display() {
2657        let d = EvidenceDescription::TitleLengthChars { len: 42 };
2658        assert_eq!(format!("{d}"), "title length 42 chars");
2659    }
2660
2661    #[test]
2662    fn evidence_description_first_match_pos_display() {
2663        let d = EvidenceDescription::FirstMatchPos { pos: 5 };
2664        assert_eq!(format!("{d}"), "first match at position 5");
2665    }
2666
2667    #[test]
2668    fn evidence_description_word_boundary_count_display() {
2669        let d = EvidenceDescription::WordBoundaryCount { count: 3 };
2670        assert_eq!(format!("{d}"), "3 word boundary matches");
2671    }
2672
2673    #[test]
2674    fn evidence_description_gap_total_display() {
2675        let d = EvidenceDescription::GapTotal { total: 7 };
2676        assert_eq!(format!("{d}"), "total gap of 7 characters");
2677    }
2678
2679    #[test]
2680    fn evidence_description_coverage_percent_display() {
2681        let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
2682        let s = format!("{d}");
2683        assert!(s.contains("84%"), "Should round: {s}");
2684    }
2685
2686    // -----------------------------------------------------------------------
2687    // EvidenceEntry Display (bd-z1c65)
2688    // -----------------------------------------------------------------------
2689
2690    #[test]
2691    fn evidence_entry_display_supports() {
2692        let entry = EvidenceEntry {
2693            kind: EvidenceKind::Position,
2694            bayes_factor: 1.5,
2695            description: EvidenceDescription::FirstMatchPos { pos: 0 },
2696        };
2697        let s = format!("{entry}");
2698        assert!(s.contains("supports"));
2699        assert!(s.contains("Position"));
2700    }
2701
2702    #[test]
2703    fn evidence_entry_display_opposes() {
2704        let entry = EvidenceEntry {
2705            kind: EvidenceKind::GapPenalty,
2706            bayes_factor: 0.5,
2707            description: EvidenceDescription::GapTotal { total: 10 },
2708        };
2709        let s = format!("{entry}");
2710        assert!(s.contains("opposes"));
2711    }
2712
2713    #[test]
2714    fn evidence_entry_display_neutral() {
2715        let entry = EvidenceEntry {
2716            kind: EvidenceKind::TitleLength,
2717            bayes_factor: 1.0,
2718            description: EvidenceDescription::Static("neutral factor"),
2719        };
2720        let s = format!("{entry}");
2721        assert!(s.contains("neutral"));
2722    }
2723
2724    // -----------------------------------------------------------------------
2725    // EvidenceLedger direct method tests (bd-z1c65)
2726    // -----------------------------------------------------------------------
2727
2728    #[test]
2729    fn ledger_combined_bayes_factor() {
2730        let mut ledger = EvidenceLedger::new();
2731        ledger.add(
2732            EvidenceKind::Position,
2733            2.0,
2734            EvidenceDescription::Static("a"),
2735        );
2736        ledger.add(
2737            EvidenceKind::WordBoundary,
2738            3.0,
2739            EvidenceDescription::Static("b"),
2740        );
2741        assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
2742    }
2743
2744    #[test]
2745    fn ledger_combined_bayes_factor_empty() {
2746        let ledger = EvidenceLedger::new();
2747        assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
2748    }
2749
2750    #[test]
2751    fn ledger_prior_odds_present() {
2752        let mut ledger = EvidenceLedger::new();
2753        ledger.add(
2754            EvidenceKind::MatchType,
2755            9.0,
2756            EvidenceDescription::Static("prefix"),
2757        );
2758        assert_eq!(ledger.prior_odds(), Some(9.0));
2759    }
2760
2761    #[test]
2762    fn ledger_prior_odds_absent() {
2763        let ledger = EvidenceLedger::new();
2764        assert_eq!(ledger.prior_odds(), None);
2765    }
2766
2767    #[test]
2768    fn ledger_posterior_probability_no_prior() {
2769        let mut ledger = EvidenceLedger::new();
2770        ledger.add(
2771            EvidenceKind::Position,
2772            2.0,
2773            EvidenceDescription::Static("pos"),
2774        );
2775        // No MatchType entry → prior defaults to 1.0
2776        // posterior_odds = 1.0 * 2.0 = 2.0 → prob = 2/3
2777        let prob = ledger.posterior_probability();
2778        assert!((prob - 2.0 / 3.0).abs() < 1e-9);
2779    }
2780
2781    #[test]
2782    fn ledger_posterior_probability_infinite_odds() {
2783        let mut ledger = EvidenceLedger::new();
2784        ledger.add(
2785            EvidenceKind::MatchType,
2786            f64::INFINITY,
2787            EvidenceDescription::Static("inf"),
2788        );
2789        assert_eq!(ledger.posterior_probability(), 1.0);
2790    }
2791
2792    // -----------------------------------------------------------------------
2793    // BayesianScorer::fast() path (no evidence tracking) (bd-z1c65)
2794    // -----------------------------------------------------------------------
2795
2796    #[test]
2797    fn fast_scorer_no_evidence() {
2798        let scorer = BayesianScorer::fast();
2799        let result = scorer.score("set", "Settings");
2800        assert!(result.score > 0.0);
2801        assert!(result.evidence.entries().is_empty());
2802    }
2803
2804    #[test]
2805    fn fast_scorer_score_matches_probability_range() {
2806        let scorer = BayesianScorer::fast();
2807        let result = scorer.score("set", "Settings");
2808        assert!(result.score >= 0.0 && result.score <= 1.0);
2809    }
2810
2811    #[test]
2812    fn fast_scorer_empty_query() {
2813        let scorer = BayesianScorer::fast();
2814        let result = scorer.score("", "Test");
2815        assert!(result.score > 0.0);
2816        assert!(result.evidence.entries().is_empty());
2817    }
2818
2819    #[test]
2820    fn fast_scorer_fuzzy_with_gap_penalty() {
2821        let scorer = BayesianScorer::fast();
2822        let result = scorer.score("ace", "a...b...c...d...e");
2823        assert_eq!(result.match_type, MatchType::Fuzzy);
2824        assert!(result.score > 0.0);
2825    }
2826
2827    #[test]
2828    fn fast_scorer_tag_boost() {
2829        let scorer = BayesianScorer::fast();
2830        let without = scorer.score("set", "Settings");
2831        let with = scorer.score_with_tags("set", "Settings", &["setup"]);
2832        assert!(with.score > without.score);
2833    }
2834
2835    #[test]
2836    fn fast_scorer_tag_no_boost_at_score_one() {
2837        let scorer = BayesianScorer::fast();
2838        // score_with_tags fast path only boosts when score is in (0, 1)
2839        let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
2840        assert!(result.score <= 1.0);
2841    }
2842
2843    // -----------------------------------------------------------------------
2844    // Unicode (non-ASCII) matching paths (bd-z1c65)
2845    // -----------------------------------------------------------------------
2846
2847    #[test]
2848    fn unicode_exact_match() {
2849        let scorer = BayesianScorer::new();
2850        let result = scorer.score("日本語", "日本語");
2851        assert_eq!(result.match_type, MatchType::Exact);
2852        assert!(result.score > 0.9);
2853    }
2854
2855    #[test]
2856    fn unicode_prefix_match() {
2857        let scorer = BayesianScorer::new();
2858        let result = scorer.score("日本", "日本語テスト");
2859        assert_eq!(result.match_type, MatchType::Prefix);
2860    }
2861
2862    #[test]
2863    fn unicode_substring_match() {
2864        let scorer = BayesianScorer::new();
2865        let result = scorer.score("本語", "日本語テスト");
2866        assert_eq!(result.match_type, MatchType::Substring);
2867    }
2868
2869    #[test]
2870    fn unicode_fuzzy_match() {
2871        let scorer = BayesianScorer::new();
2872        let result = scorer.score("日テ", "日本語テスト");
2873        assert_eq!(result.match_type, MatchType::Fuzzy);
2874    }
2875
2876    #[test]
2877    fn unicode_no_match() {
2878        let scorer = BayesianScorer::new();
2879        let result = scorer.score("ωψξ", "αβγ");
2880        assert_eq!(result.match_type, MatchType::NoMatch);
2881    }
2882
2883    #[test]
2884    fn unicode_word_start_match() {
2885        let scorer = BayesianScorer::new();
2886        // Word boundaries at space/dash/underscore
2887        let result = scorer.score("gd", "go dashboard");
2888        assert_eq!(result.match_type, MatchType::WordStart);
2889    }
2890
2891    // -----------------------------------------------------------------------
2892    // score_with_query_lower / score_with_lowered_title (bd-z1c65)
2893    // -----------------------------------------------------------------------
2894
2895    #[test]
2896    fn score_with_query_lower_matches_score() {
2897        let scorer = BayesianScorer::new();
2898        let direct = scorer.score("Set", "Settings");
2899        let pre = scorer.score_with_query_lower("Set", "set", "Settings");
2900        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2901        assert_eq!(direct.match_type, pre.match_type);
2902    }
2903
2904    #[test]
2905    fn score_with_lowered_title_matches_score() {
2906        let scorer = BayesianScorer::new();
2907        let direct = scorer.score("set", "Settings");
2908        let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
2909        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2910    }
2911
2912    #[test]
2913    fn score_with_lowered_title_and_words_matches() {
2914        let scorer = BayesianScorer::new();
2915        let direct = scorer.score("gd", "Go Dashboard");
2916        let pre = scorer.score_with_lowered_title_and_words(
2917            "gd",
2918            "gd",
2919            "Go Dashboard",
2920            "go dashboard",
2921            Some(&[0, 3]),
2922        );
2923        assert_eq!(direct.match_type, pre.match_type);
2924        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2925    }
2926
2927    // -----------------------------------------------------------------------
2928    // Tag matching edge cases (bd-z1c65)
2929    // -----------------------------------------------------------------------
2930
2931    #[test]
2932    fn tag_match_no_title_match_returns_nomatch() {
2933        let scorer = BayesianScorer::new();
2934        let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
2935        // Tag matches "xyz" but title doesn't match → no boost applied
2936        assert_eq!(result.match_type, MatchType::NoMatch);
2937        assert_eq!(result.score, 0.0);
2938    }
2939
2940    #[test]
2941    fn tag_match_no_matching_tag() {
2942        let scorer = BayesianScorer::new();
2943        let without = scorer.score("set", "Settings");
2944        let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
2945        // No tag matches → score unchanged
2946        assert!((without.score - with.score).abs() < f64::EPSILON);
2947    }
2948
2949    #[test]
2950    fn tag_match_case_insensitive() {
2951        let scorer = BayesianScorer::new();
2952        let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
2953        let without = scorer.score("set", "Settings");
2954        assert!(result.score > without.score);
2955    }
2956
2957    // -----------------------------------------------------------------------
2958    // MatchResult::no_match() (bd-z1c65)
2959    // -----------------------------------------------------------------------
2960
2961    #[test]
2962    fn no_match_result_has_evidence() {
2963        let r = MatchResult::no_match();
2964        assert_eq!(r.score, 0.0);
2965        assert_eq!(r.match_type, MatchType::NoMatch);
2966        assert!(r.match_positions.is_empty());
2967        assert_eq!(r.evidence.entries().len(), 1);
2968        assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
2969        assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
2970    }
2971
2972    // -----------------------------------------------------------------------
2973    // count_word_boundaries / total_gap edge cases (bd-z1c65)
2974    // -----------------------------------------------------------------------
2975
2976    #[test]
2977    fn count_word_boundaries_at_start() {
2978        let scorer = BayesianScorer::new();
2979        // Position 0 is always a word boundary
2980        let count = scorer.count_word_boundaries(&[0], "hello");
2981        assert_eq!(count, 1);
2982    }
2983
2984    #[test]
2985    fn count_word_boundaries_after_separators() {
2986        let scorer = BayesianScorer::new();
2987        // "a-b_c d" → positions 0, 2, 4, 6 are word starts
2988        let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2989        assert_eq!(count, 4);
2990    }
2991
2992    #[test]
2993    fn count_word_boundaries_mid_word() {
2994        let scorer = BayesianScorer::new();
2995        // Position 2 in "hello" is mid-word
2996        let count = scorer.count_word_boundaries(&[2], "hello");
2997        assert_eq!(count, 0);
2998    }
2999
3000    #[test]
3001    fn total_gap_empty() {
3002        let scorer = BayesianScorer::new();
3003        assert_eq!(scorer.total_gap(&[]), 0);
3004    }
3005
3006    #[test]
3007    fn total_gap_single() {
3008        let scorer = BayesianScorer::new();
3009        assert_eq!(scorer.total_gap(&[5]), 0);
3010    }
3011
3012    #[test]
3013    fn total_gap_contiguous() {
3014        let scorer = BayesianScorer::new();
3015        // [0,1,2] → gaps are 0,0 → total 0
3016        assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
3017    }
3018
3019    #[test]
3020    fn total_gap_with_gaps() {
3021        let scorer = BayesianScorer::new();
3022        // [0, 3, 7] → gaps: (3-0-1)=2, (7-3-1)=3 → total 5
3023        assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
3024    }
3025
3026    // -----------------------------------------------------------------------
3027    // ConformalRanker edge cases (bd-z1c65)
3028    // -----------------------------------------------------------------------
3029
3030    #[test]
3031    fn conformal_custom_thresholds() {
3032        let ranker = ConformalRanker {
3033            tie_epsilon: 0.1,
3034            stable_threshold: 0.9,
3035            marginal_threshold: 0.5,
3036        };
3037        // With tie_epsilon=0.1, scores within 0.1 are ties
3038        let mut r1 = MatchResult::no_match();
3039        r1.score = 0.5;
3040        r1.match_type = MatchType::Prefix;
3041        let mut r2 = MatchResult::no_match();
3042        r2.score = 0.45;
3043        r2.match_type = MatchType::Prefix;
3044
3045        let ranked = ranker.rank(vec![r1, r2]);
3046        assert_eq!(
3047            ranked.items[0].rank_confidence.stability,
3048            RankStability::Unstable,
3049            "Gap 0.05 < epsilon 0.1 → tie"
3050        );
3051    }
3052
3053    #[test]
3054    fn conformal_rank_top_k_larger_than_count() {
3055        let scorer = BayesianScorer::new();
3056        let results = vec![scorer.score("set", "Settings")];
3057        let ranker = ConformalRanker::new();
3058        let ranked = ranker.rank_top_k(results, 100);
3059        assert_eq!(ranked.items.len(), 1);
3060    }
3061
3062    #[test]
3063    fn conformal_rank_top_k_zero() {
3064        let scorer = BayesianScorer::new();
3065        let results = vec![
3066            scorer.score("set", "Settings"),
3067            scorer.score("set", "Asset"),
3068        ];
3069        let ranker = ConformalRanker::new();
3070        let ranked = ranker.rank_top_k(results, 0);
3071        assert!(ranked.items.is_empty());
3072        assert_eq!(ranked.summary.stable_count, 0);
3073    }
3074
3075    #[test]
3076    fn conformal_rank_confidence_display_marginal() {
3077        let rc = RankConfidence {
3078            confidence: 0.5,
3079            gap_to_next: 0.05,
3080            stability: RankStability::Marginal,
3081        };
3082        let s = format!("{rc}");
3083        assert!(s.contains("marginal"));
3084    }
3085
3086    #[test]
3087    fn conformal_rank_confidence_display_unstable() {
3088        let rc = RankConfidence {
3089            confidence: 0.1,
3090            gap_to_next: 0.001,
3091            stability: RankStability::Unstable,
3092        };
3093        let s = format!("{rc}");
3094        assert!(s.contains("unstable"));
3095    }
3096
3097    #[test]
3098    fn conformal_median_gap_even_count() {
3099        // 4 items → 3 gaps → odd, but let's use 5 items → 4 gaps → even
3100        let mut results = Vec::new();
3101        for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
3102            let mut r = MatchResult::no_match();
3103            r.score = s;
3104            r.match_type = MatchType::Prefix;
3105            results.push(r);
3106        }
3107        let ranker = ConformalRanker::new();
3108        let ranked = ranker.rank(results);
3109        // 4 gaps, all 0.2 → median = 0.2
3110        assert!(
3111            (ranked.summary.median_gap - 0.2).abs() < 0.01,
3112            "median_gap={}, expected ~0.2",
3113            ranked.summary.median_gap
3114        );
3115    }
3116
3117    // -----------------------------------------------------------------------
3118    // IncrementalScorer additional coverage (bd-z1c65)
3119    // -----------------------------------------------------------------------
3120
3121    #[test]
3122    fn incremental_with_scorer_uses_provided_scorer() {
3123        let scorer = BayesianScorer::new(); // evidence tracking ON
3124        let mut inc = IncrementalScorer::with_scorer(scorer);
3125        let corpus = test_corpus();
3126        let results = inc.score_corpus("git", &corpus, None);
3127        assert!(!results.is_empty());
3128        // Evidence tracking is on, so evidence should be populated
3129        assert!(!results[0].1.evidence.entries().is_empty());
3130    }
3131
3132    #[test]
3133    fn incremental_default_trait() {
3134        let inc = IncrementalScorer::default();
3135        assert_eq!(inc.stats().full_scans, 0);
3136        assert_eq!(inc.stats().incremental_scans, 0);
3137    }
3138
3139    #[test]
3140    fn incremental_stats_prune_ratio_zero_when_empty() {
3141        let stats = IncrementalStats::default();
3142        assert_eq!(stats.prune_ratio(), 0.0);
3143    }
3144
3145    #[test]
3146    fn incremental_score_corpus_with_lowered() {
3147        let corpus = vec!["Open File", "Save File", "Close Tab"];
3148        let lower = vec!["open file", "save file", "close tab"];
3149        let mut inc = IncrementalScorer::new();
3150        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3151        assert!(!results.is_empty());
3152        // All results should reference valid corpus indices
3153        for (idx, _) in &results {
3154            assert!(*idx < corpus.len());
3155        }
3156    }
3157
3158    #[test]
3159    fn incremental_score_corpus_with_lowered_incremental_path() {
3160        let corpus = vec!["Open File", "Save File", "Close Tab"];
3161        let lower = vec!["open file", "save file", "close tab"];
3162        let mut inc = IncrementalScorer::new();
3163        inc.score_corpus_with_lowered("f", &corpus, &lower, None);
3164        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3165        assert_eq!(inc.stats().incremental_scans, 1);
3166        assert!(!results.is_empty());
3167    }
3168
3169    #[test]
3170    fn incremental_score_corpus_with_lowered_and_words() {
3171        let corpus = vec![
3172            "Open File".to_string(),
3173            "Save File".to_string(),
3174            "Close Tab".to_string(),
3175        ];
3176        let lower = vec![
3177            "open file".to_string(),
3178            "save file".to_string(),
3179            "close tab".to_string(),
3180        ];
3181        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3182
3183        let mut inc = IncrementalScorer::new();
3184        let results =
3185            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3186        assert!(!results.is_empty());
3187    }
3188
3189    #[test]
3190    fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
3191        let corpus = vec![
3192            "Open File".to_string(),
3193            "Save File".to_string(),
3194            "Close Tab".to_string(),
3195        ];
3196        let lower = vec![
3197            "open file".to_string(),
3198            "save file".to_string(),
3199            "close tab".to_string(),
3200        ];
3201        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3202
3203        let mut inc = IncrementalScorer::new();
3204        inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
3205        let results =
3206            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3207        assert_eq!(inc.stats().incremental_scans, 1);
3208        assert!(!results.is_empty());
3209    }
3210
3211    // -----------------------------------------------------------------------
3212    // BayesianScorer::Default (bd-z1c65)
3213    // -----------------------------------------------------------------------
3214
3215    #[test]
3216    fn bayesian_scorer_default_has_evidence_off() {
3217        let scorer = BayesianScorer::default();
3218        assert!(!scorer.track_evidence);
3219    }
3220
3221    // -----------------------------------------------------------------------
3222    // Word boundary separator: hyphen and underscore (bd-z1c65)
3223    // -----------------------------------------------------------------------
3224
3225    #[test]
3226    fn word_start_match_with_hyphens() {
3227        let scorer = BayesianScorer::new();
3228        let result = scorer.score("fc", "file-commander");
3229        assert_eq!(result.match_type, MatchType::WordStart);
3230        assert_eq!(result.match_positions, vec![0, 5]);
3231    }
3232
3233    #[test]
3234    fn word_start_match_with_underscores() {
3235        let scorer = BayesianScorer::new();
3236        let result = scorer.score("fc", "file_commander");
3237        assert_eq!(result.match_type, MatchType::WordStart);
3238        assert_eq!(result.match_positions, vec![0, 5]);
3239    }
3240
3241    // -----------------------------------------------------------------------
3242    // MatchType Ord derivation (bd-z1c65)
3243    // -----------------------------------------------------------------------
3244
3245    #[test]
3246    fn match_type_ord() {
3247        let mut types = vec![
3248            MatchType::Exact,
3249            MatchType::NoMatch,
3250            MatchType::Fuzzy,
3251            MatchType::Prefix,
3252            MatchType::Substring,
3253            MatchType::WordStart,
3254        ];
3255        types.sort();
3256        assert_eq!(
3257            types,
3258            vec![
3259                MatchType::NoMatch,
3260                MatchType::Fuzzy,
3261                MatchType::Substring,
3262                MatchType::WordStart,
3263                MatchType::Prefix,
3264                MatchType::Exact,
3265            ]
3266        );
3267    }
3268
3269    // -----------------------------------------------------------------------
3270    // EvidenceKind equality (bd-z1c65)
3271    // -----------------------------------------------------------------------
3272
3273    #[test]
3274    fn evidence_kind_equality() {
3275        assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
3276        assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
3277    }
3278
3279    // -----------------------------------------------------------------------
3280    // Scoring position bonus (bd-z1c65)
3281    // -----------------------------------------------------------------------
3282
3283    #[test]
3284    fn earlier_substring_scores_higher() {
3285        let scorer = BayesianScorer::new();
3286        let early = scorer.score("set", "set in stone");
3287        let late = scorer.score("set", "the asset");
3288        // "set" at position 0 vs "set" at position 5
3289        assert!(
3290            early.score > late.score,
3291            "Earlier match should score higher: {} vs {}",
3292            early.score,
3293            late.score
3294        );
3295    }
3296}
3297
3298// ===========================================================================
3299// Performance regression tests (bd-39y4.5)
3300// ===========================================================================
3301
3302#[cfg(test)]
3303mod perf_tests {
3304    use super::*;
3305    use std::time::Instant;
3306
3307    #[derive(Debug, Clone, Copy)]
3308    struct PerfStats {
3309        p50_us: u64,
3310        p95_us: u64,
3311        p99_us: u64,
3312        mean_us: f64,
3313        variance_us: f64,
3314        samples: usize,
3315    }
3316
3317    /// Budget thresholds for single-query scoring.
3318    /// These are generous to avoid CI flakes but catch >2x regressions.
3319    const SINGLE_SCORE_BUDGET_US: u64 = 10; // 10µs per score call
3320    const CORPUS_100_BUDGET_US: u64 = 500; // 500µs for 100-item full scan
3321    const CORPUS_1000_BUDGET_US: u64 = 5_000; // 5ms for 1000-item full scan
3322    const CORPUS_5000_BUDGET_US: u64 = 25_000; // 25ms for 5000-item full scan
3323    const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 5_000; // 5ms for 7 keystrokes on 100 items (relaxed from 2ms for CI stability)
3324    const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000; // 15ms for 7 keystrokes on 1000 items
3325
3326    const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;
3327
3328    fn is_coverage_run() -> bool {
3329        std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
3330    }
3331
3332    fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
3333        // Coverage instrumentation adds substantial overhead (and noise) to these
3334        // microbench-style regression tests, so we relax budgets enough to keep
3335        // `cargo llvm-cov` stable while still logging the timings.
3336        if coverage_run {
3337            base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
3338        } else {
3339            base
3340        }
3341    }
3342
3343    fn coverage_budget_us(base: u64) -> u64 {
3344        coverage_budget_us_with_mode(base, is_coverage_run())
3345    }
3346
3347    #[test]
3348    fn coverage_budget_us_with_mode_respects_flag() {
3349        assert_eq!(coverage_budget_us_with_mode(123, false), 123);
3350        assert_eq!(
3351            coverage_budget_us_with_mode(123, true),
3352            123 * COVERAGE_BUDGET_MULTIPLIER
3353        );
3354    }
3355
3356    /// Generate a command corpus of the specified size with realistic variety.
3357    fn make_corpus(size: usize) -> Vec<String> {
3358        let base = [
3359            "Open File",
3360            "Save File",
3361            "Close Tab",
3362            "Split Editor Right",
3363            "Split Editor Down",
3364            "Toggle Terminal",
3365            "Go to Line",
3366            "Find in Files",
3367            "Replace in Files",
3368            "Git: Commit",
3369            "Git: Push",
3370            "Git: Pull",
3371            "Debug: Start",
3372            "Debug: Stop",
3373            "Debug: Step Over",
3374            "Format Document",
3375            "Rename Symbol",
3376            "Go to Definition",
3377            "Find All References",
3378            "Toggle Sidebar",
3379        ];
3380        base.iter()
3381            .cycle()
3382            .take(size)
3383            .enumerate()
3384            .map(|(i, cmd)| {
3385                if i < base.len() {
3386                    (*cmd).to_string()
3387                } else {
3388                    format!("{} ({})", cmd, i)
3389                }
3390            })
3391            .collect()
3392    }
3393
3394    fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
3395        let mut times = Vec::with_capacity(iterations);
3396        // Warmup
3397        for _ in 0..3 {
3398            f();
3399        }
3400        for _ in 0..iterations {
3401            let start = Instant::now();
3402            f();
3403            times.push(start.elapsed().as_micros() as u64);
3404        }
3405        times.sort_unstable();
3406        let len = times.len();
3407        let p50 = times[len / 2];
3408        let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
3409        let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
3410        let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
3411        let variance = times
3412            .iter()
3413            .map(|value| {
3414                let delta = *value as f64 - mean;
3415                delta * delta
3416            })
3417            .sum::<f64>()
3418            / len as f64;
3419
3420        PerfStats {
3421            p50_us: p50,
3422            p95_us: p95,
3423            p99_us: p99,
3424            mean_us: mean,
3425            variance_us: variance,
3426            samples: len,
3427        }
3428    }
3429
3430    #[test]
3431    fn perf_single_score_under_budget() {
3432        let scorer = BayesianScorer::fast();
3433        let stats = measure_stats_us(200, || {
3434            std::hint::black_box(scorer.score("git co", "Git: Commit"));
3435        });
3436        eprintln!(
3437            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3438            stats.samples,
3439            stats.p50_us,
3440            stats.p95_us,
3441            stats.p99_us,
3442            stats.mean_us,
3443            stats.variance_us
3444        );
3445        let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
3446        assert!(
3447            stats.p50_us <= budget,
3448            "Single score p50 = {}µs exceeds budget {}µs",
3449            stats.p50_us,
3450            budget,
3451        );
3452    }
3453
3454    #[test]
3455    fn perf_corpus_100_under_budget() {
3456        let scorer = BayesianScorer::fast();
3457        let corpus = make_corpus(100);
3458        let stats = measure_stats_us(50, || {
3459            let mut results: Vec<_> = corpus
3460                .iter()
3461                .map(|t| scorer.score("git co", t))
3462                .filter(|r| r.score > 0.0)
3463                .collect();
3464            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3465            std::hint::black_box(&results);
3466        });
3467        eprintln!(
3468            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3469            stats.samples,
3470            stats.p50_us,
3471            stats.p95_us,
3472            stats.p99_us,
3473            stats.mean_us,
3474            stats.variance_us
3475        );
3476        let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
3477        assert!(
3478            stats.p95_us <= budget,
3479            "100-item corpus p95 = {}µs exceeds budget {}µs",
3480            stats.p95_us,
3481            budget,
3482        );
3483    }
3484
3485    #[test]
3486    fn perf_corpus_1000_under_budget() {
3487        let scorer = BayesianScorer::fast();
3488        let corpus = make_corpus(1_000);
3489        let stats = measure_stats_us(20, || {
3490            let mut results: Vec<_> = corpus
3491                .iter()
3492                .map(|t| scorer.score("git co", t))
3493                .filter(|r| r.score > 0.0)
3494                .collect();
3495            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3496            std::hint::black_box(&results);
3497        });
3498        eprintln!(
3499            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3500            stats.samples,
3501            stats.p50_us,
3502            stats.p95_us,
3503            stats.p99_us,
3504            stats.mean_us,
3505            stats.variance_us
3506        );
3507        let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
3508        assert!(
3509            stats.p95_us <= budget,
3510            "1000-item corpus p95 = {}µs exceeds budget {}µs",
3511            stats.p95_us,
3512            budget,
3513        );
3514    }
3515
3516    #[test]
3517    fn perf_corpus_5000_under_budget() {
3518        let scorer = BayesianScorer::fast();
3519        let corpus = make_corpus(5_000);
3520        let stats = measure_stats_us(10, || {
3521            let mut results: Vec<_> = corpus
3522                .iter()
3523                .map(|t| scorer.score("git co", t))
3524                .filter(|r| r.score > 0.0)
3525                .collect();
3526            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3527            std::hint::black_box(&results);
3528        });
3529        eprintln!(
3530            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3531            stats.samples,
3532            stats.p50_us,
3533            stats.p95_us,
3534            stats.p99_us,
3535            stats.mean_us,
3536            stats.variance_us
3537        );
3538        let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
3539        assert!(
3540            stats.p95_us <= budget,
3541            "5000-item corpus p95 = {}µs exceeds budget {}µs",
3542            stats.p95_us,
3543            budget,
3544        );
3545    }
3546
3547    #[test]
3548    fn perf_incremental_7key_100_under_budget() {
3549        let corpus = make_corpus(100);
3550        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3551        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3552
3553        let stats = measure_stats_us(30, || {
3554            let mut inc = IncrementalScorer::new();
3555            for query in &queries {
3556                let results = inc.score_corpus(query, &corpus_refs, None);
3557                std::hint::black_box(&results);
3558            }
3559        });
3560        eprintln!(
3561            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3562            stats.samples,
3563            stats.p50_us,
3564            stats.p95_us,
3565            stats.p99_us,
3566            stats.mean_us,
3567            stats.variance_us
3568        );
3569        let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
3570        assert!(
3571            stats.p95_us <= budget,
3572            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
3573            stats.p95_us,
3574            budget,
3575        );
3576    }
3577
3578    #[test]
3579    fn perf_incremental_7key_1000_under_budget() {
3580        let corpus = make_corpus(1_000);
3581        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3582        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3583
3584        let stats = measure_stats_us(10, || {
3585            let mut inc = IncrementalScorer::new();
3586            for query in &queries {
3587                let results = inc.score_corpus(query, &corpus_refs, None);
3588                std::hint::black_box(&results);
3589            }
3590        });
3591        eprintln!(
3592            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3593            stats.samples,
3594            stats.p50_us,
3595            stats.p95_us,
3596            stats.p99_us,
3597            stats.mean_us,
3598            stats.variance_us
3599        );
3600        let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
3601        assert!(
3602            stats.p95_us <= budget,
3603            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
3604            stats.p95_us,
3605            budget,
3606        );
3607    }
3608
3609    #[test]
3610    fn perf_incremental_faster_than_naive() {
3611        let corpus = make_corpus(100);
3612        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3613        let scorer = BayesianScorer::fast();
3614        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3615
3616        let naive_stats = measure_stats_us(30, || {
3617            for query in &queries {
3618                let mut results: Vec<_> = corpus
3619                    .iter()
3620                    .map(|t| scorer.score(query, t))
3621                    .filter(|r| r.score > 0.0)
3622                    .collect();
3623                results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3624                std::hint::black_box(&results);
3625            }
3626        });
3627
3628        let inc_stats = measure_stats_us(30, || {
3629            let mut inc = IncrementalScorer::new();
3630            for query in &queries {
3631                let results = inc.score_corpus(query, &corpus_refs, None);
3632                std::hint::black_box(&results);
3633            }
3634        });
3635        eprintln!(
3636            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
3637            naive_stats.samples,
3638            naive_stats.p50_us,
3639            naive_stats.p95_us,
3640            inc_stats.p50_us,
3641            inc_stats.p95_us
3642        );
3643
3644        // Incremental should not be more than 2x slower than naive
3645        // (in practice it's faster, but we set a relaxed threshold)
3646        assert!(
3647            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50, // +50µs tolerance for measurement noise
3648            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
3649            inc_stats.p50_us,
3650            naive_stats.p50_us,
3651        );
3652    }
3653
3654    fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
3655        if p50_small_us > 0 {
3656            p50_large_us as f64 / p50_small_us as f64
3657        } else {
3658            0.0
3659        }
3660    }
3661
3662    #[test]
3663    fn scaling_ratio_handles_zero_divisor() {
3664        assert_eq!(scaling_ratio_us(0, 123), 0.0);
3665        assert_eq!(scaling_ratio_us(10, 25), 2.5);
3666    }
3667
3668    #[test]
3669    fn perf_scaling_sublinear() {
3670        let scorer = BayesianScorer::fast();
3671        let corpus_100 = make_corpus(100);
3672        let corpus_1000 = make_corpus(1_000);
3673
3674        let stats_100 = measure_stats_us(30, || {
3675            let mut results: Vec<_> = corpus_100
3676                .iter()
3677                .map(|t| scorer.score("git", t))
3678                .filter(|r| r.score > 0.0)
3679                .collect();
3680            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3681            std::hint::black_box(&results);
3682        });
3683
3684        let stats_1000 = measure_stats_us(20, || {
3685            let mut results: Vec<_> = corpus_1000
3686                .iter()
3687                .map(|t| scorer.score("git", t))
3688                .filter(|r| r.score > 0.0)
3689                .collect();
3690            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3691            std::hint::black_box(&results);
3692        });
3693        eprintln!(
3694            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
3695            stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
3696        );
3697
3698        // 10x corpus ⇒ O(n log n) theoretical ratio ≈ 15x.  Use 25x to absorb
3699        // system noise when tests run in parallel (still catches O(n²) = 100x).
3700        let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
3701        assert!(
3702            ratio < 25.0,
3703            "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
3704            ratio,
3705            stats_100.p50_us,
3706            stats_1000.p50_us,
3707        );
3708    }
3709}