Skip to main content

ftui_widgets/command_palette/
scorer.rs

1#![forbid(unsafe_code)]
2
3//! Bayesian Match Scoring for Command Palette.
4//!
5//! This module implements a probabilistic scoring model using Bayes factors
6//! to compute match relevance. An evidence ledger tracks each scoring factor
7//! and its contribution, enabling explainable ranking decisions.
8//!
9//! # Mathematical Model
10//!
11//! We compute the posterior odds ratio:
12//!
13//! ```text
14//! P(relevant | evidence) / P(not_relevant | evidence)
15//!     = [P(relevant) / P(not_relevant)] × Π_i BF_i
16//!
17//! where BF_i = P(evidence_i | relevant) / P(evidence_i | not_relevant)
18//!            = likelihood ratio for evidence type i
19//! ```
20//!
21//! The prior odds depend on match type:
22//! - Exact match: 99:1 (very likely relevant)
23//! - Prefix match: 9:1 (likely relevant)
24//! - Word-start match: 4:1 (probably relevant)
25//! - Substring match: 2:1 (possibly relevant)
26//! - Fuzzy match: 1:3 (unlikely without other evidence)
27//!
28//! The final score is the posterior probability P(relevant | evidence).
29//!
30//! # Evidence Ledger
31//!
32//! Each match includes an evidence ledger that explains the score:
33//! - Match type contribution (prior odds)
34//! - Word boundary bonus (Bayes factor ~2.0)
35//! - Position bonus (earlier = higher factor)
36//! - Gap penalty (gaps reduce factor)
37//! - Tag match bonus (strong positive evidence)
38//!
39//! # Invariants
40//!
41//! 1. Scores are bounded: 0.0 ≤ score ≤ 1.0 (probability)
42//! 2. Determinism: same input → identical score
43//! 3. Monotonicity: longer exact prefixes score ≥ shorter
44//! 4. Transitivity: score ordering is consistent
45
46use std::cmp::Ordering;
47use std::fmt;
48
49// ---------------------------------------------------------------------------
50// Match Types
51// ---------------------------------------------------------------------------
52
/// Type of match between query and title.
///
/// Variants are declared weakest-first so the derived `Ord` ranks
/// stronger match types higher; `compare_ranked_match_results` relies on
/// this as a tie-breaker when scores are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    /// No match found.
    NoMatch,
    /// Characters found in order but with gaps.
    Fuzzy,
    /// Query found as contiguous substring.
    Substring,
    /// Query matches start of word boundaries.
    WordStart,
    /// Title starts with query.
    Prefix,
    /// Query equals title exactly.
    Exact,
}
69
70impl MatchType {
71    /// Prior odds ratio P(relevant) / P(not_relevant) for this match type.
72    ///
73    /// These are derived from empirical observations of user intent:
74    /// - Exact matches are almost always what the user wants
75    /// - Prefix matches are very likely relevant
76    /// - Fuzzy matches need additional evidence
77    pub fn prior_odds(self) -> f64 {
78        match self {
79            Self::Exact => 99.0,    // 99:1 odds → P ≈ 0.99
80            Self::Prefix => 9.0,    // 9:1 odds → P ≈ 0.90
81            Self::WordStart => 4.0, // 4:1 odds → P ≈ 0.80
82            Self::Substring => 2.0, // 2:1 odds → P ≈ 0.67
83            Self::Fuzzy => 0.333,   // 1:3 odds → P ≈ 0.25
84            Self::NoMatch => 0.0,   // Impossible
85        }
86    }
87
88    /// Human-readable description for the evidence ledger.
89    pub fn description(self) -> &'static str {
90        match self {
91            Self::Exact => "exact match",
92            Self::Prefix => "prefix match",
93            Self::WordStart => "word-start match",
94            Self::Substring => "contiguous substring",
95            Self::Fuzzy => "fuzzy match",
96            Self::NoMatch => "no match",
97        }
98    }
99}
100
101fn compare_ranked_match_results(
102    left: &(usize, MatchResult),
103    right: &(usize, MatchResult),
104) -> Ordering {
105    right
106        .1
107        .score
108        .partial_cmp(&left.1.score)
109        .unwrap_or(Ordering::Equal)
110        .then_with(|| right.1.match_type.cmp(&left.1.match_type))
111}
112
113// ---------------------------------------------------------------------------
114// Evidence Entry
115// ---------------------------------------------------------------------------
116
/// A single piece of evidence contributing to the match score.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    /// Type of evidence.
    pub kind: EvidenceKind,
    /// Bayes factor: likelihood ratio P(evidence | relevant) / P(evidence | ¬relevant).
    /// Values > 1.0 support relevance, < 1.0 oppose it.
    ///
    /// For the `EvidenceKind::MatchType` entry this field stores the prior
    /// odds rather than a likelihood ratio (see `EvidenceLedger::prior_odds`
    /// and `EvidenceLedger::posterior_probability`, which exclude it from
    /// the evidence product).
    pub bayes_factor: f64,
    /// Human-readable explanation.
    pub description: EvidenceDescription,
}
128
/// Types of evidence that contribute to match scoring.
///
/// Each variant corresponds to one kind of `EvidenceEntry` the scorer may
/// record; see `BayesianScorer::compute_score` for where each factor is
/// produced.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    /// Base match type (prior).
    MatchType,
    /// Match at word boundary.
    WordBoundary,
    /// Match position (earlier is better).
    Position,
    /// Gap between matched characters.
    GapPenalty,
    /// Query also matches a tag.
    TagMatch,
    /// Title length factor (shorter is more specific).
    TitleLength,
}
145
146impl fmt::Display for EvidenceEntry {
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        let direction = if self.bayes_factor > 1.0 {
149            "supports"
150        } else if self.bayes_factor < 1.0 {
151            "opposes"
152        } else {
153            "neutral"
154        };
155        write!(
156            f,
157            "{:?}: BF={:.2} ({}) - {}",
158            self.kind, self.bayes_factor, direction, self.description
159        )
160    }
161}
162
/// Human-readable evidence description (lazy formatting).
///
/// Structured data instead of a pre-rendered `String`, so text is only
/// formatted when the ledger is displayed or serialized.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    /// Fixed message known at compile time.
    Static(&'static str),
    /// Title length; the scorer passes `title.len()` (bytes), though the
    /// rendered text says "chars" — identical for ASCII titles.
    TitleLengthChars { len: usize },
    /// Position of the first matched character.
    FirstMatchPos { pos: usize },
    /// Number of matched positions that sit on word boundaries.
    WordBoundaryCount { count: usize },
    /// Total gap between adjacent matched positions.
    GapTotal { total: usize },
    /// Percentage of the title covered by the query.
    CoveragePercent { percent: f64 },
}
173
174impl fmt::Display for EvidenceDescription {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        match self {
177            Self::Static(msg) => write!(f, "{msg}"),
178            Self::TitleLengthChars { len } => write!(f, "title length {} chars", len),
179            Self::FirstMatchPos { pos } => write!(f, "first match at position {}", pos),
180            Self::WordBoundaryCount { count } => write!(f, "{} word boundary matches", count),
181            Self::GapTotal { total } => write!(f, "total gap of {} characters", total),
182            Self::CoveragePercent { percent } => write!(f, "query covers {:.0}% of title", percent),
183        }
184    }
185}
186
187// ---------------------------------------------------------------------------
188// Evidence Ledger
189// ---------------------------------------------------------------------------
190
/// A ledger of evidence explaining a match score.
///
/// This provides full transparency into why a match received its score,
/// enabling debugging and user explanations.
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    // Entries in insertion order. The MatchType entry (when present)
    // carries the prior odds; all other entries are Bayes factors.
    entries: Vec<EvidenceEntry>,
}
199
200impl EvidenceLedger {
201    /// Create a new empty ledger.
202    pub fn new() -> Self {
203        Self::default()
204    }
205
206    /// Add an evidence entry.
207    pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
208        self.entries.push(EvidenceEntry {
209            kind,
210            bayes_factor,
211            description,
212        });
213    }
214
215    /// Get all entries.
216    pub fn entries(&self) -> &[EvidenceEntry] {
217        &self.entries
218    }
219
220    /// Compute the combined Bayes factor (product of all factors).
221    pub fn combined_bayes_factor(&self) -> f64 {
222        self.entries.iter().map(|e| e.bayes_factor).product()
223    }
224
225    /// Get the prior odds (from MatchType entry, if present).
226    #[must_use = "use the prior odds (if any) for diagnostics"]
227    pub fn prior_odds(&self) -> Option<f64> {
228        self.entries
229            .iter()
230            .find(|e| e.kind == EvidenceKind::MatchType)
231            .map(|e| e.bayes_factor)
232    }
233
234    /// Compute posterior probability from prior odds and evidence.
235    ///
236    /// posterior_prob = posterior_odds / (1 + posterior_odds)
237    /// where posterior_odds = prior_odds × combined_bf
238    pub fn posterior_probability(&self) -> f64 {
239        let prior = self.prior_odds().unwrap_or(1.0);
240        // Exclude prior from BF since it's already the odds
241        let bf: f64 = self
242            .entries
243            .iter()
244            .filter(|e| e.kind != EvidenceKind::MatchType)
245            .map(|e| e.bayes_factor)
246            .product();
247
248        let posterior_odds = prior * bf;
249        if posterior_odds.is_infinite() {
250            1.0
251        } else {
252            posterior_odds / (1.0 + posterior_odds)
253        }
254    }
255}
256
257impl EvidenceLedger {
258    /// Format this ledger as a JSONL line for structured logging.
259    #[must_use]
260    pub fn to_jsonl(&self) -> String {
261        let entries_json: Vec<String> = self
262            .entries
263            .iter()
264            .map(|e| {
265                format!(
266                    r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
267                    e.kind, e.bayes_factor, e.description
268                )
269            })
270            .collect();
271        format!(
272            r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
273            entries_json.join(","),
274            self.combined_bayes_factor(),
275            self.posterior_probability(),
276        )
277    }
278}
279
280impl fmt::Display for EvidenceLedger {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        writeln!(f, "Evidence Ledger:")?;
283        for entry in &self.entries {
284            writeln!(f, "  {}", entry)?;
285        }
286        writeln!(f, "  Combined BF: {:.3}", self.combined_bayes_factor())?;
287        writeln!(f, "  Posterior P: {:.3}", self.posterior_probability())?;
288        Ok(())
289    }
290}
291
292// ---------------------------------------------------------------------------
293// Match Result
294// ---------------------------------------------------------------------------
295
/// Result of scoring a query against a title.
#[derive(Debug, Clone)]
pub struct MatchResult {
    /// Computed relevance score (posterior probability in [0.0, 1.0]).
    pub score: f64,
    /// Type of match detected.
    pub match_type: MatchType,
    /// Positions of matched characters in the title.
    ///
    /// On the ASCII fast path these are byte offsets (identical to char
    /// indices); on the Unicode path they are char indices into the
    /// lowercased title.
    pub match_positions: Vec<usize>,
    /// Evidence ledger explaining the score.
    pub evidence: EvidenceLedger,
}
308
309impl MatchResult {
310    /// Create a no-match result.
311    pub fn no_match() -> Self {
312        let mut evidence = EvidenceLedger::new();
313        evidence.add(
314            EvidenceKind::MatchType,
315            0.0,
316            EvidenceDescription::Static("no matching characters found"),
317        );
318        Self {
319            score: 0.0,
320            match_type: MatchType::NoMatch,
321            match_positions: Vec::new(),
322            evidence,
323        }
324    }
325}
326
327// ---------------------------------------------------------------------------
328// Scorer
329// ---------------------------------------------------------------------------
330
/// Bayesian fuzzy matcher for command palette.
///
/// Computes relevance scores using a probabilistic model with
/// evidence tracking for explainable ranking.
///
/// Note: the derived `Default` yields `track_evidence: false`, matching
/// `BayesianScorer::fast`; use `BayesianScorer::new` for an explainable
/// scorer with evidence tracking enabled.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    /// Whether to track detailed evidence (slower but explainable).
    pub track_evidence: bool,
}
340
impl BayesianScorer {
    /// Create a new scorer with evidence tracking enabled.
    pub fn new() -> Self {
        Self {
            track_evidence: true,
        }
    }

    /// Create a scorer without evidence tracking (faster).
    pub fn fast() -> Self {
        Self {
            track_evidence: false,
        }
    }

    /// Score a query against a title.
    ///
    /// Returns a MatchResult with score, match type, positions, and evidence.
    /// Matching is case-insensitive: both strings are lowercased before
    /// match detection.
    pub fn score(&self, query: &str, title: &str) -> MatchResult {
        // Quick rejection: query longer than title
        // NOTE(review): byte lengths, compared *before* lowercasing; a
        // lowercase mapping that changes byte length (non-ASCII only)
        // could in principle reject a would-be match — confirm this
        // heuristic is acceptable.
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        // Empty query matches everything (show all)
        if query.is_empty() {
            return self.score_empty_query(title);
        }

        // Normalize for case-insensitive matching
        let query_lower = query.to_lowercase();
        let title_lower = title.to_lowercase();

        // Determine match type
        let (match_type, positions) = self.detect_match_type(&query_lower, &title_lower, title);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        // Build evidence ledger and compute score
        self.compute_score(match_type, positions, &query_lower, title)
    }

    /// Score a query using a pre-lowercased query string.
    ///
    /// This avoids repeated query normalization when scoring against many titles.
    pub fn score_with_query_lower(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
    ) -> MatchResult {
        let title_lower = title.to_lowercase();
        self.score_with_lowered_title(query, query_lower, title, &title_lower)
    }

    /// Score a query with both query and title already lowercased.
    ///
    /// This avoids per-title lowercasing in hot loops.
    pub fn score_with_lowered_title(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
    ) -> MatchResult {
        self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
    }

    /// Score a query with pre-lowercased title and optional word-start cache.
    ///
    /// `word_starts` is a precomputed list of word-start byte offsets; it
    /// is only consulted on the ASCII fast path (the Unicode path ignores
    /// it and detects word starts inline).
    pub fn score_with_lowered_title_and_words(
        &self,
        query: &str,
        query_lower: &str,
        title: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> MatchResult {
        // Quick rejection: query longer than title (byte lengths)
        if query.len() > title.len() {
            return MatchResult::no_match();
        }

        // Empty query matches everything (show all)
        if query.is_empty() {
            return self.score_empty_query(title);
        }

        // Determine match type
        let (match_type, positions) =
            self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);

        if match_type == MatchType::NoMatch {
            return MatchResult::no_match();
        }

        // Build evidence ledger and compute score
        self.compute_score(match_type, positions, query_lower, title)
    }

    /// Score a query against a title with tags.
    ///
    /// A tag hit adds a Bayes factor of 3.0 on top of the title score; it
    /// never rescues a title that did not match at all (NoMatch stays 0).
    pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
        let mut result = self.score(query, title);

        // Check if query matches any tag
        // NOTE(review): the query is lowercased here but the tag is passed
        // as-is — presumably `contains_ignore_case` case-folds the
        // haystack; confirm against its definition.
        let query_lower = query.to_lowercase();
        let tag_match = tags
            .iter()
            .any(|tag| crate::contains_ignore_case(tag, &query_lower));

        if tag_match && result.match_type != MatchType::NoMatch {
            // Strong positive evidence
            if self.track_evidence {
                result.evidence.add(
                    EvidenceKind::TagMatch,
                    3.0, // 3:1 in favor
                    EvidenceDescription::Static("query matches tag"),
                );
                result.score = result.evidence.posterior_probability();
            } else if (0.0..1.0).contains(&result.score) {
                // No ledger to recompute from: boost the odds directly.
                // The half-open range guard keeps score == 1.0 from
                // dividing by zero below.
                let odds = result.score / (1.0 - result.score);
                let boosted = odds * 3.0;
                result.score = boosted / (1.0 + boosted);
            }
        }

        result
    }

    /// Score when query is empty (returns all items with neutral score).
    ///
    /// Every item scores just above 0.5, with a slight edge for shorter
    /// titles; `title.len()` here is a byte length.
    fn score_empty_query(&self, title: &str) -> MatchResult {
        // Shorter titles are more specific, slight preference
        let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
        if self.track_evidence {
            let mut evidence = EvidenceLedger::new();
            evidence.add(
                EvidenceKind::MatchType,
                1.0, // Neutral prior
                EvidenceDescription::Static("empty query matches all"),
            );
            evidence.add(
                EvidenceKind::TitleLength,
                length_factor,
                EvidenceDescription::TitleLengthChars { len: title.len() },
            );
            let score = evidence.posterior_probability();
            MatchResult {
                score,
                match_type: MatchType::Fuzzy, // Treat as weak match
                match_positions: Vec::new(),
                evidence,
            }
        } else {
            // Same arithmetic as the ledger path, minus entry allocation.
            let odds = length_factor;
            let score = odds / (1.0 + odds);
            MatchResult {
                score,
                match_type: MatchType::Fuzzy,
                match_positions: Vec::new(),
                evidence: EvidenceLedger::new(),
            }
        }
    }

    /// Detect the type of match and positions of matched characters.
    fn detect_match_type(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
    ) -> (MatchType, Vec<usize>) {
        self.detect_match_type_with_words(query_lower, title_lower, title, None)
    }

    /// Detect match type with optional precomputed word-start positions.
    ///
    /// Checks match types from strongest to weakest (exact → prefix →
    /// word-start → substring → fuzzy) and returns the first hit.
    /// Returned positions are byte offsets on the ASCII fast path and
    /// char indices (into the lowercased title) on the Unicode path.
    fn detect_match_type_with_words(
        &self,
        query_lower: &str,
        title_lower: &str,
        title: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        if query_lower.is_ascii() && title_lower.is_ascii() {
            return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
        }

        // Check exact match
        if query_lower == title_lower {
            let positions: Vec<usize> = (0..title.chars().count()).collect();
            return (MatchType::Exact, positions);
        }

        // Check prefix match
        if title_lower.starts_with(query_lower) {
            let positions: Vec<usize> = (0..query_lower.chars().count()).collect();
            return (MatchType::Prefix, positions);
        }

        // Check word-start match (e.g., "gd" matches "Go Dashboard")
        if let Some(positions) = self.word_start_match(query_lower, title_lower) {
            return (MatchType::WordStart, positions);
        }

        // Check contiguous substring
        if let Some(byte_start) = title_lower.find(query_lower) {
            // Convert the byte offset to a char index for highlighting.
            let char_start = title_lower[..byte_start].chars().count();
            let char_len = query_lower.chars().count();
            let positions: Vec<usize> = (char_start..char_start + char_len).collect();
            return (MatchType::Substring, positions);
        }

        // Check fuzzy match
        if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// ASCII fast-path match detection.
    ///
    /// Bytes and chars coincide here, so all positions are byte offsets.
    fn detect_match_type_ascii(
        &self,
        query_lower: &str,
        title_lower: &str,
        word_starts: Option<&[usize]>,
    ) -> (MatchType, Vec<usize>) {
        let query_bytes = query_lower.as_bytes();
        let title_bytes = title_lower.as_bytes();

        if query_bytes == title_bytes {
            let positions: Vec<usize> = (0..title_bytes.len()).collect();
            return (MatchType::Exact, positions);
        }

        if title_bytes.starts_with(query_bytes) {
            let positions: Vec<usize> = (0..query_bytes.len()).collect();
            return (MatchType::Prefix, positions);
        }

        if let Some(positions) = self.word_start_match_ascii(query_bytes, title_bytes, word_starts)
        {
            return (MatchType::WordStart, positions);
        }

        if let Some(start) = title_lower.find(query_lower) {
            let positions: Vec<usize> = (start..start + query_bytes.len()).collect();
            return (MatchType::Substring, positions);
        }

        if let Some(positions) = self.fuzzy_match_ascii(query_bytes, title_bytes) {
            return (MatchType::Fuzzy, positions);
        }

        (MatchType::NoMatch, Vec::new())
    }

    /// Check if query matches word starts (e.g., "gd" → "Go Dashboard").
    ///
    /// Returns char indices of the matched word-start characters, or
    /// `None` if the query is not fully consumed. Word boundaries are
    /// ASCII space/dash/underscore only: for a multi-byte char the
    /// preceding byte is a UTF-8 continuation byte, which never matches,
    /// so non-ASCII chars only start a word at index 0.
    fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        let title_bytes = title.as_bytes();
        for (char_index, (i, c)) in title.char_indices().enumerate() {
            // Is this a word start?
            let is_word_start = i == 0 || {
                let prev = title_bytes
                    .get(i.saturating_sub(1))
                    .copied()
                    .unwrap_or(b' ');
                prev == b' ' || prev == b'-' || prev == b'_'
            };

            if is_word_start
                && let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(char_index);
                query_chars.next();
            }
        }

        // All query chars consumed ⇒ successful word-start match.
        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// ASCII word-start match with optional precomputed word-start positions.
    ///
    /// When `word_starts` is provided it is trusted as the list of
    /// word-start byte offsets (out-of-range entries are skipped);
    /// otherwise word starts are detected inline. Returns `None` unless
    /// every query byte is matched at some word start.
    fn word_start_match_ascii(
        &self,
        query: &[u8],
        title: &[u8],
        word_starts: Option<&[usize]>,
    ) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        if let Some(starts) = word_starts {
            for &pos in starts {
                if pos >= title.len() {
                    continue;
                }
                if title[pos] == query[query_idx] {
                    positions.push(pos);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        } else {
            for (i, &b) in title.iter().enumerate() {
                let is_word_start = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
                if is_word_start && b == query[query_idx] {
                    positions.push(i);
                    query_idx += 1;
                    if query_idx == query.len() {
                        return Some(positions);
                    }
                }
            }
        }

        None
    }

    /// Check fuzzy match (characters in order).
    ///
    /// Greedy left-to-right scan; returns char indices of the matched
    /// characters, or `None` if the query is not fully consumed.
    fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_chars = query.chars().peekable();

        for (char_index, c) in title.chars().enumerate() {
            if let Some(&qc) = query_chars.peek()
                && c == qc
            {
                positions.push(char_index);
                query_chars.next();
            }
        }

        if query_chars.peek().is_none() {
            Some(positions)
        } else {
            None
        }
    }

    /// ASCII fuzzy match (characters in order).
    ///
    /// Byte-wise version of `fuzzy_match`; returns byte offsets.
    fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
        let mut positions = Vec::new();
        let mut query_idx = 0;
        if query.is_empty() {
            return Some(positions);
        }

        for (i, &b) in title.iter().enumerate() {
            if b == query[query_idx] {
                positions.push(i);
                query_idx += 1;
                if query_idx == query.len() {
                    return Some(positions);
                }
            }
        }

        None
    }

    /// Compute score from match type and positions.
    ///
    /// The `track_evidence == false` branch mirrors the ledger branch
    /// factor-for-factor (same values, same multiplication order) so both
    /// modes produce the same score; keep the two branches in sync when
    /// editing either.
    fn compute_score(
        &self,
        match_type: MatchType,
        positions: Vec<usize>,
        query: &str,
        title: &str,
    ) -> MatchResult {
        let positions_ref = positions.as_slice();
        if !self.track_evidence {
            let mut combined_bf = match_type.prior_odds();

            // Position bonus: matches at start are better
            if let Some(&first_pos) = positions_ref.first() {
                let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
                combined_bf *= position_factor;
            }

            // Word boundary bonus
            let word_boundary_count = self.count_word_boundaries(positions_ref, title);
            if word_boundary_count > 0 {
                let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
                combined_bf *= boundary_factor;
            }

            // Gap penalty for fuzzy matches
            if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
                let total_gap = self.total_gap(positions_ref);
                let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
                combined_bf *= gap_factor;
            }

            // Title length: prefer shorter (more specific) titles
            let title_len = title.len().max(1);
            let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
            combined_bf *= length_factor;

            let score = combined_bf / (1.0 + combined_bf);
            return MatchResult {
                score,
                match_type,
                match_positions: positions,
                evidence: EvidenceLedger::new(),
            };
        }

        let mut evidence = EvidenceLedger::new();

        // Prior odds from match type
        let prior_odds = match_type.prior_odds();
        evidence.add(
            EvidenceKind::MatchType,
            prior_odds,
            EvidenceDescription::Static(match_type.description()),
        );

        // Position bonus: matches at start are better
        if let Some(&first_pos) = positions_ref.first() {
            let position_factor = 1.0 + (1.0 / (first_pos as f64 + 1.0)) * 0.5;
            evidence.add(
                EvidenceKind::Position,
                position_factor,
                EvidenceDescription::FirstMatchPos { pos: first_pos },
            );
        }

        // Word boundary bonus
        let word_boundary_count = self.count_word_boundaries(positions_ref, title);
        if word_boundary_count > 0 {
            let boundary_factor = 1.0 + (word_boundary_count as f64 * 0.3);
            evidence.add(
                EvidenceKind::WordBoundary,
                boundary_factor,
                EvidenceDescription::WordBoundaryCount {
                    count: word_boundary_count,
                },
            );
        }

        // Gap penalty for fuzzy matches
        if match_type == MatchType::Fuzzy && positions_ref.len() > 1 {
            let total_gap = self.total_gap(positions_ref);
            let gap_factor = 1.0 / (1.0 + total_gap as f64 * 0.1);
            evidence.add(
                EvidenceKind::GapPenalty,
                gap_factor,
                EvidenceDescription::GapTotal { total: total_gap },
            );
        }

        // Title length: prefer shorter (more specific) titles
        let title_len = title.len().max(1);
        let length_factor = 1.0 + (query.len() as f64 / title_len as f64) * 0.2;
        evidence.add(
            EvidenceKind::TitleLength,
            length_factor,
            EvidenceDescription::CoveragePercent {
                percent: (query.len() as f64 / title_len as f64) * 100.0,
            },
        );

        let score = evidence.posterior_probability();

        MatchResult {
            score,
            match_type,
            match_positions: positions,
            evidence,
        }
    }

    /// Count how many matched positions are at word boundaries.
    ///
    /// NOTE(review): `positions` are treated as byte offsets into `title`,
    /// which is correct for the ASCII fast path. The Unicode detect path
    /// produces *char* indices, so boundary counting can drift for
    /// multi-byte titles — confirm whether that heuristic inaccuracy is
    /// acceptable or positions should be normalized first.
    fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
        let title_bytes = title.as_bytes();
        let mut count = 0;
        for &pos in positions {
            let is_boundary = if pos == 0 {
                true
            } else {
                let prev = title_bytes
                    .get(pos.saturating_sub(1))
                    .copied()
                    .unwrap_or(b' ');
                prev == b' ' || prev == b'-' || prev == b'_'
            };
            if is_boundary {
                count += 1;
            }
        }
        count
    }

    /// Calculate total gap between matched positions.
    ///
    /// A gap is the number of unmatched characters between two adjacent
    /// matched positions; `saturating_sub` keeps unsorted input from
    /// underflowing.
    fn total_gap(&self, positions: &[usize]) -> usize {
        if positions.len() < 2 {
            return 0;
        }
        let mut total = 0;
        for window in positions.windows(2) {
            total += window[1].saturating_sub(window[0]).saturating_sub(1);
        }
        total
    }
}
854
855// ---------------------------------------------------------------------------
856// Conformal Rank Confidence
857// ---------------------------------------------------------------------------
858
/// Confidence level for a ranking position.
///
/// Derived from distribution-free conformal prediction: we compute
/// nonconformity scores (score gaps) and calibrate them against the
/// empirical distribution of all gaps in the result set.
///
/// # Invariants
///
/// 1. `confidence` is in `[0.0, 1.0]`.
/// 2. A gap of zero always yields [`RankStability::Unstable`] stability.
/// 3. Deterministic: same scores → same confidence.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    /// Probability that this item truly belongs at this rank position.
    /// Computed as the fraction of score gaps that are smaller than
    /// the gap between this item and the next.
    pub confidence: f64,

    /// Absolute score gap to the next-ranked item (0.0 if last or tied).
    pub gap_to_next: f64,

    /// Stability classification derived from gap analysis.
    pub stability: RankStability,
}
883
/// Stability classification for a rank position.
///
/// See [`RankStability::label`] for the user-facing string form.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    /// Score gap is large relative to the distribution — rank is reliable.
    Stable,
    /// Score gap is moderate — rank is plausible but could swap with neighbors.
    Marginal,
    /// Score gap is negligible — rank is essentially a tie.
    Unstable,
}
894
895impl RankStability {
896    /// Human-readable label.
897    pub fn label(self) -> &'static str {
898        match self {
899            Self::Stable => "stable",
900            Self::Marginal => "marginal",
901            Self::Unstable => "unstable",
902        }
903    }
904}
905
/// Result of ranking a set of match results with conformal confidence.
#[derive(Debug, Clone)]
pub struct RankedResults {
    /// Items sorted by descending score, each with rank confidence.
    pub items: Vec<RankedItem>,

    /// Summary statistics about the ranking (counts, ties, median gap).
    pub summary: RankingSummary,
}
915
/// A single item in the ranked results.
#[derive(Debug, Clone)]
pub struct RankedItem {
    /// Index into the original (pre-sort) input slice, so callers can map
    /// a ranked position back to the item they submitted.
    pub original_index: usize,

    /// The match result.
    pub result: MatchResult,

    /// Conformal confidence for this rank position.
    pub rank_confidence: RankConfidence,
}
928
/// Summary statistics for a ranked result set.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    /// Number of items in the ranking.
    pub count: usize,

    /// Number of items with stable rank positions.
    pub stable_count: usize,

    /// Number of tie groups (runs of adjacent items whose score gaps fall
    /// below the ranker's tie epsilon).
    pub tie_group_count: usize,

    /// Median score gap between adjacent ranked items (0.0 when there are
    /// fewer than two items).
    pub median_gap: f64,
}
944
/// Conformal ranker that assigns distribution-free confidence to rank positions.
///
/// # Method
///
/// Given sorted scores `s_1 ≥ s_2 ≥ ... ≥ s_n`, we define the nonconformity
/// score for position `i` as the gap `g_i = s_i - s_{i+1}`.
///
/// The conformal p-value for position `i` is:
///
/// ```text
/// p_i = |{j : g_j ≤ g_i}| / (n - 1)
/// ```
///
/// This gives the fraction of gaps that are at most as large as this item's
/// gap, which we interpret as confidence that the item is correctly ranked
/// above its successor.
///
/// # Tie Detection
///
/// Two scores are considered tied when their gap is below `tie_epsilon`.
/// The default epsilon is `1e-9`, suitable for f64 posterior probabilities.
///
/// # Failure Modes
///
/// - **All scores identical**: Every position is classified `Unstable` via the
///   tie path (every gap falls below `tie_epsilon`), even though the raw
///   p-value is 1.0 because all gaps are equal.
/// - **Single item**: Confidence is 1.0 (trivially correct ranking).
/// - **Empty input**: Returns empty results with zeroed summary.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    /// Threshold below which two scores are considered tied.
    pub tie_epsilon: f64,

    /// Confidence threshold for `Stable` classification.
    /// NOTE(review): assumed to be ≥ `marginal_threshold`; not validated.
    pub stable_threshold: f64,

    /// Confidence threshold for `Marginal` (below this is `Unstable`).
    pub marginal_threshold: f64,
}
983
984impl Default for ConformalRanker {
985    fn default() -> Self {
986        Self {
987            tie_epsilon: 1e-9,
988            stable_threshold: 0.7,
989            marginal_threshold: 0.3,
990        }
991    }
992}
993
impl ConformalRanker {
    /// Create a ranker with default thresholds.
    pub fn new() -> Self {
        Self::default()
    }

    /// Rank a set of match results and assign conformal confidence.
    ///
    /// Results are sorted by descending score; ties are broken by
    /// `MatchType` (higher variant first).
    ///
    /// NOTE(review): an earlier version of this doc also promised a tie-break
    /// on title length — the sort below does not implement one, so items tied
    /// on (score, match_type) keep the stable `sort_by` relative order.
    pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
        let count = results.len();

        // Empty input: zeroed summary, no items (documented failure mode).
        if count == 0 {
            return RankedResults {
                items: Vec::new(),
                summary: RankingSummary {
                    count: 0,
                    stable_count: 0,
                    tie_group_count: 0,
                    median_gap: 0.0,
                },
            };
        }

        // Tag each result with its original index, then sort descending by
        // score with MatchType as the secondary key. Incomparable (NaN)
        // scores are treated as equal to keep the comparator total.
        let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
        indexed.sort_by(|a, b| {
            b.1.score
                .partial_cmp(&a.1.score)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
        });

        // Compute the n-1 nonconformity scores: gaps between adjacent sorted
        // scores. `max(0.0)` clamps any negative difference (possible only if
        // NaN scores disturbed the sort order) to zero.
        let gaps: Vec<f64> = if count > 1 {
            indexed
                .windows(2)
                .map(|w| (w[0].1.score - w[1].1.score).max(0.0))
                .collect()
        } else {
            Vec::new()
        };

        // Sort gaps for computing conformal p-values (fraction of gaps ≤ g_i).
        let mut sorted_gaps = gaps.clone();
        sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        // Compute rank confidence for each position.
        let mut items: Vec<RankedItem> = Vec::with_capacity(count);
        let mut stable_count = 0;
        let mut tie_group_count = 0;
        let mut in_tie_group = false;

        for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
            // The last position has no successor, so its gap defaults to 0.0
            // (which routes it through the tie path below).
            let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);

            // Conformal p-value: fraction of gaps that are ≤ this gap. The
            // half-epsilon nudge keeps float rounding from excluding gaps
            // that are numerically equal to this one.
            let confidence = if sorted_gaps.is_empty() {
                // Single item: trivially ranked correctly.
                1.0
            } else {
                let leq_count =
                    sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
                leq_count as f64 / sorted_gaps.len() as f64
            };

            // A sub-epsilon gap is a tie: force Unstable, and count each
            // contiguous run of tied positions as a single tie group.
            let is_tie = gap_to_next < self.tie_epsilon;
            let stability = if is_tie {
                if !in_tie_group {
                    tie_group_count += 1;
                    in_tie_group = true;
                }
                RankStability::Unstable
            } else {
                in_tie_group = false;
                if confidence >= self.stable_threshold {
                    stable_count += 1;
                    RankStability::Stable
                } else if confidence >= self.marginal_threshold {
                    RankStability::Marginal
                } else {
                    RankStability::Unstable
                }
            };

            items.push(RankedItem {
                original_index: orig_idx,
                result,
                rank_confidence: RankConfidence {
                    confidence,
                    gap_to_next,
                    stability,
                },
            });
        }

        // Median of the sorted gaps; an even count averages the two middle
        // values. Empty (single item) → 0.0.
        let median_gap = if sorted_gaps.is_empty() {
            0.0
        } else {
            let mid = sorted_gaps.len() / 2;
            if sorted_gaps.len().is_multiple_of(2) {
                (sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
            } else {
                sorted_gaps[mid]
            }
        };

        RankedResults {
            items,
            summary: RankingSummary {
                count,
                stable_count,
                tie_group_count,
                median_gap,
            },
        }
    }

    /// Convenience: rank the top-k items only.
    ///
    /// All items are still scored and sorted, but only the top `k` are
    /// returned (with correct confidence relative to the full set).
    ///
    /// NOTE(review): `summary.count`, `tie_group_count`, and `median_gap`
    /// still describe the full pre-truncation set; only `stable_count` is
    /// recomputed over the surviving items. Confirm this is intended.
    pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
        let mut ranked = self.rank(results);
        ranked.items.truncate(k);
        // Stable count may decrease after truncation.
        ranked.summary.stable_count = ranked
            .items
            .iter()
            .filter(|item| item.rank_confidence.stability == RankStability::Stable)
            .count();
        ranked
    }
}
1131
1132impl fmt::Display for RankConfidence {
1133    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1134        write!(
1135            f,
1136            "confidence={:.3} gap={:.4} ({})",
1137            self.confidence,
1138            self.gap_to_next,
1139            self.stability.label()
1140        )
1141    }
1142}
1143
1144impl fmt::Display for RankingSummary {
1145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1146        write!(
1147            f,
1148            "{} items, {} stable, {} tie groups, median gap {:.4}",
1149            self.count, self.stable_count, self.tie_group_count, self.median_gap
1150        )
1151    }
1152}
1153
1154// ---------------------------------------------------------------------------
1155// Incremental Scorer (bd-39y4.13)
1156// ---------------------------------------------------------------------------
1157
/// Cached entry from a previous scoring pass.
///
/// One entry is stored per previously-matching item so incremental passes
/// can re-score only those corpus positions.
#[derive(Debug, Clone)]
// NOTE(review): `corpus_index` is read by the incremental scan methods, so
// this allowance looks like a leftover — confirm before removing.
#[allow(dead_code)]
struct CachedEntry {
    /// Index into the corpus.
    corpus_index: usize,
}
1165
/// Incremental scorer that caches results across keystrokes.
///
/// When the user types one more character, the new query is a prefix-extension
/// of the old query. Items that didn't match the shorter query can't match the
/// longer one (monotonicity of substring/prefix/fuzzy matching), so we skip them.
///
/// # Invariants
///
/// 1. **Correctness**: Incremental results are identical to a full rescan.
///    Adding characters can only reduce or maintain the match set, never expand it.
/// 2. **Determinism**: Same (query, corpus) → identical results.
/// 3. **Cache coherence**: Cache is invalidated when corpus changes or query
///    does not extend the previous query.
///
/// # Performance Model
///
/// Let `N` = corpus size, `M` = number of matches for the previous query.
/// - Full scan: O(N × L) where L is average title length.
/// - Incremental (query extends previous): O(M × L) where M ≤ N.
/// - Typical command palettes have M ≪ N after 2-3 characters.
///
/// # Failure Modes
///
/// - **Corpus mutation**: If the corpus changes between calls, results may be
///   stale. Call `invalidate()` or let the generation counter detect it.
/// - **Non-extending query** (e.g., backspace): Falls back to full scan.
///   This is correct but loses the incremental speedup.
#[derive(Debug)]
pub struct IncrementalScorer {
    /// Underlying scorer.
    scorer: BayesianScorer,
    /// Previous query string; the next call checks `starts_with` against it
    /// to detect a prefix extension.
    prev_query: String,
    /// Cached matching entries from the previous pass.
    cache: Vec<CachedEntry>,
    /// Generation counter for corpus change detection.
    corpus_generation: u64,
    /// Number of items in the corpus at cache time; a length change also
    /// invalidates the cache.
    corpus_len: usize,
    /// Statistics for diagnostics.
    stats: IncrementalStats,
}
1208
/// Diagnostics for incremental scoring performance.
///
/// All counters are cumulative over the scorer's lifetime.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    /// Number of full rescans performed.
    pub full_scans: u64,
    /// Number of incremental (pruned) scans performed.
    pub incremental_scans: u64,
    /// Total items evaluated across all scans.
    pub total_evaluated: u64,
    /// Total items pruned (skipped) across incremental scans.
    pub total_pruned: u64,
}
1221
1222impl IncrementalStats {
1223    /// Fraction of evaluations saved by incremental scoring.
1224    pub fn prune_ratio(&self) -> f64 {
1225        let total = self.total_evaluated + self.total_pruned;
1226        if total == 0 {
1227            0.0
1228        } else {
1229            self.total_pruned as f64 / total as f64
1230        }
1231    }
1232}
1233
1234impl IncrementalScorer {
1235    /// Create a new incremental scorer (evidence tracking disabled for speed).
1236    pub fn new() -> Self {
1237        Self {
1238            scorer: BayesianScorer::fast(),
1239            prev_query: String::new(),
1240            cache: Vec::new(),
1241            corpus_generation: 0,
1242            corpus_len: 0,
1243            stats: IncrementalStats::default(),
1244        }
1245    }
1246
1247    /// Create with explicit scorer configuration.
1248    pub fn with_scorer(scorer: BayesianScorer) -> Self {
1249        Self {
1250            scorer,
1251            prev_query: String::new(),
1252            cache: Vec::new(),
1253            corpus_generation: 0,
1254            corpus_len: 0,
1255            stats: IncrementalStats::default(),
1256        }
1257    }
1258
1259    /// Invalidate the cache (e.g., when corpus changes).
1260    pub fn invalidate(&mut self) {
1261        self.prev_query.clear();
1262        self.cache.clear();
1263        self.corpus_generation = self.corpus_generation.wrapping_add(1);
1264    }
1265
1266    /// Get diagnostic statistics.
1267    pub fn stats(&self) -> &IncrementalStats {
1268        &self.stats
1269    }
1270
1271    /// Score a query against a corpus, using cached results when possible.
1272    ///
1273    /// Returns indices into the corpus and their match results, sorted by
1274    /// descending score. Only items with score > 0 are returned.
1275    ///
1276    /// # Arguments
1277    ///
1278    /// * `query` - The current search query.
1279    /// * `corpus` - Slice of title strings to search.
1280    /// * `generation` - Optional generation counter; if it differs from the
1281    ///   cached value, the cache is invalidated.
1282    pub fn score_corpus(
1283        &mut self,
1284        query: &str,
1285        corpus: &[&str],
1286        generation: Option<u64>,
1287    ) -> Vec<(usize, MatchResult)> {
1288        // Detect corpus changes.
1289        let generation_val = generation.unwrap_or(self.corpus_generation);
1290        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1291            self.invalidate();
1292            self.corpus_generation = generation_val;
1293            self.corpus_len = corpus.len();
1294        }
1295
1296        // Determine if we can use incremental scoring.
1297        let can_prune = !self.prev_query.is_empty()
1298            && query.starts_with(&self.prev_query)
1299            && !self.cache.is_empty();
1300
1301        let query_lower = query.to_lowercase();
1302        let results = if can_prune {
1303            self.score_incremental(query, &query_lower, corpus)
1304        } else {
1305            self.score_full(query, &query_lower, corpus)
1306        };
1307
1308        // Update cache state.
1309        self.prev_query.clear();
1310        self.prev_query.push_str(query);
1311        self.cache = results
1312            .iter()
1313            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1314            .collect();
1315
1316        results
1317    }
1318
1319    /// Score a query against a corpus with pre-lowercased titles.
1320    ///
1321    /// `corpus_lower` must align 1:1 with `corpus`.
1322    pub fn score_corpus_with_lowered(
1323        &mut self,
1324        query: &str,
1325        corpus: &[&str],
1326        corpus_lower: &[&str],
1327        generation: Option<u64>,
1328    ) -> Vec<(usize, MatchResult)> {
1329        debug_assert_eq!(
1330            corpus.len(),
1331            corpus_lower.len(),
1332            "corpus_lower must match corpus length"
1333        );
1334
1335        // Detect corpus changes.
1336        let generation_val = generation.unwrap_or(self.corpus_generation);
1337        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1338            self.invalidate();
1339            self.corpus_generation = generation_val;
1340            self.corpus_len = corpus.len();
1341        }
1342
1343        // Determine if we can use incremental scoring.
1344        let can_prune = !self.prev_query.is_empty()
1345            && query.starts_with(&self.prev_query)
1346            && !self.cache.is_empty();
1347
1348        let query_lower = query.to_lowercase();
1349        let results = if can_prune {
1350            self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
1351        } else {
1352            self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
1353        };
1354
1355        // Update cache state.
1356        self.prev_query.clear();
1357        self.prev_query.push_str(query);
1358        self.cache = results
1359            .iter()
1360            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1361            .collect();
1362
1363        results
1364    }
1365
1366    /// Score a query against a corpus with pre-lowercased titles and word-start cache.
1367    ///
1368    /// `corpus_lower` and `word_starts` must align 1:1 with `corpus`.
1369    pub fn score_corpus_with_lowered_and_words(
1370        &mut self,
1371        query: &str,
1372        corpus: &[String],
1373        corpus_lower: &[String],
1374        word_starts: &[Vec<usize>],
1375        generation: Option<u64>,
1376    ) -> Vec<(usize, MatchResult)> {
1377        debug_assert_eq!(
1378            corpus.len(),
1379            corpus_lower.len(),
1380            "corpus_lower must match corpus length"
1381        );
1382        debug_assert_eq!(
1383            corpus.len(),
1384            word_starts.len(),
1385            "word_starts must match corpus length"
1386        );
1387
1388        // Detect corpus changes.
1389        let generation_val = generation.unwrap_or(self.corpus_generation);
1390        if generation_val != self.corpus_generation || corpus.len() != self.corpus_len {
1391            self.invalidate();
1392            self.corpus_generation = generation_val;
1393            self.corpus_len = corpus.len();
1394        }
1395
1396        // Determine if we can use incremental scoring.
1397        let can_prune = !self.prev_query.is_empty()
1398            && query.starts_with(&self.prev_query)
1399            && !self.cache.is_empty();
1400
1401        let query_lower = query.to_lowercase();
1402        let results = if can_prune {
1403            self.score_incremental_lowered_with_words(
1404                query,
1405                &query_lower,
1406                corpus,
1407                corpus_lower,
1408                word_starts,
1409            )
1410        } else {
1411            self.score_full_lowered_with_words(
1412                query,
1413                &query_lower,
1414                corpus,
1415                corpus_lower,
1416                word_starts,
1417            )
1418        };
1419
1420        // Update cache state.
1421        self.prev_query.clear();
1422        self.prev_query.push_str(query);
1423        self.cache = results
1424            .iter()
1425            .map(|(idx, _result)| CachedEntry { corpus_index: *idx })
1426            .collect();
1427
1428        results
1429    }
1430
1431    /// Full scan: score every item in the corpus.
1432    fn score_full(
1433        &mut self,
1434        query: &str,
1435        query_lower: &str,
1436        corpus: &[&str],
1437    ) -> Vec<(usize, MatchResult)> {
1438        self.stats.full_scans += 1;
1439        self.stats.total_evaluated += corpus.len() as u64;
1440
1441        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1442        for (i, title) in corpus.iter().enumerate() {
1443            let result = self
1444                .scorer
1445                .score_with_query_lower(query, query_lower, title);
1446            if result.score > 0.0 {
1447                results.push((i, result));
1448            }
1449        }
1450
1451        results.sort_unstable_by(compare_ranked_match_results);
1452
1453        results
1454    }
1455
1456    /// Full scan with pre-lowercased titles.
1457    fn score_full_lowered(
1458        &mut self,
1459        query: &str,
1460        query_lower: &str,
1461        corpus: &[&str],
1462        corpus_lower: &[&str],
1463    ) -> Vec<(usize, MatchResult)> {
1464        self.stats.full_scans += 1;
1465        self.stats.total_evaluated += corpus.len() as u64;
1466
1467        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1468        for (i, (title, title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
1469            let result =
1470                self.scorer
1471                    .score_with_lowered_title(query, query_lower, title, title_lower);
1472            if result.score > 0.0 {
1473                results.push((i, result));
1474            }
1475        }
1476
1477        results.sort_unstable_by(compare_ranked_match_results);
1478
1479        results
1480    }
1481
1482    /// Full scan with pre-lowercased titles and word-start cache.
1483    fn score_full_lowered_with_words(
1484        &mut self,
1485        query: &str,
1486        query_lower: &str,
1487        corpus: &[String],
1488        corpus_lower: &[String],
1489        word_starts: &[Vec<usize>],
1490    ) -> Vec<(usize, MatchResult)> {
1491        self.stats.full_scans += 1;
1492        self.stats.total_evaluated += corpus.len() as u64;
1493
1494        let mut results: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
1495        for (i, ((title, title_lower), starts)) in corpus
1496            .iter()
1497            .zip(corpus_lower.iter())
1498            .zip(word_starts.iter())
1499            .enumerate()
1500        {
1501            let result = self.scorer.score_with_lowered_title_and_words(
1502                query,
1503                query_lower,
1504                title,
1505                title_lower,
1506                Some(starts),
1507            );
1508            if result.score > 0.0 {
1509                results.push((i, result));
1510            }
1511        }
1512
1513        results.sort_unstable_by(compare_ranked_match_results);
1514
1515        results
1516    }
1517    /// Incremental scan: only re-score items that previously matched.
1518    fn score_incremental(
1519        &mut self,
1520        query: &str,
1521        query_lower: &str,
1522        corpus: &[&str],
1523    ) -> Vec<(usize, MatchResult)> {
1524        self.stats.incremental_scans += 1;
1525
1526        let prev_match_count = self.cache.len();
1527        let pruned = corpus.len().saturating_sub(prev_match_count);
1528        self.stats.total_pruned += pruned as u64;
1529        self.stats.total_evaluated += prev_match_count as u64;
1530
1531        let mut results: Vec<(usize, MatchResult)> =
1532            Vec::with_capacity(self.cache.len().min(corpus.len()));
1533        for entry in &self.cache {
1534            if entry.corpus_index < corpus.len() {
1535                let result = self.scorer.score_with_query_lower(
1536                    query,
1537                    query_lower,
1538                    corpus[entry.corpus_index],
1539                );
1540                if result.score > 0.0 {
1541                    results.push((entry.corpus_index, result));
1542                }
1543            }
1544        }
1545
1546        results.sort_unstable_by(compare_ranked_match_results);
1547
1548        results
1549    }
1550
1551    /// Incremental scan with pre-lowercased titles.
1552    fn score_incremental_lowered(
1553        &mut self,
1554        query: &str,
1555        query_lower: &str,
1556        corpus: &[&str],
1557        corpus_lower: &[&str],
1558    ) -> Vec<(usize, MatchResult)> {
1559        self.stats.incremental_scans += 1;
1560
1561        let prev_match_count = self.cache.len();
1562        let pruned = corpus.len().saturating_sub(prev_match_count);
1563        self.stats.total_pruned += pruned as u64;
1564        self.stats.total_evaluated += prev_match_count as u64;
1565
1566        let mut results: Vec<(usize, MatchResult)> =
1567            Vec::with_capacity(self.cache.len().min(corpus.len()));
1568        for entry in &self.cache {
1569            if entry.corpus_index < corpus.len() {
1570                let title = corpus[entry.corpus_index];
1571                let title_lower = corpus_lower[entry.corpus_index];
1572                let result =
1573                    self.scorer
1574                        .score_with_lowered_title(query, query_lower, title, title_lower);
1575                if result.score > 0.0 {
1576                    results.push((entry.corpus_index, result));
1577                }
1578            }
1579        }
1580
1581        results.sort_unstable_by(compare_ranked_match_results);
1582
1583        results
1584    }
1585
1586    /// Incremental scan with pre-lowercased titles and word-start cache.
1587    fn score_incremental_lowered_with_words(
1588        &mut self,
1589        query: &str,
1590        query_lower: &str,
1591        corpus: &[String],
1592        corpus_lower: &[String],
1593        word_starts: &[Vec<usize>],
1594    ) -> Vec<(usize, MatchResult)> {
1595        self.stats.incremental_scans += 1;
1596
1597        let prev_match_count = self.cache.len();
1598        let pruned = corpus.len().saturating_sub(prev_match_count);
1599        self.stats.total_pruned += pruned as u64;
1600        self.stats.total_evaluated += prev_match_count as u64;
1601
1602        let mut results: Vec<(usize, MatchResult)> =
1603            Vec::with_capacity(self.cache.len().min(corpus.len()));
1604        for entry in &self.cache {
1605            if entry.corpus_index < corpus.len() {
1606                let title = &corpus[entry.corpus_index];
1607                let title_lower = &corpus_lower[entry.corpus_index];
1608                let starts = &word_starts[entry.corpus_index];
1609                let result = self.scorer.score_with_lowered_title_and_words(
1610                    query,
1611                    query_lower,
1612                    title,
1613                    title_lower,
1614                    Some(starts),
1615                );
1616                if result.score > 0.0 {
1617                    results.push((entry.corpus_index, result));
1618                }
1619            }
1620        }
1621
1622        results.sort_by(|a, b| {
1623            b.1.score
1624                .partial_cmp(&a.1.score)
1625                .unwrap_or(std::cmp::Ordering::Equal)
1626                .then_with(|| b.1.match_type.cmp(&a.1.match_type))
1627        });
1628
1629        results
1630    }
1631}
1632
1633impl Default for IncrementalScorer {
1634    fn default() -> Self {
1635        Self::new()
1636    }
1637}
1638
1639// ---------------------------------------------------------------------------
1640// Tests
1641// ---------------------------------------------------------------------------
1642
1643#[cfg(test)]
1644mod tests {
1645    use super::*;
1646
1647    // --- Match Type Tests ---
1648
1649    #[test]
1650    fn exact_match_highest_score() {
1651        let scorer = BayesianScorer::new();
1652        let result = scorer.score("Settings", "Settings");
1653        assert_eq!(result.match_type, MatchType::Exact);
1654        assert!(result.score > 0.95, "Exact match should score > 0.95");
1655    }
1656
1657    #[test]
1658    fn prefix_match_high_score() {
1659        let scorer = BayesianScorer::new();
1660        let result = scorer.score("set", "Settings");
1661        assert_eq!(result.match_type, MatchType::Prefix);
1662        assert!(result.score > 0.85, "Prefix match should score > 0.85");
1663    }
1664
1665    #[test]
1666    fn word_start_match_score() {
1667        let scorer = BayesianScorer::new();
1668        let result = scorer.score("gd", "Go Dashboard");
1669        assert_eq!(result.match_type, MatchType::WordStart);
1670        assert!(result.score > 0.75, "Word-start should score > 0.75");
1671    }
1672
1673    #[test]
1674    fn substring_match_moderate_score() {
1675        let scorer = BayesianScorer::new();
1676        let result = scorer.score("set", "Asset Manager");
1677        assert_eq!(result.match_type, MatchType::Substring);
1678        assert!(result.score > 0.5, "Substring should score > 0.5");
1679    }
1680
1681    #[test]
1682    fn fuzzy_match_low_score() {
1683        let scorer = BayesianScorer::new();
1684        let result = scorer.score("stg", "Settings");
1685        assert_eq!(result.match_type, MatchType::Fuzzy);
1686        assert!(result.score > 0.2, "Fuzzy should score > 0.2");
1687        assert!(result.score < 0.7, "Fuzzy should score < 0.7");
1688    }
1689
1690    #[test]
1691    fn no_match_returns_zero() {
1692        let scorer = BayesianScorer::new();
1693        let result = scorer.score("xyz", "Settings");
1694        assert_eq!(result.match_type, MatchType::NoMatch);
1695        assert_eq!(result.score, 0.0);
1696    }
1697
1698    // --- Internal ASCII Matcher Edge Cases ---
1699
1700    #[test]
1701    fn word_start_match_ascii_empty_query_matches() {
1702        let scorer = BayesianScorer::new();
1703        let positions = scorer
1704            .word_start_match_ascii(b"", b"go dashboard", None)
1705            .expect("empty query should match");
1706        assert!(positions.is_empty());
1707    }
1708
1709    #[test]
1710    fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
1711        let scorer = BayesianScorer::new();
1712        let starts = [999, 0, 3];
1713        let positions = scorer
1714            .word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
1715            .expect("should still match despite out-of-bounds precomputed starts");
1716        assert_eq!(positions, vec![0, 3]);
1717    }
1718
1719    #[test]
1720    fn fuzzy_match_ascii_empty_query_matches() {
1721        let scorer = BayesianScorer::new();
1722        let positions = scorer
1723            .fuzzy_match_ascii(b"", b"settings")
1724            .expect("empty query should match");
1725        assert!(positions.is_empty());
1726    }
1727
1728    // --- Score Invariants ---
1729
1730    #[test]
1731    fn score_bounded() {
1732        let scorer = BayesianScorer::new();
1733        let test_cases = [
1734            ("a", "abcdefghijklmnop"),
1735            ("full", "full"),
1736            ("", "anything"),
1737            ("xyz", "abc"),
1738            ("stg", "Settings"),
1739        ];
1740
1741        for (query, title) in test_cases {
1742            let result = scorer.score(query, title);
1743            assert!(
1744                result.score >= 0.0 && result.score <= 1.0,
1745                "Score for ({}, {}) = {} not in [0, 1]",
1746                query,
1747                title,
1748                result.score
1749            );
1750        }
1751    }
1752
1753    #[test]
1754    fn score_deterministic() {
1755        let scorer = BayesianScorer::new();
1756        let result1 = scorer.score("nav", "Navigation");
1757        let result2 = scorer.score("nav", "Navigation");
1758        assert!(
1759            (result1.score - result2.score).abs() < f64::EPSILON,
1760            "Same input should produce identical scores"
1761        );
1762    }
1763
1764    #[test]
1765    fn tiebreak_shorter_first() {
1766        let scorer = BayesianScorer::new();
1767        let short = scorer.score("set", "Set");
1768        let long = scorer.score("set", "Settings");
1769        assert!(
1770            short.score >= long.score,
1771            "Shorter title should score >= longer: {} vs {}",
1772            short.score,
1773            long.score
1774        );
1775    }
1776
1777    // --- Case Insensitivity ---
1778
1779    #[test]
1780    fn case_insensitive() {
1781        let scorer = BayesianScorer::new();
1782        let lower = scorer.score("set", "Settings");
1783        let upper = scorer.score("SET", "Settings");
1784        assert!(
1785            (lower.score - upper.score).abs() < f64::EPSILON,
1786            "Case should not affect score"
1787        );
1788    }
1789
1790    // --- Match Positions ---
1791
1792    #[test]
1793    fn match_positions_correct() {
1794        let scorer = BayesianScorer::new();
1795        let result = scorer.score("gd", "Go Dashboard");
1796        assert_eq!(result.match_positions, vec![0, 3]);
1797    }
1798
1799    #[test]
1800    fn fuzzy_match_positions() {
1801        let scorer = BayesianScorer::new();
1802        let result = scorer.score("stg", "Settings");
1803        // s(0), t(3), g(6)
1804        assert_eq!(result.match_positions.len(), 3);
1805        assert_eq!(result.match_positions[0], 0); // 's'
1806    }
1807
1808    // --- Empty Query ---
1809
1810    #[test]
1811    fn empty_query_returns_all() {
1812        let scorer = BayesianScorer::new();
1813        let result = scorer.score("", "Any Command");
1814        assert!(result.score > 0.0, "Empty query should have positive score");
1815        assert!(result.score < 1.0, "Empty query should not be max score");
1816    }
1817
1818    #[test]
1819    fn empty_title_is_safe() {
1820        let scorer = BayesianScorer::new();
1821        let result = scorer.score("x", "");
1822        assert_eq!(result.match_type, MatchType::NoMatch);
1823        assert!(result.score.is_finite(), "Score should remain finite");
1824    }
1825
1826    #[test]
1827    fn empty_query_empty_title_is_finite() {
1828        let scorer = BayesianScorer::new();
1829        let result = scorer.score("", "");
1830        assert!(result.score.is_finite(), "Score should remain finite");
1831        assert_eq!(result.match_type, MatchType::Fuzzy);
1832    }
1833
1834    // --- Query Longer Than Title ---
1835
1836    #[test]
1837    fn query_longer_than_title() {
1838        let scorer = BayesianScorer::new();
1839        let result = scorer.score("verylongquery", "short");
1840        assert_eq!(result.match_type, MatchType::NoMatch);
1841        assert_eq!(result.score, 0.0);
1842    }
1843
1844    #[test]
1845    fn long_query_exact_match_is_handled() {
1846        let scorer = BayesianScorer::new();
1847        let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
1848        let result = scorer.score(query, query);
1849        assert_eq!(result.match_type, MatchType::Exact);
1850        assert!(result.score.is_finite());
1851        assert!(result.score > 0.9, "Exact long query should score high");
1852    }
1853
1854    #[test]
1855    fn gap_penalty_prefers_tight_fuzzy_matches() {
1856        let scorer = BayesianScorer::new();
1857        let tight = scorer.score("ace", "abcde");
1858        let gappy = scorer.score("ace", "a...c...e");
1859        assert_eq!(tight.match_type, MatchType::Fuzzy);
1860        assert_eq!(gappy.match_type, MatchType::Fuzzy);
1861        assert!(
1862            tight.score > gappy.score,
1863            "Tight fuzzy match should score higher than gappy: {} vs {}",
1864            tight.score,
1865            gappy.score
1866        );
1867    }
1868
1869    // --- Tag Matching ---
1870
1871    #[test]
1872    fn tag_match_boosts_score() {
1873        let scorer = BayesianScorer::new();
1874        // Use a query that matches the title (fuzzy)
1875        let without_tag = scorer.score("set", "Settings");
1876        let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
1877        // Tag "setup" contains "set", so it should boost the score
1878        assert!(
1879            with_tag.score > without_tag.score,
1880            "Tag match should boost score: {} > {}",
1881            with_tag.score,
1882            without_tag.score
1883        );
1884    }
1885
1886    // --- Evidence Ledger ---
1887
1888    #[test]
1889    fn evidence_ledger_tracks_factors() {
1890        let scorer = BayesianScorer::new();
1891        let result = scorer.score("set", "Settings");
1892
1893        assert!(!result.evidence.entries().is_empty());
1894
1895        // Should have match type entry
1896        assert!(
1897            result
1898                .evidence
1899                .entries()
1900                .iter()
1901                .any(|e| e.kind == EvidenceKind::MatchType)
1902        );
1903    }
1904
1905    #[test]
1906    fn evidence_ledger_display() {
1907        let scorer = BayesianScorer::new();
1908        let result = scorer.score("gd", "Go Dashboard");
1909        let display = format!("{}", result.evidence);
1910        assert!(display.contains("Evidence Ledger"));
1911        assert!(display.contains("Posterior P:"));
1912    }
1913
1914    // --- Property Tests ---
1915
1916    #[test]
1917    fn property_ordering_total() {
1918        let scorer = BayesianScorer::new();
1919        let titles = ["Settings", "Set Theme", "Reset View", "Asset"];
1920
1921        let mut scores: Vec<(f64, &str)> = titles
1922            .iter()
1923            .map(|t| (scorer.score("set", t).score, *t))
1924            .collect();
1925
1926        // Sort should be stable and total
1927        scores.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap());
1928
1929        // Verify no NaN or infinite scores
1930        for (score, _) in &scores {
1931            assert!(score.is_finite());
1932        }
1933    }
1934
1935    #[test]
1936    fn property_prefix_monotonic() {
1937        let scorer = BayesianScorer::new();
1938        // Longer exact prefix match should score higher
1939        let one_char = scorer.score("s", "Settings");
1940        let three_char = scorer.score("set", "Settings");
1941        // Both are prefix matches, longer should be better
1942        assert!(
1943            three_char.score >= one_char.score,
1944            "Longer prefix should score >= shorter"
1945        );
1946    }
1947
1948    // --- Match Type Prior Odds ---
1949
1950    #[test]
1951    fn match_type_prior_ordering() {
1952        assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
1953        assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
1954        assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
1955        assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
1956        assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
1957    }
1958
1959    // -----------------------------------------------------------------------
1960    // Conformal Ranker Tests
1961    // -----------------------------------------------------------------------
1962
1963    #[test]
1964    fn conformal_tie_breaks_by_match_type() {
1965        let mut exact = MatchResult::no_match();
1966        exact.score = 0.5;
1967        exact.match_type = MatchType::Exact;
1968
1969        let mut prefix = MatchResult::no_match();
1970        prefix.score = 0.5;
1971        prefix.match_type = MatchType::Prefix;
1972
1973        let mut word_start = MatchResult::no_match();
1974        word_start.score = 0.5;
1975        word_start.match_type = MatchType::WordStart;
1976
1977        let mut substring = MatchResult::no_match();
1978        substring.score = 0.5;
1979        substring.match_type = MatchType::Substring;
1980
1981        let ranker = ConformalRanker::new();
1982        let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
1983        let order: Vec<MatchType> = ranked
1984            .items
1985            .iter()
1986            .map(|item| item.result.match_type)
1987            .collect();
1988
1989        assert_eq!(
1990            order,
1991            vec![
1992                MatchType::Exact,
1993                MatchType::Prefix,
1994                MatchType::WordStart,
1995                MatchType::Substring
1996            ]
1997        );
1998    }
1999
2000    #[test]
2001    fn conformal_empty_input() {
2002        let ranker = ConformalRanker::new();
2003        let ranked = ranker.rank(Vec::new());
2004        assert_eq!(ranked.items.len(), 0);
2005        assert_eq!(ranked.summary.count, 0);
2006        assert_eq!(ranked.summary.stable_count, 0);
2007        assert_eq!(ranked.summary.tie_group_count, 0);
2008        assert_eq!(ranked.summary.median_gap, 0.0);
2009    }
2010
2011    #[test]
2012    fn conformal_single_item() {
2013        let scorer = BayesianScorer::new();
2014        let results = vec![scorer.score("set", "Settings")];
2015        let ranker = ConformalRanker::new();
2016        let ranked = ranker.rank(results);
2017
2018        assert_eq!(ranked.items.len(), 1);
2019        assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
2020        assert_eq!(ranked.summary.count, 1);
2021    }
2022
2023    #[test]
2024    fn conformal_sorted_descending() {
2025        let scorer = BayesianScorer::new();
2026        let results = vec![
2027            scorer.score("set", "Settings"),
2028            scorer.score("set", "Asset Manager"),
2029            scorer.score("set", "Reset Configuration Panel"),
2030        ];
2031        let ranker = ConformalRanker::new();
2032        let ranked = ranker.rank(results);
2033
2034        // Verify descending score order.
2035        for w in ranked.items.windows(2) {
2036            let a = w[0].result.score;
2037            let b = w[1].result.score;
2038            assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
2039        }
2040    }
2041
2042    #[test]
2043    fn conformal_confidence_bounded() {
2044        let scorer = BayesianScorer::new();
2045        let titles = [
2046            "Settings",
2047            "Set Theme",
2048            "Asset Manager",
2049            "Reset View",
2050            "Offset Tool",
2051            "Test Suite",
2052        ];
2053        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2054        let ranker = ConformalRanker::new();
2055        let ranked = ranker.rank(results);
2056
2057        for item in &ranked.items {
2058            assert!(
2059                item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
2060                "Confidence must be in [0, 1], got {}",
2061                item.rank_confidence.confidence
2062            );
2063            assert!(
2064                item.rank_confidence.gap_to_next >= 0.0,
2065                "Gap must be non-negative"
2066            );
2067        }
2068    }
2069
2070    #[test]
2071    fn conformal_deterministic() {
2072        let scorer = BayesianScorer::new();
2073        let titles = ["Settings", "Set Theme", "Asset", "Reset"];
2074
2075        let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2076        let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2077
2078        let ranker = ConformalRanker::new();
2079        let ranked1 = ranker.rank(results1);
2080        let ranked2 = ranker.rank(results2);
2081
2082        for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
2083            assert!(
2084                (a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
2085                "Confidence should be deterministic"
2086            );
2087            assert_eq!(
2088                a.original_index, b.original_index,
2089                "Rank order should be deterministic"
2090            );
2091        }
2092    }
2093
2094    #[test]
2095    fn conformal_ties_detected() {
2096        // Create items with identical scores.
2097        let mut r1 = MatchResult::no_match();
2098        r1.score = 0.8;
2099        r1.match_type = MatchType::Prefix;
2100        let mut r2 = MatchResult::no_match();
2101        r2.score = 0.8;
2102        r2.match_type = MatchType::Prefix;
2103        let mut r3 = MatchResult::no_match();
2104        r3.score = 0.5;
2105        r3.match_type = MatchType::Substring;
2106
2107        let ranker = ConformalRanker::new();
2108        let ranked = ranker.rank(vec![r1, r2, r3]);
2109
2110        // The first two have identical scores — their gap is 0 → unstable.
2111        assert_eq!(
2112            ranked.items[0].rank_confidence.stability,
2113            RankStability::Unstable,
2114            "Tied items should be Unstable"
2115        );
2116        assert!(
2117            ranked.summary.tie_group_count >= 1,
2118            "Should detect at least one tie group"
2119        );
2120    }
2121
2122    #[test]
2123    fn conformal_all_identical_scores() {
2124        let mut results = Vec::new();
2125        for _ in 0..5 {
2126            let mut r = MatchResult::no_match();
2127            r.score = 0.5;
2128            r.match_type = MatchType::Fuzzy;
2129            results.push(r);
2130        }
2131
2132        let ranker = ConformalRanker::new();
2133        let ranked = ranker.rank(results);
2134
2135        // All gaps are zero → all unstable.
2136        for item in &ranked.items {
2137            assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
2138        }
2139    }
2140
2141    #[test]
2142    fn conformal_well_separated_scores_are_stable() {
2143        let mut results = Vec::new();
2144        // Scores well spread out: 0.9, 0.6, 0.3, 0.1.
2145        for &s in &[0.9, 0.6, 0.3, 0.1] {
2146            let mut r = MatchResult::no_match();
2147            r.score = s;
2148            r.match_type = MatchType::Prefix;
2149            results.push(r);
2150        }
2151
2152        let ranker = ConformalRanker::new();
2153        let ranked = ranker.rank(results);
2154
2155        // With well-separated scores, most should be stable.
2156        assert!(
2157            ranked.summary.stable_count >= 2,
2158            "Well-separated scores should yield stable positions, got {}",
2159            ranked.summary.stable_count
2160        );
2161    }
2162
2163    #[test]
2164    fn conformal_top_k_truncates() {
2165        let scorer = BayesianScorer::new();
2166        let titles = [
2167            "Settings",
2168            "Set Theme",
2169            "Asset Manager",
2170            "Reset View",
2171            "Offset Tool",
2172        ];
2173        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2174
2175        let ranker = ConformalRanker::new();
2176        let ranked = ranker.rank_top_k(results, 2);
2177
2178        assert_eq!(ranked.items.len(), 2);
2179        // Top-k items should still have confidence from the full ranking.
2180        assert!(ranked.items[0].rank_confidence.confidence > 0.0);
2181    }
2182
2183    #[test]
2184    fn conformal_original_indices_preserved() {
2185        let scorer = BayesianScorer::new();
2186        let titles = ["Zebra Tool", "Settings", "Apple"];
2187        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2188
2189        let ranker = ConformalRanker::new();
2190        let ranked = ranker.rank(results);
2191
2192        // "Settings" (index 1) should be ranked first (prefix match).
2193        assert_eq!(
2194            ranked.items[0].original_index, 1,
2195            "Settings should be first; original_index should be 1"
2196        );
2197    }
2198
2199    #[test]
2200    fn conformal_summary_display() {
2201        let scorer = BayesianScorer::new();
2202        let results = vec![
2203            scorer.score("set", "Settings"),
2204            scorer.score("set", "Set Theme"),
2205        ];
2206        let ranker = ConformalRanker::new();
2207        let ranked = ranker.rank(results);
2208
2209        let display = format!("{}", ranked.summary);
2210        assert!(display.contains("2 items"));
2211    }
2212
2213    #[test]
2214    fn conformal_rank_confidence_display() {
2215        let rc = RankConfidence {
2216            confidence: 0.85,
2217            gap_to_next: 0.1234,
2218            stability: RankStability::Stable,
2219        };
2220        let display = format!("{}", rc);
2221        assert!(display.contains("0.850"));
2222        assert!(display.contains("stable"));
2223    }
2224
2225    #[test]
2226    fn conformal_stability_labels() {
2227        assert_eq!(RankStability::Stable.label(), "stable");
2228        assert_eq!(RankStability::Marginal.label(), "marginal");
2229        assert_eq!(RankStability::Unstable.label(), "unstable");
2230    }
2231
2232    // --- Property: gap_to_next of last item is always 0 ---
2233
2234    #[test]
2235    fn conformal_last_item_gap_zero() {
2236        let scorer = BayesianScorer::new();
2237        let results = vec![
2238            scorer.score("set", "Settings"),
2239            scorer.score("set", "Asset"),
2240            scorer.score("set", "Reset"),
2241        ];
2242        let ranker = ConformalRanker::new();
2243        let ranked = ranker.rank(results);
2244
2245        let last = ranked.items.last().unwrap();
2246        assert_eq!(
2247            last.rank_confidence.gap_to_next, 0.0,
2248            "Last item gap should be 0"
2249        );
2250    }
2251
2252    // --- Property: median_gap is non-negative ---
2253
2254    #[test]
2255    fn conformal_median_gap_non_negative() {
2256        let scorer = BayesianScorer::new();
2257        let titles = [
2258            "Settings",
2259            "Set Theme",
2260            "Asset Manager",
2261            "Reset View",
2262            "Offset Tool",
2263            "Test Suite",
2264            "System Settings",
2265            "Reset Defaults",
2266        ];
2267        let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
2268        let ranker = ConformalRanker::new();
2269        let ranked = ranker.rank(results);
2270
2271        assert!(
2272            ranked.summary.median_gap >= 0.0,
2273            "Median gap must be non-negative"
2274        );
2275    }
2276
2277    // -----------------------------------------------------------------------
2278    // IncrementalScorer Tests (bd-39y4.13)
2279    // -----------------------------------------------------------------------
2280
2281    fn test_corpus() -> Vec<&'static str> {
2282        vec![
2283            "Open File",
2284            "Save File",
2285            "Close Tab",
2286            "Git: Commit",
2287            "Git: Push",
2288            "Git: Pull",
2289            "Go to Line",
2290            "Find in Files",
2291            "Toggle Terminal",
2292            "Format Document",
2293        ]
2294    }
2295
2296    #[test]
2297    fn incremental_full_scan_on_first_call() {
2298        let mut scorer = IncrementalScorer::new();
2299        let corpus = test_corpus();
2300        let results = scorer.score_corpus("git", &corpus, None);
2301
2302        assert!(!results.is_empty(), "Should find matches for 'git'");
2303        assert_eq!(scorer.stats().full_scans, 1);
2304        assert_eq!(scorer.stats().incremental_scans, 0);
2305    }
2306
2307    #[test]
2308    fn incremental_prunes_on_query_extension() {
2309        let mut scorer = IncrementalScorer::new();
2310        let corpus = test_corpus();
2311
2312        // First query: "g" matches many items
2313        let r1 = scorer.score_corpus("g", &corpus, None);
2314        assert_eq!(scorer.stats().full_scans, 1);
2315
2316        // Extended query: "gi" — should use incremental path
2317        let r2 = scorer.score_corpus("gi", &corpus, None);
2318        assert_eq!(scorer.stats().incremental_scans, 1);
2319        assert!(r2.len() <= r1.len(), "Extended query should match <= items");
2320
2321        // Further extension: "git" — still incremental
2322        let r3 = scorer.score_corpus("git", &corpus, None);
2323        assert_eq!(scorer.stats().incremental_scans, 2);
2324        assert!(r3.len() <= r2.len());
2325    }
2326
2327    #[test]
2328    fn incremental_correctness_matches_full_scan() {
2329        let corpus = test_corpus();
2330
2331        // Incremental path
2332        let mut inc = IncrementalScorer::new();
2333        inc.score_corpus("g", &corpus, None);
2334        let inc_results = inc.score_corpus("git", &corpus, None);
2335
2336        // Full scan path (fresh scorer, no cache)
2337        let mut full = IncrementalScorer::new();
2338        let full_results = full.score_corpus("git", &corpus, None);
2339
2340        // Results should be identical.
2341        assert_eq!(
2342            inc_results.len(),
2343            full_results.len(),
2344            "Incremental and full scan should return same count"
2345        );
2346
2347        for (a, b) in inc_results.iter().zip(full_results.iter()) {
2348            assert_eq!(a.0, b.0, "Same corpus indices");
2349            assert!(
2350                (a.1.score - b.1.score).abs() < f64::EPSILON,
2351                "Same scores: {} vs {}",
2352                a.1.score,
2353                b.1.score
2354            );
2355        }
2356    }
2357
2358    #[test]
2359    fn incremental_falls_back_on_non_extension() {
2360        let mut scorer = IncrementalScorer::new();
2361        let corpus = test_corpus();
2362
2363        scorer.score_corpus("git", &corpus, None);
2364        assert_eq!(scorer.stats().full_scans, 1);
2365
2366        // "fi" doesn't extend "git" — must full scan
2367        scorer.score_corpus("fi", &corpus, None);
2368        assert_eq!(scorer.stats().full_scans, 2);
2369    }
2370
2371    #[test]
2372    fn incremental_invalidate_forces_full_scan() {
2373        let mut scorer = IncrementalScorer::new();
2374        let corpus = test_corpus();
2375
2376        scorer.score_corpus("g", &corpus, None);
2377        scorer.invalidate();
2378
2379        // Even though "gi" extends "g", cache was cleared
2380        scorer.score_corpus("gi", &corpus, None);
2381        assert_eq!(scorer.stats().full_scans, 2);
2382        assert_eq!(scorer.stats().incremental_scans, 0);
2383    }
2384
2385    #[test]
2386    fn incremental_generation_change_invalidates() {
2387        let mut scorer = IncrementalScorer::new();
2388        let corpus = test_corpus();
2389
2390        scorer.score_corpus("g", &corpus, Some(1));
2391
2392        // Generation changed — cache invalid
2393        scorer.score_corpus("gi", &corpus, Some(2));
2394        assert_eq!(scorer.stats().full_scans, 2);
2395    }
2396
2397    #[test]
2398    fn incremental_corpus_size_change_invalidates() {
2399        let mut scorer = IncrementalScorer::new();
2400        let corpus1 = test_corpus();
2401        let corpus2 = &corpus1[..5];
2402
2403        scorer.score_corpus("g", &corpus1, None);
2404        scorer.score_corpus("gi", corpus2, None);
2405        // Corpus size changed → full scan
2406        assert_eq!(scorer.stats().full_scans, 2);
2407    }
2408
    #[test]
    fn incremental_skips_out_of_bounds_cache_entries() {
        let mut scorer = IncrementalScorer::new();

        // Simulate a cache produced for a larger corpus. The scorer should safely
        // skip entries that are out of bounds for the current corpus.
        scorer.cache = vec![
            CachedEntry { corpus_index: 0 },
            CachedEntry { corpus_index: 999 },
        ];

        // &str-slice entry point: only index 0 is valid for the 1-item corpus,
        // so exactly one result survives and index 999 is dropped silently.
        let corpus = ["a"];
        let results = scorer.score_incremental("a", "a", &corpus);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        // Pre-lowered entry point, same stale cache, same expectation.
        let corpus_lower = ["a"];
        let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);

        // Entry point with precomputed word starts, likewise.
        let corpus_s = vec!["a".to_string()];
        let corpus_lower_s = vec!["a".to_string()];
        let word_starts = vec![vec![0]];
        let results = scorer.score_incremental_lowered_with_words(
            "a",
            "a",
            &corpus_s,
            &corpus_lower_s,
            &word_starts,
        );
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 0);
    }
2443
2444    #[test]
2445    fn incremental_empty_query() {
2446        let mut scorer = IncrementalScorer::new();
2447        let corpus = test_corpus();
2448
2449        let results = scorer.score_corpus("", &corpus, None);
2450        // Empty query matches everything (with weak scores)
2451        assert_eq!(results.len(), corpus.len());
2452    }
2453
2454    #[test]
2455    fn incremental_no_match_query() {
2456        let mut scorer = IncrementalScorer::new();
2457        let corpus = test_corpus();
2458
2459        let results = scorer.score_corpus("zzz", &corpus, None);
2460        assert!(results.is_empty());
2461    }
2462
2463    #[test]
2464    fn incremental_stats_prune_ratio() {
2465        let mut scorer = IncrementalScorer::new();
2466        let corpus = test_corpus();
2467
2468        scorer.score_corpus("g", &corpus, None);
2469        scorer.score_corpus("gi", &corpus, None);
2470        scorer.score_corpus("git", &corpus, None);
2471
2472        let stats = scorer.stats();
2473        assert!(
2474            stats.prune_ratio() > 0.0,
2475            "Prune ratio should be > 0 after incremental scans"
2476        );
2477        assert!(stats.total_pruned > 0, "Should have pruned some items");
2478    }
2479
2480    #[test]
2481    fn incremental_results_sorted_descending() {
2482        let mut scorer = IncrementalScorer::new();
2483        let corpus = test_corpus();
2484
2485        let results = scorer.score_corpus("o", &corpus, None);
2486        for w in results.windows(2) {
2487            let a = w[0].1.score;
2488            let b = w[1].1.score;
2489            assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
2490        }
2491    }
2492
    #[test]
    fn incremental_lowered_matches_full() {
        // The pre-lowered fast path must produce byte-identical results to
        // the plain &str path; the corpus includes a multi-byte title ("🚀")
        // to exercise UTF-8 char boundaries in the word-start precomputation.
        let corpus = vec![
            "Open File".to_string(),
            "Save File".to_string(),
            "Close".to_string(),
            "Launch 🚀".to_string(),
        ];
        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
        let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
        // Precompute word-start byte offsets: offset 0, plus any char whose
        // preceding byte is a space/hyphen/underscore separator.
        let word_starts: Vec<Vec<usize>> = lower
            .iter()
            .map(|title_lower| {
                let bytes = title_lower.as_bytes();
                title_lower
                    .char_indices()
                    .filter_map(|(i, _)| {
                        let is_word_start = i == 0 || {
                            // i > 0 here (short-circuit above), so i-1 is in
                            // range; saturating_sub is belt-and-braces only.
                            let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
                            prev == b' ' || prev == b'-' || prev == b'_'
                        };
                        is_word_start.then_some(i)
                    })
                    .collect()
            })
            .collect();

        let mut full = IncrementalScorer::new();
        let mut lowered = IncrementalScorer::new();

        // Score the same query through both entry points...
        let a = full.score_corpus("fi", &corpus_refs, None);
        let b =
            lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);

        // ...and require agreement on indices, match types, positions, scores.
        assert_eq!(a.len(), b.len());
        for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
            assert_eq!(ia, ib);
            assert_eq!(ra.match_type, rb.match_type);
            assert_eq!(ra.match_positions, rb.match_positions);
            assert!(
                (ra.score - rb.score).abs() < 1e-12,
                "score mismatch: {} vs {}",
                ra.score,
                rb.score
            );
        }
    }
2540
2541    #[test]
2542    fn incremental_deterministic() {
2543        let corpus = test_corpus();
2544
2545        let mut s1 = IncrementalScorer::new();
2546        let r1 = s1.score_corpus("git", &corpus, None);
2547
2548        let mut s2 = IncrementalScorer::new();
2549        let r2 = s2.score_corpus("git", &corpus, None);
2550
2551        assert_eq!(r1.len(), r2.len());
2552        for (a, b) in r1.iter().zip(r2.iter()) {
2553            assert_eq!(a.0, b.0);
2554            assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
2555        }
2556    }
2557
2558    // -----------------------------------------------------------------------
2559    // MatchType::description coverage (bd-z1c65)
2560    // -----------------------------------------------------------------------
2561
2562    #[test]
2563    fn match_type_descriptions() {
2564        assert_eq!(MatchType::Exact.description(), "exact match");
2565        assert_eq!(MatchType::Prefix.description(), "prefix match");
2566        assert_eq!(MatchType::WordStart.description(), "word-start match");
2567        assert_eq!(MatchType::Substring.description(), "contiguous substring");
2568        assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
2569        assert_eq!(MatchType::NoMatch.description(), "no match");
2570    }
2571
2572    #[test]
2573    fn match_type_no_match_prior_is_zero() {
2574        assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
2575    }
2576
2577    // -----------------------------------------------------------------------
2578    // EvidenceDescription Display variants (bd-z1c65)
2579    // -----------------------------------------------------------------------
2580
2581    #[test]
2582    fn evidence_description_static_display() {
2583        let d = EvidenceDescription::Static("test message");
2584        assert_eq!(format!("{d}"), "test message");
2585    }
2586
2587    #[test]
2588    fn evidence_description_title_length_display() {
2589        let d = EvidenceDescription::TitleLengthChars { len: 42 };
2590        assert_eq!(format!("{d}"), "title length 42 chars");
2591    }
2592
2593    #[test]
2594    fn evidence_description_first_match_pos_display() {
2595        let d = EvidenceDescription::FirstMatchPos { pos: 5 };
2596        assert_eq!(format!("{d}"), "first match at position 5");
2597    }
2598
2599    #[test]
2600    fn evidence_description_word_boundary_count_display() {
2601        let d = EvidenceDescription::WordBoundaryCount { count: 3 };
2602        assert_eq!(format!("{d}"), "3 word boundary matches");
2603    }
2604
2605    #[test]
2606    fn evidence_description_gap_total_display() {
2607        let d = EvidenceDescription::GapTotal { total: 7 };
2608        assert_eq!(format!("{d}"), "total gap of 7 characters");
2609    }
2610
2611    #[test]
2612    fn evidence_description_coverage_percent_display() {
2613        let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
2614        let s = format!("{d}");
2615        assert!(s.contains("84%"), "Should round: {s}");
2616    }
2617
2618    // -----------------------------------------------------------------------
2619    // EvidenceEntry Display (bd-z1c65)
2620    // -----------------------------------------------------------------------
2621
2622    #[test]
2623    fn evidence_entry_display_supports() {
2624        let entry = EvidenceEntry {
2625            kind: EvidenceKind::Position,
2626            bayes_factor: 1.5,
2627            description: EvidenceDescription::FirstMatchPos { pos: 0 },
2628        };
2629        let s = format!("{entry}");
2630        assert!(s.contains("supports"));
2631        assert!(s.contains("Position"));
2632    }
2633
2634    #[test]
2635    fn evidence_entry_display_opposes() {
2636        let entry = EvidenceEntry {
2637            kind: EvidenceKind::GapPenalty,
2638            bayes_factor: 0.5,
2639            description: EvidenceDescription::GapTotal { total: 10 },
2640        };
2641        let s = format!("{entry}");
2642        assert!(s.contains("opposes"));
2643    }
2644
2645    #[test]
2646    fn evidence_entry_display_neutral() {
2647        let entry = EvidenceEntry {
2648            kind: EvidenceKind::TitleLength,
2649            bayes_factor: 1.0,
2650            description: EvidenceDescription::Static("neutral factor"),
2651        };
2652        let s = format!("{entry}");
2653        assert!(s.contains("neutral"));
2654    }
2655
2656    // -----------------------------------------------------------------------
2657    // EvidenceLedger direct method tests (bd-z1c65)
2658    // -----------------------------------------------------------------------
2659
2660    #[test]
2661    fn ledger_combined_bayes_factor() {
2662        let mut ledger = EvidenceLedger::new();
2663        ledger.add(
2664            EvidenceKind::Position,
2665            2.0,
2666            EvidenceDescription::Static("a"),
2667        );
2668        ledger.add(
2669            EvidenceKind::WordBoundary,
2670            3.0,
2671            EvidenceDescription::Static("b"),
2672        );
2673        assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
2674    }
2675
2676    #[test]
2677    fn ledger_combined_bayes_factor_empty() {
2678        let ledger = EvidenceLedger::new();
2679        assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
2680    }
2681
2682    #[test]
2683    fn ledger_prior_odds_present() {
2684        let mut ledger = EvidenceLedger::new();
2685        ledger.add(
2686            EvidenceKind::MatchType,
2687            9.0,
2688            EvidenceDescription::Static("prefix"),
2689        );
2690        assert_eq!(ledger.prior_odds(), Some(9.0));
2691    }
2692
2693    #[test]
2694    fn ledger_prior_odds_absent() {
2695        let ledger = EvidenceLedger::new();
2696        assert_eq!(ledger.prior_odds(), None);
2697    }
2698
2699    #[test]
2700    fn ledger_posterior_probability_no_prior() {
2701        let mut ledger = EvidenceLedger::new();
2702        ledger.add(
2703            EvidenceKind::Position,
2704            2.0,
2705            EvidenceDescription::Static("pos"),
2706        );
2707        // No MatchType entry → prior defaults to 1.0
2708        // posterior_odds = 1.0 * 2.0 = 2.0 → prob = 2/3
2709        let prob = ledger.posterior_probability();
2710        assert!((prob - 2.0 / 3.0).abs() < 1e-9);
2711    }
2712
2713    #[test]
2714    fn ledger_posterior_probability_infinite_odds() {
2715        let mut ledger = EvidenceLedger::new();
2716        ledger.add(
2717            EvidenceKind::MatchType,
2718            f64::INFINITY,
2719            EvidenceDescription::Static("inf"),
2720        );
2721        assert_eq!(ledger.posterior_probability(), 1.0);
2722    }
2723
2724    // -----------------------------------------------------------------------
2725    // BayesianScorer::fast() path (no evidence tracking) (bd-z1c65)
2726    // -----------------------------------------------------------------------
2727
2728    #[test]
2729    fn fast_scorer_no_evidence() {
2730        let scorer = BayesianScorer::fast();
2731        let result = scorer.score("set", "Settings");
2732        assert!(result.score > 0.0);
2733        assert!(result.evidence.entries().is_empty());
2734    }
2735
2736    #[test]
2737    fn fast_scorer_score_matches_probability_range() {
2738        let scorer = BayesianScorer::fast();
2739        let result = scorer.score("set", "Settings");
2740        assert!(result.score >= 0.0 && result.score <= 1.0);
2741    }
2742
2743    #[test]
2744    fn fast_scorer_empty_query() {
2745        let scorer = BayesianScorer::fast();
2746        let result = scorer.score("", "Test");
2747        assert!(result.score > 0.0);
2748        assert!(result.evidence.entries().is_empty());
2749    }
2750
2751    #[test]
2752    fn fast_scorer_fuzzy_with_gap_penalty() {
2753        let scorer = BayesianScorer::fast();
2754        let result = scorer.score("ace", "a...b...c...d...e");
2755        assert_eq!(result.match_type, MatchType::Fuzzy);
2756        assert!(result.score > 0.0);
2757    }
2758
2759    #[test]
2760    fn fast_scorer_tag_boost() {
2761        let scorer = BayesianScorer::fast();
2762        let without = scorer.score("set", "Settings");
2763        let with = scorer.score_with_tags("set", "Settings", &["setup"]);
2764        assert!(with.score > without.score);
2765    }
2766
2767    #[test]
2768    fn fast_scorer_tag_no_boost_at_score_one() {
2769        let scorer = BayesianScorer::fast();
2770        // score_with_tags fast path only boosts when score is in (0, 1)
2771        let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
2772        assert!(result.score <= 1.0);
2773    }
2774
2775    // -----------------------------------------------------------------------
2776    // Unicode (non-ASCII) matching paths (bd-z1c65)
2777    // -----------------------------------------------------------------------
2778
2779    #[test]
2780    fn unicode_exact_match() {
2781        let scorer = BayesianScorer::new();
2782        let result = scorer.score("日本語", "日本語");
2783        assert_eq!(result.match_type, MatchType::Exact);
2784        assert!(result.score > 0.9);
2785    }
2786
2787    #[test]
2788    fn unicode_prefix_match() {
2789        let scorer = BayesianScorer::new();
2790        let result = scorer.score("日本", "日本語テスト");
2791        assert_eq!(result.match_type, MatchType::Prefix);
2792    }
2793
2794    #[test]
2795    fn unicode_substring_match() {
2796        let scorer = BayesianScorer::new();
2797        let result = scorer.score("本語", "日本語テスト");
2798        assert_eq!(result.match_type, MatchType::Substring);
2799    }
2800
2801    #[test]
2802    fn unicode_fuzzy_match() {
2803        let scorer = BayesianScorer::new();
2804        let result = scorer.score("日テ", "日本語テスト");
2805        assert_eq!(result.match_type, MatchType::Fuzzy);
2806    }
2807
2808    #[test]
2809    fn unicode_no_match() {
2810        let scorer = BayesianScorer::new();
2811        let result = scorer.score("ωψξ", "αβγ");
2812        assert_eq!(result.match_type, MatchType::NoMatch);
2813    }
2814
2815    #[test]
2816    fn unicode_word_start_match() {
2817        let scorer = BayesianScorer::new();
2818        // Word boundaries at space/dash/underscore
2819        let result = scorer.score("gd", "go dashboard");
2820        assert_eq!(result.match_type, MatchType::WordStart);
2821    }
2822
2823    // -----------------------------------------------------------------------
2824    // score_with_query_lower / score_with_lowered_title (bd-z1c65)
2825    // -----------------------------------------------------------------------
2826
2827    #[test]
2828    fn score_with_query_lower_matches_score() {
2829        let scorer = BayesianScorer::new();
2830        let direct = scorer.score("Set", "Settings");
2831        let pre = scorer.score_with_query_lower("Set", "set", "Settings");
2832        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2833        assert_eq!(direct.match_type, pre.match_type);
2834    }
2835
2836    #[test]
2837    fn score_with_lowered_title_matches_score() {
2838        let scorer = BayesianScorer::new();
2839        let direct = scorer.score("set", "Settings");
2840        let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
2841        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2842    }
2843
2844    #[test]
2845    fn score_with_lowered_title_and_words_matches() {
2846        let scorer = BayesianScorer::new();
2847        let direct = scorer.score("gd", "Go Dashboard");
2848        let pre = scorer.score_with_lowered_title_and_words(
2849            "gd",
2850            "gd",
2851            "Go Dashboard",
2852            "go dashboard",
2853            Some(&[0, 3]),
2854        );
2855        assert_eq!(direct.match_type, pre.match_type);
2856        assert!((direct.score - pre.score).abs() < f64::EPSILON);
2857    }
2858
2859    // -----------------------------------------------------------------------
2860    // Tag matching edge cases (bd-z1c65)
2861    // -----------------------------------------------------------------------
2862
2863    #[test]
2864    fn tag_match_no_title_match_returns_nomatch() {
2865        let scorer = BayesianScorer::new();
2866        let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
2867        // Tag matches "xyz" but title doesn't match → no boost applied
2868        assert_eq!(result.match_type, MatchType::NoMatch);
2869        assert_eq!(result.score, 0.0);
2870    }
2871
2872    #[test]
2873    fn tag_match_no_matching_tag() {
2874        let scorer = BayesianScorer::new();
2875        let without = scorer.score("set", "Settings");
2876        let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
2877        // No tag matches → score unchanged
2878        assert!((without.score - with.score).abs() < f64::EPSILON);
2879    }
2880
2881    #[test]
2882    fn tag_match_case_insensitive() {
2883        let scorer = BayesianScorer::new();
2884        let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
2885        let without = scorer.score("set", "Settings");
2886        assert!(result.score > without.score);
2887    }
2888
2889    // -----------------------------------------------------------------------
2890    // MatchResult::no_match() (bd-z1c65)
2891    // -----------------------------------------------------------------------
2892
2893    #[test]
2894    fn no_match_result_has_evidence() {
2895        let r = MatchResult::no_match();
2896        assert_eq!(r.score, 0.0);
2897        assert_eq!(r.match_type, MatchType::NoMatch);
2898        assert!(r.match_positions.is_empty());
2899        assert_eq!(r.evidence.entries().len(), 1);
2900        assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
2901        assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
2902    }
2903
2904    // -----------------------------------------------------------------------
2905    // count_word_boundaries / total_gap edge cases (bd-z1c65)
2906    // -----------------------------------------------------------------------
2907
2908    #[test]
2909    fn count_word_boundaries_at_start() {
2910        let scorer = BayesianScorer::new();
2911        // Position 0 is always a word boundary
2912        let count = scorer.count_word_boundaries(&[0], "hello");
2913        assert_eq!(count, 1);
2914    }
2915
2916    #[test]
2917    fn count_word_boundaries_after_separators() {
2918        let scorer = BayesianScorer::new();
2919        // "a-b_c d" → positions 0, 2, 4, 6 are word starts
2920        let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
2921        assert_eq!(count, 4);
2922    }
2923
2924    #[test]
2925    fn count_word_boundaries_mid_word() {
2926        let scorer = BayesianScorer::new();
2927        // Position 2 in "hello" is mid-word
2928        let count = scorer.count_word_boundaries(&[2], "hello");
2929        assert_eq!(count, 0);
2930    }
2931
2932    #[test]
2933    fn total_gap_empty() {
2934        let scorer = BayesianScorer::new();
2935        assert_eq!(scorer.total_gap(&[]), 0);
2936    }
2937
2938    #[test]
2939    fn total_gap_single() {
2940        let scorer = BayesianScorer::new();
2941        assert_eq!(scorer.total_gap(&[5]), 0);
2942    }
2943
2944    #[test]
2945    fn total_gap_contiguous() {
2946        let scorer = BayesianScorer::new();
2947        // [0,1,2] → gaps are 0,0 → total 0
2948        assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
2949    }
2950
2951    #[test]
2952    fn total_gap_with_gaps() {
2953        let scorer = BayesianScorer::new();
2954        // [0, 3, 7] → gaps: (3-0-1)=2, (7-3-1)=3 → total 5
2955        assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
2956    }
2957
2958    // -----------------------------------------------------------------------
2959    // ConformalRanker edge cases (bd-z1c65)
2960    // -----------------------------------------------------------------------
2961
2962    #[test]
2963    fn conformal_custom_thresholds() {
2964        let ranker = ConformalRanker {
2965            tie_epsilon: 0.1,
2966            stable_threshold: 0.9,
2967            marginal_threshold: 0.5,
2968        };
2969        // With tie_epsilon=0.1, scores within 0.1 are ties
2970        let mut r1 = MatchResult::no_match();
2971        r1.score = 0.5;
2972        r1.match_type = MatchType::Prefix;
2973        let mut r2 = MatchResult::no_match();
2974        r2.score = 0.45;
2975        r2.match_type = MatchType::Prefix;
2976
2977        let ranked = ranker.rank(vec![r1, r2]);
2978        assert_eq!(
2979            ranked.items[0].rank_confidence.stability,
2980            RankStability::Unstable,
2981            "Gap 0.05 < epsilon 0.1 → tie"
2982        );
2983    }
2984
2985    #[test]
2986    fn conformal_rank_top_k_larger_than_count() {
2987        let scorer = BayesianScorer::new();
2988        let results = vec![scorer.score("set", "Settings")];
2989        let ranker = ConformalRanker::new();
2990        let ranked = ranker.rank_top_k(results, 100);
2991        assert_eq!(ranked.items.len(), 1);
2992    }
2993
2994    #[test]
2995    fn conformal_rank_top_k_zero() {
2996        let scorer = BayesianScorer::new();
2997        let results = vec![
2998            scorer.score("set", "Settings"),
2999            scorer.score("set", "Asset"),
3000        ];
3001        let ranker = ConformalRanker::new();
3002        let ranked = ranker.rank_top_k(results, 0);
3003        assert!(ranked.items.is_empty());
3004        assert_eq!(ranked.summary.stable_count, 0);
3005    }
3006
3007    #[test]
3008    fn conformal_rank_confidence_display_marginal() {
3009        let rc = RankConfidence {
3010            confidence: 0.5,
3011            gap_to_next: 0.05,
3012            stability: RankStability::Marginal,
3013        };
3014        let s = format!("{rc}");
3015        assert!(s.contains("marginal"));
3016    }
3017
3018    #[test]
3019    fn conformal_rank_confidence_display_unstable() {
3020        let rc = RankConfidence {
3021            confidence: 0.1,
3022            gap_to_next: 0.001,
3023            stability: RankStability::Unstable,
3024        };
3025        let s = format!("{rc}");
3026        assert!(s.contains("unstable"));
3027    }
3028
3029    #[test]
3030    fn conformal_median_gap_even_count() {
3031        // 4 items → 3 gaps → odd, but let's use 5 items → 4 gaps → even
3032        let mut results = Vec::new();
3033        for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
3034            let mut r = MatchResult::no_match();
3035            r.score = s;
3036            r.match_type = MatchType::Prefix;
3037            results.push(r);
3038        }
3039        let ranker = ConformalRanker::new();
3040        let ranked = ranker.rank(results);
3041        // 4 gaps, all 0.2 → median = 0.2
3042        assert!(
3043            (ranked.summary.median_gap - 0.2).abs() < 0.01,
3044            "median_gap={}, expected ~0.2",
3045            ranked.summary.median_gap
3046        );
3047    }
3048
3049    // -----------------------------------------------------------------------
3050    // IncrementalScorer additional coverage (bd-z1c65)
3051    // -----------------------------------------------------------------------
3052
3053    #[test]
3054    fn incremental_with_scorer_uses_provided_scorer() {
3055        let scorer = BayesianScorer::new(); // evidence tracking ON
3056        let mut inc = IncrementalScorer::with_scorer(scorer);
3057        let corpus = test_corpus();
3058        let results = inc.score_corpus("git", &corpus, None);
3059        assert!(!results.is_empty());
3060        // Evidence tracking is on, so evidence should be populated
3061        assert!(!results[0].1.evidence.entries().is_empty());
3062    }
3063
3064    #[test]
3065    fn incremental_default_trait() {
3066        let inc = IncrementalScorer::default();
3067        assert_eq!(inc.stats().full_scans, 0);
3068        assert_eq!(inc.stats().incremental_scans, 0);
3069    }
3070
3071    #[test]
3072    fn incremental_stats_prune_ratio_zero_when_empty() {
3073        let stats = IncrementalStats::default();
3074        assert_eq!(stats.prune_ratio(), 0.0);
3075    }
3076
3077    #[test]
3078    fn incremental_score_corpus_with_lowered() {
3079        let corpus = vec!["Open File", "Save File", "Close Tab"];
3080        let lower = vec!["open file", "save file", "close tab"];
3081        let mut inc = IncrementalScorer::new();
3082        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3083        assert!(!results.is_empty());
3084        // All results should reference valid corpus indices
3085        for (idx, _) in &results {
3086            assert!(*idx < corpus.len());
3087        }
3088    }
3089
3090    #[test]
3091    fn incremental_score_corpus_with_lowered_incremental_path() {
3092        let corpus = vec!["Open File", "Save File", "Close Tab"];
3093        let lower = vec!["open file", "save file", "close tab"];
3094        let mut inc = IncrementalScorer::new();
3095        inc.score_corpus_with_lowered("f", &corpus, &lower, None);
3096        let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
3097        assert_eq!(inc.stats().incremental_scans, 1);
3098        assert!(!results.is_empty());
3099    }
3100
3101    #[test]
3102    fn incremental_score_corpus_with_lowered_and_words() {
3103        let corpus = vec![
3104            "Open File".to_string(),
3105            "Save File".to_string(),
3106            "Close Tab".to_string(),
3107        ];
3108        let lower = vec![
3109            "open file".to_string(),
3110            "save file".to_string(),
3111            "close tab".to_string(),
3112        ];
3113        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3114
3115        let mut inc = IncrementalScorer::new();
3116        let results =
3117            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3118        assert!(!results.is_empty());
3119    }
3120
3121    #[test]
3122    fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
3123        let corpus = vec![
3124            "Open File".to_string(),
3125            "Save File".to_string(),
3126            "Close Tab".to_string(),
3127        ];
3128        let lower = vec![
3129            "open file".to_string(),
3130            "save file".to_string(),
3131            "close tab".to_string(),
3132        ];
3133        let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
3134
3135        let mut inc = IncrementalScorer::new();
3136        inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
3137        let results =
3138            inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
3139        assert_eq!(inc.stats().incremental_scans, 1);
3140        assert!(!results.is_empty());
3141    }
3142
3143    // -----------------------------------------------------------------------
3144    // BayesianScorer::Default (bd-z1c65)
3145    // -----------------------------------------------------------------------
3146
3147    #[test]
3148    fn bayesian_scorer_default_has_evidence_off() {
3149        let scorer = BayesianScorer::default();
3150        assert!(!scorer.track_evidence);
3151    }
3152
3153    // -----------------------------------------------------------------------
3154    // Word boundary separator: hyphen and underscore (bd-z1c65)
3155    // -----------------------------------------------------------------------
3156
3157    #[test]
3158    fn word_start_match_with_hyphens() {
3159        let scorer = BayesianScorer::new();
3160        let result = scorer.score("fc", "file-commander");
3161        assert_eq!(result.match_type, MatchType::WordStart);
3162        assert_eq!(result.match_positions, vec![0, 5]);
3163    }
3164
3165    #[test]
3166    fn word_start_match_with_underscores() {
3167        let scorer = BayesianScorer::new();
3168        let result = scorer.score("fc", "file_commander");
3169        assert_eq!(result.match_type, MatchType::WordStart);
3170        assert_eq!(result.match_positions, vec![0, 5]);
3171    }
3172
3173    // -----------------------------------------------------------------------
3174    // MatchType Ord derivation (bd-z1c65)
3175    // -----------------------------------------------------------------------
3176
3177    #[test]
3178    fn match_type_ord() {
3179        let mut types = vec![
3180            MatchType::Exact,
3181            MatchType::NoMatch,
3182            MatchType::Fuzzy,
3183            MatchType::Prefix,
3184            MatchType::Substring,
3185            MatchType::WordStart,
3186        ];
3187        types.sort();
3188        assert_eq!(
3189            types,
3190            vec![
3191                MatchType::NoMatch,
3192                MatchType::Fuzzy,
3193                MatchType::Substring,
3194                MatchType::WordStart,
3195                MatchType::Prefix,
3196                MatchType::Exact,
3197            ]
3198        );
3199    }
3200
3201    // -----------------------------------------------------------------------
3202    // EvidenceKind equality (bd-z1c65)
3203    // -----------------------------------------------------------------------
3204
    /// `EvidenceKind` derives `PartialEq`, so kinds compare directly by variant.
    #[test]
    fn evidence_kind_equality() {
        assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
        assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
    }
3210
3211    // -----------------------------------------------------------------------
3212    // Scoring position bonus (bd-z1c65)
3213    // -----------------------------------------------------------------------
3214
    /// Position evidence: the same substring found earlier in a title must
    /// outrank a later occurrence.
    #[test]
    fn earlier_substring_scores_higher() {
        let scorer = BayesianScorer::new();
        let early = scorer.score("set", "set in stone");
        let late = scorer.score("set", "the asset");
        // "set" at position 0 vs "set" at position 5
        assert!(
            early.score > late.score,
            "Earlier match should score higher: {} vs {}",
            early.score,
            late.score
        );
    }
3228}
3229
3230// ===========================================================================
3231// Performance regression tests (bd-39y4.5)
3232// ===========================================================================
3233
3234#[cfg(test)]
3235mod perf_tests {
3236    use super::*;
3237    use std::time::Instant;
3238
    /// Summary statistics, in microseconds, over one batch of timed samples
    /// collected by `measure_stats_us`.
    #[derive(Debug, Clone, Copy)]
    struct PerfStats {
        /// Median sample time.
        p50_us: u64,
        /// 95th-percentile sample time.
        p95_us: u64,
        /// 99th-percentile sample time.
        p99_us: u64,
        /// Arithmetic mean across all samples.
        mean_us: f64,
        /// Population variance across all samples.
        variance_us: f64,
        /// Number of timed samples collected (excludes warmup).
        samples: usize,
    }
3248
    /// Budget thresholds for single-query scoring.
    /// These are generous to avoid CI flakes but catch >2x regressions.
    const SINGLE_SCORE_BUDGET_US: u64 = 10; // 10µs per score call
    const CORPUS_100_BUDGET_US: u64 = 500; // 500µs for 100-item full scan
    const CORPUS_1000_BUDGET_US: u64 = 5_000; // 5ms for 1000-item full scan
    const CORPUS_5000_BUDGET_US: u64 = 25_000; // 25ms for 5000-item full scan
    const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 5_000; // 5ms for 7 keystrokes on 100 items (relaxed from 2ms for CI stability)
    const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000; // 15ms for 7 keystrokes on 1000 items

    /// Factor by which budgets are relaxed when running under coverage
    /// instrumentation (applied by `coverage_budget_us_with_mode`).
    const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;
3259
3260    fn is_coverage_run() -> bool {
3261        std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
3262    }
3263
3264    fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
3265        // Coverage instrumentation adds substantial overhead (and noise) to these
3266        // microbench-style regression tests, so we relax budgets enough to keep
3267        // `cargo llvm-cov` stable while still logging the timings.
3268        if coverage_run {
3269            base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER)
3270        } else {
3271            base
3272        }
3273    }
3274
3275    fn coverage_budget_us(base: u64) -> u64 {
3276        coverage_budget_us_with_mode(base, is_coverage_run())
3277    }
3278
3279    #[test]
3280    fn coverage_budget_us_with_mode_respects_flag() {
3281        assert_eq!(coverage_budget_us_with_mode(123, false), 123);
3282        assert_eq!(
3283            coverage_budget_us_with_mode(123, true),
3284            123 * COVERAGE_BUDGET_MULTIPLIER
3285        );
3286    }
3287
3288    /// Generate a command corpus of the specified size with realistic variety.
3289    fn make_corpus(size: usize) -> Vec<String> {
3290        let base = [
3291            "Open File",
3292            "Save File",
3293            "Close Tab",
3294            "Split Editor Right",
3295            "Split Editor Down",
3296            "Toggle Terminal",
3297            "Go to Line",
3298            "Find in Files",
3299            "Replace in Files",
3300            "Git: Commit",
3301            "Git: Push",
3302            "Git: Pull",
3303            "Debug: Start",
3304            "Debug: Stop",
3305            "Debug: Step Over",
3306            "Format Document",
3307            "Rename Symbol",
3308            "Go to Definition",
3309            "Find All References",
3310            "Toggle Sidebar",
3311        ];
3312        base.iter()
3313            .cycle()
3314            .take(size)
3315            .enumerate()
3316            .map(|(i, cmd)| {
3317                if i < base.len() {
3318                    (*cmd).to_string()
3319                } else {
3320                    format!("{} ({})", cmd, i)
3321                }
3322            })
3323            .collect()
3324    }
3325
3326    fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
3327        let mut times = Vec::with_capacity(iterations);
3328        // Warmup
3329        for _ in 0..3 {
3330            f();
3331        }
3332        for _ in 0..iterations {
3333            let start = Instant::now();
3334            f();
3335            times.push(start.elapsed().as_micros() as u64);
3336        }
3337        times.sort_unstable();
3338        let len = times.len();
3339        let p50 = times[len / 2];
3340        let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
3341        let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
3342        let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
3343        let variance = times
3344            .iter()
3345            .map(|value| {
3346                let delta = *value as f64 - mean;
3347                delta * delta
3348            })
3349            .sum::<f64>()
3350            / len as f64;
3351
3352        PerfStats {
3353            p50_us: p50,
3354            p95_us: p95,
3355            p99_us: p99,
3356            mean_us: mean,
3357            variance_us: variance,
3358            samples: len,
3359        }
3360    }
3361
3362    #[test]
3363    fn perf_single_score_under_budget() {
3364        let scorer = BayesianScorer::fast();
3365        let stats = measure_stats_us(200, || {
3366            std::hint::black_box(scorer.score("git co", "Git: Commit"));
3367        });
3368        eprintln!(
3369            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3370            stats.samples,
3371            stats.p50_us,
3372            stats.p95_us,
3373            stats.p99_us,
3374            stats.mean_us,
3375            stats.variance_us
3376        );
3377        let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
3378        assert!(
3379            stats.p50_us <= budget,
3380            "Single score p50 = {}µs exceeds budget {}µs",
3381            stats.p50_us,
3382            budget,
3383        );
3384    }
3385
3386    #[test]
3387    fn perf_corpus_100_under_budget() {
3388        let scorer = BayesianScorer::fast();
3389        let corpus = make_corpus(100);
3390        let stats = measure_stats_us(50, || {
3391            let mut results: Vec<_> = corpus
3392                .iter()
3393                .map(|t| scorer.score("git co", t))
3394                .filter(|r| r.score > 0.0)
3395                .collect();
3396            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3397            std::hint::black_box(&results);
3398        });
3399        eprintln!(
3400            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3401            stats.samples,
3402            stats.p50_us,
3403            stats.p95_us,
3404            stats.p99_us,
3405            stats.mean_us,
3406            stats.variance_us
3407        );
3408        let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
3409        assert!(
3410            stats.p95_us <= budget,
3411            "100-item corpus p95 = {}µs exceeds budget {}µs",
3412            stats.p95_us,
3413            budget,
3414        );
3415    }
3416
3417    #[test]
3418    fn perf_corpus_1000_under_budget() {
3419        let scorer = BayesianScorer::fast();
3420        let corpus = make_corpus(1_000);
3421        let stats = measure_stats_us(20, || {
3422            let mut results: Vec<_> = corpus
3423                .iter()
3424                .map(|t| scorer.score("git co", t))
3425                .filter(|r| r.score > 0.0)
3426                .collect();
3427            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3428            std::hint::black_box(&results);
3429        });
3430        eprintln!(
3431            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3432            stats.samples,
3433            stats.p50_us,
3434            stats.p95_us,
3435            stats.p99_us,
3436            stats.mean_us,
3437            stats.variance_us
3438        );
3439        let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
3440        assert!(
3441            stats.p95_us <= budget,
3442            "1000-item corpus p95 = {}µs exceeds budget {}µs",
3443            stats.p95_us,
3444            budget,
3445        );
3446    }
3447
3448    #[test]
3449    fn perf_corpus_5000_under_budget() {
3450        let scorer = BayesianScorer::fast();
3451        let corpus = make_corpus(5_000);
3452        let stats = measure_stats_us(10, || {
3453            let mut results: Vec<_> = corpus
3454                .iter()
3455                .map(|t| scorer.score("git co", t))
3456                .filter(|r| r.score > 0.0)
3457                .collect();
3458            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3459            std::hint::black_box(&results);
3460        });
3461        eprintln!(
3462            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3463            stats.samples,
3464            stats.p50_us,
3465            stats.p95_us,
3466            stats.p99_us,
3467            stats.mean_us,
3468            stats.variance_us
3469        );
3470        let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
3471        assert!(
3472            stats.p95_us <= budget,
3473            "5000-item corpus p95 = {}µs exceeds budget {}µs",
3474            stats.p95_us,
3475            budget,
3476        );
3477    }
3478
3479    #[test]
3480    fn perf_incremental_7key_100_under_budget() {
3481        let corpus = make_corpus(100);
3482        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3483        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3484
3485        let stats = measure_stats_us(30, || {
3486            let mut inc = IncrementalScorer::new();
3487            for query in &queries {
3488                let results = inc.score_corpus(query, &corpus_refs, None);
3489                std::hint::black_box(&results);
3490            }
3491        });
3492        eprintln!(
3493            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3494            stats.samples,
3495            stats.p50_us,
3496            stats.p95_us,
3497            stats.p99_us,
3498            stats.mean_us,
3499            stats.variance_us
3500        );
3501        let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
3502        assert!(
3503            stats.p95_us <= budget,
3504            "Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
3505            stats.p95_us,
3506            budget,
3507        );
3508    }
3509
3510    #[test]
3511    fn perf_incremental_7key_1000_under_budget() {
3512        let corpus = make_corpus(1_000);
3513        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3514        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3515
3516        let stats = measure_stats_us(10, || {
3517            let mut inc = IncrementalScorer::new();
3518            for query in &queries {
3519                let results = inc.score_corpus(query, &corpus_refs, None);
3520                std::hint::black_box(&results);
3521            }
3522        });
3523        eprintln!(
3524            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
3525            stats.samples,
3526            stats.p50_us,
3527            stats.p95_us,
3528            stats.p99_us,
3529            stats.mean_us,
3530            stats.variance_us
3531        );
3532        let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
3533        assert!(
3534            stats.p95_us <= budget,
3535            "Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
3536            stats.p95_us,
3537            budget,
3538        );
3539    }
3540
3541    #[test]
3542    fn perf_incremental_faster_than_naive() {
3543        let corpus = make_corpus(100);
3544        let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
3545        let scorer = BayesianScorer::fast();
3546        let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
3547
3548        let naive_stats = measure_stats_us(30, || {
3549            for query in &queries {
3550                let mut results: Vec<_> = corpus
3551                    .iter()
3552                    .map(|t| scorer.score(query, t))
3553                    .filter(|r| r.score > 0.0)
3554                    .collect();
3555                results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3556                std::hint::black_box(&results);
3557            }
3558        });
3559
3560        let inc_stats = measure_stats_us(30, || {
3561            let mut inc = IncrementalScorer::new();
3562            for query in &queries {
3563                let results = inc.score_corpus(query, &corpus_refs, None);
3564                std::hint::black_box(&results);
3565            }
3566        });
3567        eprintln!(
3568            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
3569            naive_stats.samples,
3570            naive_stats.p50_us,
3571            naive_stats.p95_us,
3572            inc_stats.p50_us,
3573            inc_stats.p95_us
3574        );
3575
3576        // Incremental should not be more than 2x slower than naive
3577        // (in practice it's faster, but we set a relaxed threshold)
3578        assert!(
3579            inc_stats.p50_us <= naive_stats.p50_us * 2 + 50, // +50µs tolerance for measurement noise
3580            "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
3581            inc_stats.p50_us,
3582            naive_stats.p50_us,
3583        );
3584    }
3585
3586    fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
3587        if p50_small_us > 0 {
3588            p50_large_us as f64 / p50_small_us as f64
3589        } else {
3590            0.0
3591        }
3592    }
3593
3594    #[test]
3595    fn scaling_ratio_handles_zero_divisor() {
3596        assert_eq!(scaling_ratio_us(0, 123), 0.0);
3597        assert_eq!(scaling_ratio_us(10, 25), 2.5);
3598    }
3599
3600    #[test]
3601    fn perf_scaling_sublinear() {
3602        let scorer = BayesianScorer::fast();
3603        let corpus_100 = make_corpus(100);
3604        let corpus_1000 = make_corpus(1_000);
3605
3606        let stats_100 = measure_stats_us(30, || {
3607            let mut results: Vec<_> = corpus_100
3608                .iter()
3609                .map(|t| scorer.score("git", t))
3610                .filter(|r| r.score > 0.0)
3611                .collect();
3612            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3613            std::hint::black_box(&results);
3614        });
3615
3616        let stats_1000 = measure_stats_us(20, || {
3617            let mut results: Vec<_> = corpus_1000
3618                .iter()
3619                .map(|t| scorer.score("git", t))
3620                .filter(|r| r.score > 0.0)
3621                .collect();
3622            results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
3623            std::hint::black_box(&results);
3624        });
3625        eprintln!(
3626            "{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
3627            stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
3628        );
3629
3630        // 10x corpus ⇒ O(n log n) theoretical ratio ≈ 15x.  Use 25x to absorb
3631        // system noise when tests run in parallel (still catches O(n²) = 100x).
3632        let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
3633        assert!(
3634            ratio < 25.0,
3635            "1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
3636            ratio,
3637            stats_100.p50_us,
3638            stats_1000.p50_us,
3639        );
3640    }
3641}