#![forbid(unsafe_code)]
use std::cmp::Ordering;
use std::fmt;
/// How a query matched a title, from weakest (`NoMatch`) to strongest
/// (`Exact`). The derived `Ord` follows declaration order, so stronger
/// match kinds compare greater.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum MatchType {
    NoMatch,
    Fuzzy,
    Substring,
    WordStart,
    Prefix,
    Exact,
}
impl MatchType {
    /// Prior odds assigned to this match kind before any other evidence
    /// is folded in. `NoMatch` carries zero odds so it can never score.
    pub fn prior_odds(self) -> f64 {
        match self {
            Self::NoMatch => 0.0,
            Self::Fuzzy => 0.333,
            Self::Substring => 2.0,
            Self::WordStart => 4.0,
            Self::Prefix => 9.0,
            Self::Exact => 99.0,
        }
    }
    /// Human-readable label used in evidence descriptions.
    pub fn description(self) -> &'static str {
        match self {
            Self::NoMatch => "no match",
            Self::Fuzzy => "fuzzy match",
            Self::Substring => "contiguous substring",
            Self::WordStart => "word-start match",
            Self::Prefix => "prefix match",
            Self::Exact => "exact match",
        }
    }
}
/// Orders `(original_index, MatchResult)` pairs best-first: descending
/// score, then descending match strength as a tie-break. Scores that do
/// not compare (NaN) are treated as equal.
fn compare_ranked_match_results(
    left: &(usize, MatchResult),
    right: &(usize, MatchResult),
) -> Ordering {
    let by_score = right
        .1
        .score
        .partial_cmp(&left.1.score)
        .unwrap_or(Ordering::Equal);
    by_score.then_with(|| right.1.match_type.cmp(&left.1.match_type))
}
/// A single piece of scoring evidence: which signal produced it, how
/// strongly it moves the odds, and a displayable explanation.
#[derive(Debug, Clone)]
pub struct EvidenceEntry {
    // Which scoring signal produced this entry.
    pub kind: EvidenceKind,
    // Multiplicative odds update; >1 supports the match, <1 opposes it.
    pub bayes_factor: f64,
    // Human-readable account of the underlying observation.
    pub description: EvidenceDescription,
}
/// Categories of scoring evidence; one variant per factor the scorer can
/// contribute to a ledger.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EvidenceKind {
    // Prior from the detected match category (exact/prefix/...).
    MatchType,
    // Matched characters landing on word boundaries.
    WordBoundary,
    // How early in the title the first match occurs.
    Position,
    // Penalty for spread-out fuzzy matches.
    GapPenalty,
    // Query also matched one of the item's tags.
    TagMatch,
    // Title-length preference / query coverage of the title.
    TitleLength,
}
impl fmt::Display for EvidenceEntry {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let direction = if self.bayes_factor > 1.0 {
"supports"
} else if self.bayes_factor < 1.0 {
"opposes"
} else {
"neutral"
};
write!(
f,
"{:?}: BF={:.2} ({}) - {}",
self.kind, self.bayes_factor, direction, self.description
)
}
}
/// Structured payloads for evidence explanations; rendered lazily via
/// `Display` so entries stay cheap to construct.
#[derive(Debug, Clone)]
pub enum EvidenceDescription {
    Static(&'static str),
    TitleLengthChars { len: usize },
    FirstMatchPos { pos: usize },
    WordBoundaryCount { count: usize },
    GapTotal { total: usize },
    CoveragePercent { percent: f64 },
}
impl fmt::Display for EvidenceDescription {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Static(msg) => f.write_str(msg),
            Self::TitleLengthChars { len } => write!(f, "title length {len} chars"),
            Self::FirstMatchPos { pos } => write!(f, "first match at position {pos}"),
            Self::WordBoundaryCount { count } => write!(f, "{count} word boundary matches"),
            Self::GapTotal { total } => write!(f, "total gap of {total} characters"),
            Self::CoveragePercent { percent } => write!(f, "query covers {percent:.0}% of title"),
        }
    }
}
/// Ordered collection of evidence entries gathered while scoring one
/// query/title pair.
#[derive(Debug, Clone, Default)]
pub struct EvidenceLedger {
    // Entries in the order they were recorded.
    entries: Vec<EvidenceEntry>,
}
impl EvidenceLedger {
    /// Creates an empty ledger.
    pub fn new() -> Self {
        Self::default()
    }
    /// Records one piece of evidence.
    pub fn add(&mut self, kind: EvidenceKind, bayes_factor: f64, description: EvidenceDescription) {
        let entry = EvidenceEntry {
            kind,
            bayes_factor,
            description,
        };
        self.entries.push(entry);
    }
    /// All recorded evidence, in insertion order.
    pub fn entries(&self) -> &[EvidenceEntry] {
        self.entries.as_slice()
    }
    /// Product of every Bayes factor, including the match-type prior.
    pub fn combined_bayes_factor(&self) -> f64 {
        self.entries.iter().map(|e| e.bayes_factor).product()
    }
    /// Bayes factor of the first `MatchType` entry, if one was recorded;
    /// treated as the prior odds for the match.
    #[must_use = "use the prior odds (if any) for diagnostics"]
    pub fn prior_odds(&self) -> Option<f64> {
        self.entries
            .iter()
            .find(|e| e.kind == EvidenceKind::MatchType)
            .map(|e| e.bayes_factor)
    }
    /// Posterior probability: prior odds times the product of all
    /// non-prior Bayes factors, converted from odds to probability.
    /// Infinite odds collapse to probability 1.0.
    pub fn posterior_probability(&self) -> f64 {
        let prior = self.prior_odds().unwrap_or(1.0);
        let mut likelihood = 1.0;
        for entry in &self.entries {
            if entry.kind != EvidenceKind::MatchType {
                likelihood *= entry.bayes_factor;
            }
        }
        let odds = prior * likelihood;
        if odds.is_infinite() {
            1.0
        } else {
            odds / (1.0 + odds)
        }
    }
}
impl EvidenceLedger {
    /// Serializes the ledger as a single JSON object (one line, suitable
    /// for JSONL logs).
    ///
    /// Fix: description text is now escaped before interpolation, so a
    /// quote or backslash in an evidence description can no longer
    /// produce invalid JSON. Current descriptions contain neither, so
    /// the output is unchanged for existing inputs.
    #[must_use]
    pub fn to_jsonl(&self) -> String {
        // Minimal JSON string escaping: backslash first, then quote.
        fn escape(raw: &str) -> String {
            raw.replace('\\', "\\\\").replace('"', "\\\"")
        }
        let entries_json: Vec<String> = self
            .entries
            .iter()
            .map(|e| {
                format!(
                    r#"{{"kind":"{:?}","bf":{:.4},"desc":"{}"}}"#,
                    e.kind,
                    e.bayes_factor,
                    escape(&e.description.to_string())
                )
            })
            .collect();
        format!(
            r#"{{"schema":"palette-scoring-v1","entries":[{}],"combined_bf":{:.6},"posterior_prob":{:.6}}}"#,
            entries_json.join(","),
            self.combined_bayes_factor(),
            self.posterior_probability(),
        )
    }
}
impl fmt::Display for EvidenceLedger {
    /// Renders a multi-line report: every entry, then the combined Bayes
    /// factor and the posterior probability.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Evidence Ledger:")?;
        self.entries
            .iter()
            .try_for_each(|entry| writeln!(f, " {}", entry))?;
        writeln!(f, " Combined BF: {:.3}", self.combined_bayes_factor())?;
        writeln!(f, " Posterior P: {:.3}", self.posterior_probability())
    }
}
/// Outcome of scoring one query against one title.
#[derive(Debug, Clone)]
pub struct MatchResult {
    // Posterior match probability in [0, 1].
    pub score: f64,
    // Strongest match category that applied.
    pub match_type: MatchType,
    // Indices of the matched characters in the title. NOTE(review): the
    // ASCII fast path produces byte indices and the Unicode path char
    // indices (equal only for ASCII titles) — confirm callers expect
    // char indices.
    pub match_positions: Vec<usize>,
    // Per-factor audit trail (empty when evidence tracking is disabled).
    pub evidence: EvidenceLedger,
}
impl MatchResult {
    /// Canonical "does not match" result: zero score, no positions, and
    /// a ledger holding a single zero-odds match-type entry.
    pub fn no_match() -> Self {
        let mut ledger = EvidenceLedger::new();
        ledger.add(
            EvidenceKind::MatchType,
            0.0,
            EvidenceDescription::Static("no matching characters found"),
        );
        Self {
            score: 0.0,
            match_type: MatchType::NoMatch,
            match_positions: Vec::new(),
            evidence: ledger,
        }
    }
}
/// Query-vs-title scorer combining a match-type prior with several
/// Bayes-factor adjustments.
#[derive(Debug, Clone, Default)]
pub struct BayesianScorer {
    // When true, every factor is recorded in an `EvidenceLedger`; when
    // false only the final score is computed (faster, empty ledger).
    pub track_evidence: bool,
}
impl BayesianScorer {
/// Scorer that records a full evidence ledger for every result
/// (explainable, slightly slower).
pub fn new() -> Self {
    Self {
        track_evidence: true,
    }
}
/// Scorer that skips evidence tracking and only produces scores.
pub fn fast() -> Self {
    Self {
        track_evidence: false,
    }
}
/// Scores `query` against `title`, lowercasing both internally.
///
/// A query with more bytes than the title is rejected outright; an
/// empty query matches everything with a slight preference for short
/// titles.
pub fn score(&self, query: &str, title: &str) -> MatchResult {
    if query.len() > title.len() {
        return MatchResult::no_match();
    }
    if query.is_empty() {
        return self.score_empty_query(title);
    }
    let query_lower = query.to_lowercase();
    let title_lower = title.to_lowercase();
    match self.detect_match_type(&query_lower, &title_lower, title) {
        (MatchType::NoMatch, _) => MatchResult::no_match(),
        (match_type, positions) => self.compute_score(match_type, positions, &query_lower, title),
    }
}
/// Like `score`, but the caller supplies an already-lowercased query so
/// it is not re-lowercased per title.
pub fn score_with_query_lower(
    &self,
    query: &str,
    query_lower: &str,
    title: &str,
) -> MatchResult {
    let title_lower = title.to_lowercase();
    self.score_with_lowered_title(query, query_lower, title, &title_lower)
}
/// Like `score`, but with both the query and the title pre-lowercased
/// by the caller. Delegates with no precomputed word starts.
pub fn score_with_lowered_title(
    &self,
    query: &str,
    query_lower: &str,
    title: &str,
    title_lower: &str,
) -> MatchResult {
    self.score_with_lowered_title_and_words(query, query_lower, title, title_lower, None)
}
/// Fully pre-lowered scoring entry point; optionally accepts
/// precomputed word-start offsets for the ASCII fast path.
pub fn score_with_lowered_title_and_words(
    &self,
    query: &str,
    query_lower: &str,
    title: &str,
    title_lower: &str,
    word_starts: Option<&[usize]>,
) -> MatchResult {
    // Byte-length guard: a longer query can never match.
    if query.len() > title.len() {
        return MatchResult::no_match();
    }
    if query.is_empty() {
        return self.score_empty_query(title);
    }
    let (match_type, positions) =
        self.detect_match_type_with_words(query_lower, title_lower, title, word_starts);
    match match_type {
        MatchType::NoMatch => MatchResult::no_match(),
        _ => self.compute_score(match_type, positions, query_lower, title),
    }
}
/// Scores `query` against `title`, then applies a fixed 3x odds boost
/// when the query also occurs (case-insensitively) inside any tag.
pub fn score_with_tags(&self, query: &str, title: &str, tags: &[&str]) -> MatchResult {
    let mut result = self.score(query, title);
    let query_lower = query.to_lowercase();
    let tag_hit = tags
        .iter()
        .any(|tag| crate::contains_ignore_case(tag, &query_lower));
    // Only boost real matches that also hit a tag.
    if !(tag_hit && result.match_type != MatchType::NoMatch) {
        return result;
    }
    if self.track_evidence {
        result.evidence.add(
            EvidenceKind::TagMatch,
            3.0,
            EvidenceDescription::Static("query matches tag"),
        );
        result.score = result.evidence.posterior_probability();
    } else if (0.0..1.0).contains(&result.score) {
        // Probability -> odds, apply the boost, and convert back.
        let odds = result.score / (1.0 - result.score);
        let boosted = odds * 3.0;
        result.score = boosted / (1.0 + boosted);
    }
    result
}
/// Scoring for an empty query: everything matches weakly, with a tiny
/// bonus for shorter titles so they sort first.
fn score_empty_query(&self, title: &str) -> MatchResult {
    // Shorter titles receive a slightly larger factor (at most 1.1).
    let length_factor = 1.0 + (1.0 / (title.len() as f64 + 1.0)) * 0.1;
    if !self.track_evidence {
        // Fast path: odds -> probability directly, empty ledger.
        let score = length_factor / (1.0 + length_factor);
        return MatchResult {
            score,
            match_type: MatchType::Fuzzy,
            match_positions: Vec::new(),
            evidence: EvidenceLedger::new(),
        };
    }
    let mut evidence = EvidenceLedger::new();
    evidence.add(
        EvidenceKind::MatchType,
        1.0,
        EvidenceDescription::Static("empty query matches all"),
    );
    evidence.add(
        EvidenceKind::TitleLength,
        length_factor,
        EvidenceDescription::TitleLengthChars { len: title.len() },
    );
    MatchResult {
        score: evidence.posterior_probability(),
        match_type: MatchType::Fuzzy,
        match_positions: Vec::new(),
        evidence,
    }
}
/// Classifies the match without precomputed word starts; see
/// `detect_match_type_with_words` for the semantics.
fn detect_match_type(
    &self,
    query_lower: &str,
    title_lower: &str,
    title: &str,
) -> (MatchType, Vec<usize>) {
    self.detect_match_type_with_words(query_lower, title_lower, title, None)
}
/// Classifies how `query_lower` matches `title_lower`, trying the
/// strongest category first: Exact, Prefix, WordStart, Substring, then
/// Fuzzy. Returns the category plus the matched positions.
///
/// On the ASCII fast path positions are byte indices; on the Unicode
/// path below they are CHAR indices (byte offsets are converted via
/// `chars().count()`).
fn detect_match_type_with_words(
    &self,
    query_lower: &str,
    title_lower: &str,
    title: &str,
    word_starts: Option<&[usize]>,
) -> (MatchType, Vec<usize>) {
    // ASCII-only inputs can be matched byte-wise.
    if query_lower.is_ascii() && title_lower.is_ascii() {
        return self.detect_match_type_ascii(query_lower, title_lower, word_starts);
    }
    // NOTE(review): precomputed `word_starts` are only honored on the
    // ASCII path above; this Unicode path derives word starts itself.
    if query_lower == title_lower {
        // Exact: every char position of the ORIGINAL title is matched
        // (char count of `title`, not of the lowered form).
        let positions: Vec<usize> = (0..title.chars().count()).collect();
        return (MatchType::Exact, positions);
    }
    if title_lower.starts_with(query_lower) {
        let positions: Vec<usize> = (0..query_lower.chars().count()).collect();
        return (MatchType::Prefix, positions);
    }
    if let Some(positions) = self.word_start_match(query_lower, title_lower) {
        return (MatchType::WordStart, positions);
    }
    if let Some(byte_start) = title_lower.find(query_lower) {
        // Convert the byte offset of the hit into a char offset.
        let char_start = title_lower[..byte_start].chars().count();
        let char_len = query_lower.chars().count();
        let positions: Vec<usize> = (char_start..char_start + char_len).collect();
        return (MatchType::Substring, positions);
    }
    if let Some(positions) = self.fuzzy_match(query_lower, title_lower) {
        return (MatchType::Fuzzy, positions);
    }
    (MatchType::NoMatch, Vec::new())
}
/// ASCII-only detection; positions are byte indices, which coincide
/// with char indices for ASCII input. Same category order as the
/// Unicode path: Exact, Prefix, WordStart, Substring, Fuzzy.
fn detect_match_type_ascii(
    &self,
    query_lower: &str,
    title_lower: &str,
    word_starts: Option<&[usize]>,
) -> (MatchType, Vec<usize>) {
    let q = query_lower.as_bytes();
    let t = title_lower.as_bytes();
    if q == t {
        return (MatchType::Exact, (0..t.len()).collect());
    }
    if t.starts_with(q) {
        return (MatchType::Prefix, (0..q.len()).collect());
    }
    if let Some(positions) = self.word_start_match_ascii(q, t, word_starts) {
        return (MatchType::WordStart, positions);
    }
    if let Some(start) = title_lower.find(query_lower) {
        return (MatchType::Substring, (start..start + q.len()).collect());
    }
    if let Some(positions) = self.fuzzy_match_ascii(q, t) {
        return (MatchType::Fuzzy, positions);
    }
    (MatchType::NoMatch, Vec::new())
}
fn word_start_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
let mut positions = Vec::new();
let mut query_chars = query.chars().peekable();
let title_bytes = title.as_bytes();
for (char_index, (i, c)) in title.char_indices().enumerate() {
let is_word_start = i == 0 || {
let prev = title_bytes
.get(i.saturating_sub(1))
.copied()
.unwrap_or(b' ');
prev == b' ' || prev == b'-' || prev == b'_'
};
if is_word_start
&& let Some(&qc) = query_chars.peek()
&& c == qc
{
positions.push(char_index);
query_chars.next();
}
}
if query_chars.peek().is_none() {
Some(positions)
} else {
None
}
}
/// ASCII word-start matcher. When `word_starts` offsets are supplied
/// they are trusted (out-of-range entries are skipped); otherwise word
/// starts are derived from the preceding byte.
fn word_start_match_ascii(
    &self,
    query: &[u8],
    title: &[u8],
    word_starts: Option<&[usize]>,
) -> Option<Vec<usize>> {
    if query.is_empty() {
        return Some(Vec::new());
    }
    let mut positions = Vec::new();
    let mut next = 0usize;
    match word_starts {
        Some(starts) => {
            for &pos in starts {
                if pos >= title.len() {
                    // Stale precomputed offset; ignore it.
                    continue;
                }
                if title[pos] == query[next] {
                    positions.push(pos);
                    next += 1;
                    if next == query.len() {
                        return Some(positions);
                    }
                }
            }
        }
        None => {
            for (i, &b) in title.iter().enumerate() {
                let starts_word = i == 0 || matches!(title[i - 1], b' ' | b'-' | b'_');
                if starts_word && b == query[next] {
                    positions.push(i);
                    next += 1;
                    if next == query.len() {
                        return Some(positions);
                    }
                }
            }
        }
    }
    // Query not fully consumed: no word-start match.
    None
}
fn fuzzy_match(&self, query: &str, title: &str) -> Option<Vec<usize>> {
let mut positions = Vec::new();
let mut query_chars = query.chars().peekable();
for (char_index, c) in title.chars().enumerate() {
if let Some(&qc) = query_chars.peek()
&& c == qc
{
positions.push(char_index);
query_chars.next();
}
}
if query_chars.peek().is_none() {
Some(positions)
} else {
None
}
}
/// ASCII subsequence matcher over raw bytes; positions are byte
/// indices. Returns early as soon as the whole query is consumed.
fn fuzzy_match_ascii(&self, query: &[u8], title: &[u8]) -> Option<Vec<usize>> {
    if query.is_empty() {
        return Some(Vec::new());
    }
    let mut positions = Vec::with_capacity(query.len());
    let mut next = 0usize;
    for (i, &b) in title.iter().enumerate() {
        if b == query[next] {
            positions.push(i);
            next += 1;
            if next == query.len() {
                return Some(positions);
            }
        }
    }
    None
}
/// Turns a detected match into a final probability.
///
/// The evidence factors (first-match position, word-boundary hits,
/// fuzzy gap penalty, query coverage) were previously computed twice —
/// once per `track_evidence` branch. They are now computed exactly once
/// and either folded straight into the odds (fast path) or recorded in
/// a ledger first (tracked path). The fast path also gains the same
/// infinite-odds guard that `posterior_probability` applies, so a
/// runaway Bayes factor yields 1.0 instead of NaN.
fn compute_score(
    &self,
    match_type: MatchType,
    positions: Vec<usize>,
    query: &str,
    title: &str,
) -> MatchResult {
    let positions_ref = positions.as_slice();
    // Earlier first match => larger factor, bounded by 1.5 at position 0.
    let position_factor = positions_ref
        .first()
        .map(|&first| (first, 1.0 + (1.0 / (first as f64 + 1.0)) * 0.5));
    // Each matched char on a word boundary adds 0.3 to the factor.
    let word_boundary_count = self.count_word_boundaries(positions_ref, title);
    let boundary_factor =
        (word_boundary_count > 0).then(|| 1.0 + (word_boundary_count as f64 * 0.3));
    // Fuzzy matches are penalized for the total spread between hits.
    let gap_factor = (match_type == MatchType::Fuzzy && positions_ref.len() > 1).then(|| {
        let total_gap = self.total_gap(positions_ref);
        (total_gap, 1.0 / (1.0 + total_gap as f64 * 0.1))
    });
    // Byte-length coverage of the title by the query (up to +20%).
    let title_len = title.len().max(1);
    let coverage = query.len() as f64 / title_len as f64;
    let length_factor = 1.0 + coverage * 0.2;
    if !self.track_evidence {
        // Fast path: multiply the factors straight into the odds, in
        // the same order the ledger records them.
        let mut combined_bf = match_type.prior_odds();
        if let Some((_, factor)) = position_factor {
            combined_bf *= factor;
        }
        if let Some(factor) = boundary_factor {
            combined_bf *= factor;
        }
        if let Some((_, factor)) = gap_factor {
            combined_bf *= factor;
        }
        combined_bf *= length_factor;
        let score = if combined_bf.is_infinite() {
            1.0
        } else {
            combined_bf / (1.0 + combined_bf)
        };
        return MatchResult {
            score,
            match_type,
            match_positions: positions,
            evidence: EvidenceLedger::new(),
        };
    }
    // Tracked path: record every factor, then derive the posterior.
    let mut evidence = EvidenceLedger::new();
    evidence.add(
        EvidenceKind::MatchType,
        match_type.prior_odds(),
        EvidenceDescription::Static(match_type.description()),
    );
    if let Some((first_pos, factor)) = position_factor {
        evidence.add(
            EvidenceKind::Position,
            factor,
            EvidenceDescription::FirstMatchPos { pos: first_pos },
        );
    }
    if let Some(factor) = boundary_factor {
        evidence.add(
            EvidenceKind::WordBoundary,
            factor,
            EvidenceDescription::WordBoundaryCount {
                count: word_boundary_count,
            },
        );
    }
    if let Some((total_gap, factor)) = gap_factor {
        evidence.add(
            EvidenceKind::GapPenalty,
            factor,
            EvidenceDescription::GapTotal { total: total_gap },
        );
    }
    evidence.add(
        EvidenceKind::TitleLength,
        length_factor,
        EvidenceDescription::CoveragePercent {
            percent: coverage * 100.0,
        },
    );
    let score = evidence.posterior_probability();
    MatchResult {
        score,
        match_type,
        match_positions: positions,
        evidence,
    }
}
/// Counts how many match positions sit on a word boundary (offset 0 or
/// right after a space/hyphen/underscore).
///
/// Fix: the Unicode detection path produces CHAR indices, but the old
/// implementation always indexed raw bytes, mis-detecting boundaries
/// for non-ASCII titles where char and byte offsets diverge. ASCII
/// titles (where byte == char index) keep the allocation-free byte
/// path and the identical result, including the "out-of-range position
/// counts as a boundary" behavior of the old `unwrap_or(b' ')`.
fn count_word_boundaries(&self, positions: &[usize], title: &str) -> usize {
    if title.is_ascii() {
        let bytes = title.as_bytes();
        positions
            .iter()
            .filter(|&&pos| {
                pos == 0 || matches!(bytes.get(pos - 1), None | Some(b' ' | b'-' | b'_'))
            })
            .count()
    } else {
        let chars: Vec<char> = title.chars().collect();
        positions
            .iter()
            .filter(|&&pos| {
                pos == 0 || matches!(chars.get(pos - 1), None | Some(' ' | '-' | '_'))
            })
            .count()
    }
}
/// Sum of the holes between consecutive match positions; 0 for fewer
/// than two positions (`windows(2)` yields nothing).
fn total_gap(&self, positions: &[usize]) -> usize {
    positions
        .windows(2)
        .map(|pair| pair[1].saturating_sub(pair[0]).saturating_sub(1))
        .sum()
}
}
/// Confidence metadata attached to a single ranked item.
#[derive(Debug, Clone)]
pub struct RankConfidence {
    // Fraction of observed score gaps that are <= this item's gap.
    pub confidence: f64,
    // Score difference to the next-ranked item (0 for the last item).
    pub gap_to_next: f64,
    // Coarse classification derived from `confidence` and tie detection.
    pub stability: RankStability,
}
/// How trustworthy a rank position is: `Stable` for well-separated
/// scores, `Marginal` in between, `Unstable` for ties or weak gaps.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RankStability {
    Stable,
    Marginal,
    Unstable,
}
impl RankStability {
    /// Lowercase label for display output.
    pub fn label(self) -> &'static str {
        match self {
            Self::Unstable => "unstable",
            Self::Marginal => "marginal",
            Self::Stable => "stable",
        }
    }
}
/// Output of `ConformalRanker::rank`: the ranked items plus aggregate
/// statistics over the whole ranking.
#[derive(Debug, Clone)]
pub struct RankedResults {
    // Best-first ranked items.
    pub items: Vec<RankedItem>,
    // Aggregate statistics for the ranking pass.
    pub summary: RankingSummary,
}
/// One ranked entry: where it came from, its score, and how trustworthy
/// its position in the ranking is.
#[derive(Debug, Clone)]
pub struct RankedItem {
    // Index into the original (pre-sort) results vector.
    pub original_index: usize,
    // The underlying match result.
    pub result: MatchResult,
    // Rank-stability diagnostics for this position.
    pub rank_confidence: RankConfidence,
}
/// Aggregate statistics for one ranking pass.
#[derive(Debug, Clone)]
pub struct RankingSummary {
    // Total number of ranked items.
    pub count: usize,
    // Items classified as `RankStability::Stable`.
    pub stable_count: usize,
    // Number of runs of adjacent near-equal scores (tie groups).
    pub tie_group_count: usize,
    // Median of the adjacent-score gaps (0.0 with fewer than two items).
    pub median_gap: f64,
}
/// Ranks `MatchResult`s and labels each rank's stability based on the
/// empirical distribution of adjacent score gaps.
#[derive(Debug, Clone)]
pub struct ConformalRanker {
    // Adjacent scores closer than this count as a tie.
    pub tie_epsilon: f64,
    // Minimum confidence for a rank to be labeled Stable.
    pub stable_threshold: f64,
    // Minimum confidence for Marginal; anything below is Unstable.
    pub marginal_threshold: f64,
}
impl Default for ConformalRanker {
    /// Near-zero tie epsilon plus 0.7/0.3 stability cut-offs.
    fn default() -> Self {
        Self {
            tie_epsilon: 1e-9,
            stable_threshold: 0.7,
            marginal_threshold: 0.3,
        }
    }
}
impl ConformalRanker {
/// Ranker with the default thresholds (see the `Default` impl).
pub fn new() -> Self {
    Self::default()
}
pub fn rank(&self, results: Vec<MatchResult>) -> RankedResults {
let count = results.len();
if count == 0 {
return RankedResults {
items: Vec::new(),
summary: RankingSummary {
count: 0,
stable_count: 0,
tie_group_count: 0,
median_gap: 0.0,
},
};
}
let mut indexed: Vec<(usize, MatchResult)> = results.into_iter().enumerate().collect();
indexed.sort_by(|a, b| {
b.1.score
.partial_cmp(&a.1.score)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| b.1.match_type.cmp(&a.1.match_type))
});
let gaps: Vec<f64> = if count > 1 {
indexed
.windows(2)
.map(|w| (w[0].1.score - w[1].1.score).max(0.0))
.collect()
} else {
Vec::new()
};
let mut sorted_gaps = gaps.clone();
sorted_gaps.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mut items: Vec<RankedItem> = Vec::with_capacity(count);
let mut stable_count = 0;
let mut tie_group_count = 0;
let mut in_tie_group = false;
for (rank, (orig_idx, result)) in indexed.into_iter().enumerate() {
let gap_to_next = gaps.get(rank).copied().unwrap_or(0.0);
let confidence = if sorted_gaps.is_empty() {
1.0
} else {
let leq_count =
sorted_gaps.partition_point(|&g| g <= gap_to_next + self.tie_epsilon * 0.5);
leq_count as f64 / sorted_gaps.len() as f64
};
let is_tie = gap_to_next < self.tie_epsilon;
let stability = if is_tie {
if !in_tie_group {
tie_group_count += 1;
in_tie_group = true;
}
RankStability::Unstable
} else {
in_tie_group = false;
if confidence >= self.stable_threshold {
stable_count += 1;
RankStability::Stable
} else if confidence >= self.marginal_threshold {
RankStability::Marginal
} else {
RankStability::Unstable
}
};
items.push(RankedItem {
original_index: orig_idx,
result,
rank_confidence: RankConfidence {
confidence,
gap_to_next,
stability,
},
});
}
let median_gap = if sorted_gaps.is_empty() {
0.0
} else {
let mid = sorted_gaps.len() / 2;
if sorted_gaps.len().is_multiple_of(2) {
(sorted_gaps[mid - 1] + sorted_gaps[mid]) / 2.0
} else {
sorted_gaps[mid]
}
};
RankedResults {
items,
summary: RankingSummary {
count,
stable_count,
tie_group_count,
median_gap,
},
}
}
/// Ranks and keeps only the best `k` items, recomputing the stable
/// count for the truncated list (the other summary fields still
/// describe the full ranking).
pub fn rank_top_k(&self, results: Vec<MatchResult>, k: usize) -> RankedResults {
    let mut ranked = self.rank(results);
    ranked.items.truncate(k);
    let stable = ranked
        .items
        .iter()
        .filter(|item| item.rank_confidence.stability == RankStability::Stable)
        .count();
    ranked.summary.stable_count = stable;
    ranked
}
}
impl fmt::Display for RankConfidence {
    /// One-line diagnostic: confidence, gap, and stability label.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.stability.label();
        write!(
            f,
            "confidence={:.3} gap={:.4} ({})",
            self.confidence, self.gap_to_next, label
        )
    }
}
impl fmt::Display for RankingSummary {
    /// One-line aggregate report of the ranking pass.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            count,
            stable_count,
            tie_group_count,
            median_gap,
        } = self;
        write!(
            f,
            "{} items, {} stable, {} tie groups, median gap {:.4}",
            count, stable_count, tie_group_count, median_gap
        )
    }
}
/// Cache record linking a surviving match from the previous query back
/// to its slot in the corpus; used to prune re-scoring when the query
/// is extended.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CachedEntry {
    // Index into the corpus slice passed to `score_corpus*`.
    corpus_index: usize,
}
/// Scorer that caches the survivors of the previous query so that
/// extending the query (typing one more character) only re-scores the
/// prior matches instead of the whole corpus.
#[derive(Debug)]
pub struct IncrementalScorer {
    // Underlying scorer applied to each title.
    scorer: BayesianScorer,
    // Query from the previous `score_corpus*` call.
    prev_query: String,
    // Corpus indices that scored > 0 for `prev_query`.
    cache: Vec<CachedEntry>,
    // Caller-supplied corpus revision; a mismatch invalidates the cache.
    corpus_generation: u64,
    // Corpus length seen on the last call; a change invalidates too.
    corpus_len: usize,
    // Scan/prune counters.
    stats: IncrementalStats,
}
/// Counters describing how much work incremental rescoring avoided.
#[derive(Debug, Clone, Default)]
pub struct IncrementalStats {
    pub full_scans: u64,
    pub incremental_scans: u64,
    pub total_evaluated: u64,
    pub total_pruned: u64,
}
impl IncrementalStats {
    /// Fraction of candidates skipped across all scans; 0.0 when no
    /// candidate has been seen yet.
    pub fn prune_ratio(&self) -> f64 {
        match self.total_evaluated + self.total_pruned {
            0 => 0.0,
            total => self.total_pruned as f64 / total as f64,
        }
    }
}
impl IncrementalScorer {
pub fn new() -> Self {
Self {
scorer: BayesianScorer::fast(),
prev_query: String::new(),
cache: Vec::new(),
corpus_generation: 0,
corpus_len: 0,
stats: IncrementalStats::default(),
}
}
/// Incremental scorer wrapping the given base scorer, starting with an
/// empty cache and zeroed statistics.
pub fn with_scorer(scorer: BayesianScorer) -> Self {
    Self {
        scorer,
        prev_query: String::new(),
        cache: Vec::new(),
        corpus_len: 0,
        corpus_generation: 0,
        stats: IncrementalStats::default(),
    }
}
/// Drops all cached state and bumps the generation counter so stale
/// caller-supplied generations no longer match.
pub fn invalidate(&mut self) {
    self.cache.clear();
    self.prev_query.clear();
    self.corpus_generation = self.corpus_generation.wrapping_add(1);
}
/// Read-only access to the scan/prune counters.
pub fn stats(&self) -> &IncrementalStats {
    &self.stats
}
/// Scores `query` against every title, reusing the previous query's
/// surviving candidates when `query` merely extends it.
///
/// `generation` identifies the corpus revision; a new value (or a
/// changed corpus length) invalidates the cache first.
pub fn score_corpus(
    &mut self,
    query: &str,
    corpus: &[&str],
    generation: Option<u64>,
) -> Vec<(usize, MatchResult)> {
    let generation_val = generation.unwrap_or(self.corpus_generation);
    let corpus_changed =
        generation_val != self.corpus_generation || corpus.len() != self.corpus_len;
    if corpus_changed {
        self.invalidate();
        self.corpus_generation = generation_val;
        self.corpus_len = corpus.len();
    }
    // Incremental pruning is only sound when the new query extends the
    // previous one (matches can only shrink).
    let extends_prev = !self.prev_query.is_empty()
        && query.starts_with(&self.prev_query)
        && !self.cache.is_empty();
    let query_lower = query.to_lowercase();
    let results = if extends_prev {
        self.score_incremental(query, &query_lower, corpus)
    } else {
        self.score_full(query, &query_lower, corpus)
    };
    // Remember this query's survivors for the next call.
    self.prev_query.clear();
    self.prev_query.push_str(query);
    self.cache = results
        .iter()
        .map(|(idx, _)| CachedEntry { corpus_index: *idx })
        .collect();
    results
}
/// Variant of `score_corpus` that takes pre-lowercased titles so the
/// per-title lowercase allocation is skipped.
pub fn score_corpus_with_lowered(
    &mut self,
    query: &str,
    corpus: &[&str],
    corpus_lower: &[&str],
    generation: Option<u64>,
) -> Vec<(usize, MatchResult)> {
    debug_assert_eq!(
        corpus.len(),
        corpus_lower.len(),
        "corpus_lower must match corpus length"
    );
    let generation_val = generation.unwrap_or(self.corpus_generation);
    let corpus_changed =
        generation_val != self.corpus_generation || corpus.len() != self.corpus_len;
    if corpus_changed {
        self.invalidate();
        self.corpus_generation = generation_val;
        self.corpus_len = corpus.len();
    }
    let extends_prev = !self.prev_query.is_empty()
        && query.starts_with(&self.prev_query)
        && !self.cache.is_empty();
    let query_lower = query.to_lowercase();
    let results = if extends_prev {
        self.score_incremental_lowered(query, &query_lower, corpus, corpus_lower)
    } else {
        self.score_full_lowered(query, &query_lower, corpus, corpus_lower)
    };
    // Remember this query's survivors for the next call.
    self.prev_query.clear();
    self.prev_query.push_str(query);
    self.cache = results
        .iter()
        .map(|(idx, _)| CachedEntry { corpus_index: *idx })
        .collect();
    results
}
/// Variant of `score_corpus` that takes pre-lowercased titles and
/// precomputed per-title word-start offsets.
pub fn score_corpus_with_lowered_and_words(
    &mut self,
    query: &str,
    corpus: &[String],
    corpus_lower: &[String],
    word_starts: &[Vec<usize>],
    generation: Option<u64>,
) -> Vec<(usize, MatchResult)> {
    debug_assert_eq!(
        corpus.len(),
        corpus_lower.len(),
        "corpus_lower must match corpus length"
    );
    debug_assert_eq!(
        corpus.len(),
        word_starts.len(),
        "word_starts must match corpus length"
    );
    let generation_val = generation.unwrap_or(self.corpus_generation);
    let corpus_changed =
        generation_val != self.corpus_generation || corpus.len() != self.corpus_len;
    if corpus_changed {
        self.invalidate();
        self.corpus_generation = generation_val;
        self.corpus_len = corpus.len();
    }
    let extends_prev = !self.prev_query.is_empty()
        && query.starts_with(&self.prev_query)
        && !self.cache.is_empty();
    let query_lower = query.to_lowercase();
    let results = if extends_prev {
        self.score_incremental_lowered_with_words(
            query,
            &query_lower,
            corpus,
            corpus_lower,
            word_starts,
        )
    } else {
        self.score_full_lowered_with_words(
            query,
            &query_lower,
            corpus,
            corpus_lower,
            word_starts,
        )
    };
    // Remember this query's survivors for the next call.
    self.prev_query.clear();
    self.prev_query.push_str(query);
    self.cache = results
        .iter()
        .map(|(idx, _)| CachedEntry { corpus_index: *idx })
        .collect();
    results
}
/// Scores every corpus entry, keeping only positive scores, best first.
fn score_full(
    &mut self,
    query: &str,
    query_lower: &str,
    corpus: &[&str],
) -> Vec<(usize, MatchResult)> {
    self.stats.full_scans += 1;
    self.stats.total_evaluated += corpus.len() as u64;
    let mut hits: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
    for (idx, title) in corpus.iter().enumerate() {
        let scored = self.scorer.score_with_query_lower(query, query_lower, title);
        if scored.score > 0.0 {
            hits.push((idx, scored));
        }
    }
    hits.sort_unstable_by(compare_ranked_match_results);
    hits
}
/// Full scan over pre-lowered titles; keeps positive scores, best first.
fn score_full_lowered(
    &mut self,
    query: &str,
    query_lower: &str,
    corpus: &[&str],
    corpus_lower: &[&str],
) -> Vec<(usize, MatchResult)> {
    self.stats.full_scans += 1;
    self.stats.total_evaluated += corpus.len() as u64;
    let mut hits: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
    for (idx, (&title, &title_lower)) in corpus.iter().zip(corpus_lower.iter()).enumerate() {
        let scored = self
            .scorer
            .score_with_lowered_title(query, query_lower, title, title_lower);
        if scored.score > 0.0 {
            hits.push((idx, scored));
        }
    }
    hits.sort_unstable_by(compare_ranked_match_results);
    hits
}
/// Full scan with pre-lowered titles and precomputed word-start offsets.
fn score_full_lowered_with_words(
    &mut self,
    query: &str,
    query_lower: &str,
    corpus: &[String],
    corpus_lower: &[String],
    word_starts: &[Vec<usize>],
) -> Vec<(usize, MatchResult)> {
    self.stats.full_scans += 1;
    self.stats.total_evaluated += corpus.len() as u64;
    let mut hits: Vec<(usize, MatchResult)> = Vec::with_capacity(corpus.len());
    let rows = corpus
        .iter()
        .zip(corpus_lower.iter())
        .zip(word_starts.iter());
    for (idx, ((title, title_lower), starts)) in rows.enumerate() {
        let scored = self.scorer.score_with_lowered_title_and_words(
            query,
            query_lower,
            title,
            title_lower,
            Some(starts),
        );
        if scored.score > 0.0 {
            hits.push((idx, scored));
        }
    }
    hits.sort_unstable_by(compare_ranked_match_results);
    hits
}
/// Re-scores only the candidates that survived the previous (prefix)
/// query; everything else was already ruled out and counts as pruned.
fn score_incremental(
    &mut self,
    query: &str,
    query_lower: &str,
    corpus: &[&str],
) -> Vec<(usize, MatchResult)> {
    self.stats.incremental_scans += 1;
    let surviving = self.cache.len();
    self.stats.total_pruned += corpus.len().saturating_sub(surviving) as u64;
    self.stats.total_evaluated += surviving as u64;
    let mut hits: Vec<(usize, MatchResult)> = Vec::with_capacity(surviving.min(corpus.len()));
    for entry in &self.cache {
        // Cached index may be stale if the corpus shrank.
        let Some(&title) = corpus.get(entry.corpus_index) else {
            continue;
        };
        let scored = self.scorer.score_with_query_lower(query, query_lower, title);
        if scored.score > 0.0 {
            hits.push((entry.corpus_index, scored));
        }
    }
    hits.sort_unstable_by(compare_ranked_match_results);
    hits
}
/// Incremental scan over pre-lowered titles: only candidates cached
/// from the previous prefix query are re-scored.
fn score_incremental_lowered(
    &mut self,
    query: &str,
    query_lower: &str,
    corpus: &[&str],
    corpus_lower: &[&str],
) -> Vec<(usize, MatchResult)> {
    self.stats.incremental_scans += 1;
    let surviving = self.cache.len();
    self.stats.total_pruned += corpus.len().saturating_sub(surviving) as u64;
    self.stats.total_evaluated += surviving as u64;
    let mut hits: Vec<(usize, MatchResult)> = Vec::with_capacity(surviving.min(corpus.len()));
    for entry in &self.cache {
        let idx = entry.corpus_index;
        if idx >= corpus.len() {
            // Corpus shrank since this entry was cached.
            continue;
        }
        let scored = self.scorer.score_with_lowered_title(
            query,
            query_lower,
            corpus[idx],
            corpus_lower[idx],
        );
        if scored.score > 0.0 {
            hits.push((idx, scored));
        }
    }
    hits.sort_unstable_by(compare_ranked_match_results);
    hits
}
fn score_incremental_lowered_with_words(
&mut self,
query: &str,
query_lower: &str,
corpus: &[String],
corpus_lower: &[String],
word_starts: &[Vec<usize>],
) -> Vec<(usize, MatchResult)> {
self.stats.incremental_scans += 1;
let prev_match_count = self.cache.len();
let pruned = corpus.len().saturating_sub(prev_match_count);
self.stats.total_pruned += pruned as u64;
self.stats.total_evaluated += prev_match_count as u64;
let mut results: Vec<(usize, MatchResult)> =
Vec::with_capacity(self.cache.len().min(corpus.len()));
for entry in &self.cache {
if entry.corpus_index < corpus.len() {
let title = &corpus[entry.corpus_index];
let title_lower = &corpus_lower[entry.corpus_index];
let starts = &word_starts[entry.corpus_index];
let result = self.scorer.score_with_lowered_title_and_words(
query,
query_lower,
title,
title_lower,
Some(starts),
);
if result.score > 0.0 {
results.push((entry.corpus_index, result));
}
}
}
results.sort_by(|a, b| {
b.1.score
.partial_cmp(&a.1.score)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| b.1.match_type.cmp(&a.1.match_type))
});
results
}
}
impl Default for IncrementalScorer {
    /// Same as `IncrementalScorer::new`: fast scorer, empty cache.
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn exact_match_highest_score() {
let scorer = BayesianScorer::new();
let result = scorer.score("Settings", "Settings");
assert_eq!(result.match_type, MatchType::Exact);
assert!(result.score > 0.95, "Exact match should score > 0.95");
}
#[test]
fn prefix_match_high_score() {
let scorer = BayesianScorer::new();
let result = scorer.score("set", "Settings");
assert_eq!(result.match_type, MatchType::Prefix);
assert!(result.score > 0.85, "Prefix match should score > 0.85");
}
#[test]
fn word_start_match_score() {
let scorer = BayesianScorer::new();
let result = scorer.score("gd", "Go Dashboard");
assert_eq!(result.match_type, MatchType::WordStart);
assert!(result.score > 0.75, "Word-start should score > 0.75");
}
#[test]
fn substring_match_moderate_score() {
let scorer = BayesianScorer::new();
let result = scorer.score("set", "Asset Manager");
assert_eq!(result.match_type, MatchType::Substring);
assert!(result.score > 0.5, "Substring should score > 0.5");
}
#[test]
fn fuzzy_match_low_score() {
let scorer = BayesianScorer::new();
let result = scorer.score("stg", "Settings");
assert_eq!(result.match_type, MatchType::Fuzzy);
assert!(result.score > 0.2, "Fuzzy should score > 0.2");
assert!(result.score < 0.7, "Fuzzy should score < 0.7");
}
#[test]
fn no_match_returns_zero() {
let scorer = BayesianScorer::new();
let result = scorer.score("xyz", "Settings");
assert_eq!(result.match_type, MatchType::NoMatch);
assert_eq!(result.score, 0.0);
}
#[test]
fn word_start_match_ascii_empty_query_matches() {
let scorer = BayesianScorer::new();
let positions = scorer
.word_start_match_ascii(b"", b"go dashboard", None)
.expect("empty query should match");
assert!(positions.is_empty());
}
#[test]
fn word_start_match_ascii_skips_out_of_bounds_precomputed_positions() {
let scorer = BayesianScorer::new();
let starts = [999, 0, 3];
let positions = scorer
.word_start_match_ascii(b"gd", b"go dashboard", Some(&starts))
.expect("should still match despite out-of-bounds precomputed starts");
assert_eq!(positions, vec![0, 3]);
}
#[test]
fn fuzzy_match_ascii_empty_query_matches() {
let scorer = BayesianScorer::new();
let positions = scorer
.fuzzy_match_ascii(b"", b"settings")
.expect("empty query should match");
assert!(positions.is_empty());
}
#[test]
fn score_bounded() {
let scorer = BayesianScorer::new();
let test_cases = [
("a", "abcdefghijklmnop"),
("full", "full"),
("", "anything"),
("xyz", "abc"),
("stg", "Settings"),
];
for (query, title) in test_cases {
let result = scorer.score(query, title);
assert!(
result.score >= 0.0 && result.score <= 1.0,
"Score for ({}, {}) = {} not in [0, 1]",
query,
title,
result.score
);
}
}
#[test]
fn score_deterministic() {
let scorer = BayesianScorer::new();
let result1 = scorer.score("nav", "Navigation");
let result2 = scorer.score("nav", "Navigation");
assert!(
(result1.score - result2.score).abs() < f64::EPSILON,
"Same input should produce identical scores"
);
}
#[test]
fn tiebreak_shorter_first() {
let scorer = BayesianScorer::new();
let short = scorer.score("set", "Set");
let long = scorer.score("set", "Settings");
assert!(
short.score >= long.score,
"Shorter title should score >= longer: {} vs {}",
short.score,
long.score
);
}
#[test]
fn case_insensitive() {
let scorer = BayesianScorer::new();
let lower = scorer.score("set", "Settings");
let upper = scorer.score("SET", "Settings");
assert!(
(lower.score - upper.score).abs() < f64::EPSILON,
"Case should not affect score"
);
}
#[test]
fn match_positions_correct() {
let scorer = BayesianScorer::new();
let result = scorer.score("gd", "Go Dashboard");
assert_eq!(result.match_positions, vec![0, 3]);
}
#[test]
fn fuzzy_match_positions() {
let scorer = BayesianScorer::new();
let result = scorer.score("stg", "Settings");
assert_eq!(result.match_positions.len(), 3);
assert_eq!(result.match_positions[0], 0); }
// An empty query still yields a usable (but not maximal) score.
#[test]
fn empty_query_returns_all() {
let scorer = BayesianScorer::new();
let result = scorer.score("", "Any Command");
assert!(result.score > 0.0, "Empty query should have positive score");
assert!(result.score < 1.0, "Empty query should not be max score");
}
// A non-empty query against an empty title is NoMatch, not a NaN/panic.
#[test]
fn empty_title_is_safe() {
let scorer = BayesianScorer::new();
let result = scorer.score("x", "");
assert_eq!(result.match_type, MatchType::NoMatch);
assert!(result.score.is_finite(), "Score should remain finite");
}
// Degenerate empty-vs-empty input stays finite and classifies as Fuzzy.
#[test]
fn empty_query_empty_title_is_finite() {
let scorer = BayesianScorer::new();
let result = scorer.score("", "");
assert!(result.score.is_finite(), "Score should remain finite");
assert_eq!(result.match_type, MatchType::Fuzzy);
}
// A query longer than the title can never match.
#[test]
fn query_longer_than_title() {
let scorer = BayesianScorer::new();
let result = scorer.score("verylongquery", "short");
assert_eq!(result.match_type, MatchType::NoMatch);
assert_eq!(result.score, 0.0);
}
// A 62-char exact match must not overflow/saturate into a non-finite score.
#[test]
fn long_query_exact_match_is_handled() {
let scorer = BayesianScorer::new();
let query = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
let result = scorer.score(query, query);
assert_eq!(result.match_type, MatchType::Exact);
assert!(result.score.is_finite());
assert!(result.score > 0.9, "Exact long query should score high");
}
// Among two fuzzy matches, smaller total gaps must win.
#[test]
fn gap_penalty_prefers_tight_fuzzy_matches() {
let scorer = BayesianScorer::new();
let tight = scorer.score("ace", "abcde");
let gappy = scorer.score("ace", "a...c...e");
assert_eq!(tight.match_type, MatchType::Fuzzy);
assert_eq!(gappy.match_type, MatchType::Fuzzy);
assert!(
tight.score > gappy.score,
"Tight fuzzy match should score higher than gappy: {} vs {}",
tight.score,
gappy.score
);
}
// A tag matching the query prefix boosts the score over the plain title score.
#[test]
fn tag_match_boosts_score() {
let scorer = BayesianScorer::new();
let without_tag = scorer.score("set", "Settings");
let with_tag = scorer.score_with_tags("set", "Settings", &["config", "setup"]);
assert!(
with_tag.score > without_tag.score,
"Tag match should boost score: {} > {}",
with_tag.score,
without_tag.score
);
}
// The evidence ledger is populated and includes a MatchType entry.
#[test]
fn evidence_ledger_tracks_factors() {
let scorer = BayesianScorer::new();
let result = scorer.score("set", "Settings");
assert!(!result.evidence.entries().is_empty());
assert!(
result
.evidence
.entries()
.iter()
.any(|e| e.kind == EvidenceKind::MatchType)
);
}
// Ledger Display output carries its header and posterior line.
#[test]
fn evidence_ledger_display() {
let scorer = BayesianScorer::new();
let result = scorer.score("gd", "Go Dashboard");
let display = format!("{}", result.evidence);
assert!(display.contains("Evidence Ledger"));
assert!(display.contains("Posterior P:"));
}
// Property test: scores for a mixed corpus sort under a total order and are
// all finite.
#[test]
fn property_ordering_total() {
let scorer = BayesianScorer::new();
let titles = ["Settings", "Set Theme", "Reset View", "Asset"];
let mut scores: Vec<(f64, &str)> = titles
.iter()
.map(|t| (scorer.score("set", t).score, *t))
.collect();
// `f64::total_cmp` is a total order over all floats, so the sort cannot
// panic — unlike `partial_cmp(..).unwrap()`, which would panic on NaN and
// undercut the very property this test is about.
scores.sort_by(|a, b| b.0.total_cmp(&a.0));
for (score, _) in &scores {
assert!(score.is_finite());
}
}
// Extending a matching prefix never lowers the score.
#[test]
fn property_prefix_monotonic() {
let scorer = BayesianScorer::new();
let one_char = scorer.score("s", "Settings");
let three_char = scorer.score("set", "Settings");
assert!(
three_char.score >= one_char.score,
"Longer prefix should score >= shorter"
);
}
// Prior odds are strictly decreasing: Exact > Prefix > WordStart > Substring
// > Fuzzy > NoMatch.
#[test]
fn match_type_prior_ordering() {
assert!(MatchType::Exact.prior_odds() > MatchType::Prefix.prior_odds());
assert!(MatchType::Prefix.prior_odds() > MatchType::WordStart.prior_odds());
assert!(MatchType::WordStart.prior_odds() > MatchType::Substring.prior_odds());
assert!(MatchType::Substring.prior_odds() > MatchType::Fuzzy.prior_odds());
assert!(MatchType::Fuzzy.prior_odds() > MatchType::NoMatch.prior_odds());
}
// Equal scores are tie-broken by MatchType (stronger kinds rank first),
// regardless of input order.
#[test]
fn conformal_tie_breaks_by_match_type() {
let mut exact = MatchResult::no_match();
exact.score = 0.5;
exact.match_type = MatchType::Exact;
let mut prefix = MatchResult::no_match();
prefix.score = 0.5;
prefix.match_type = MatchType::Prefix;
let mut word_start = MatchResult::no_match();
word_start.score = 0.5;
word_start.match_type = MatchType::WordStart;
let mut substring = MatchResult::no_match();
substring.score = 0.5;
substring.match_type = MatchType::Substring;
let ranker = ConformalRanker::new();
let ranked = ranker.rank(vec![substring, word_start, prefix, exact]);
let order: Vec<MatchType> = ranked
.items
.iter()
.map(|item| item.result.match_type)
.collect();
assert_eq!(
order,
vec![
MatchType::Exact,
MatchType::Prefix,
MatchType::WordStart,
MatchType::Substring
]
);
}
// Ranking an empty input yields an all-zero summary, not a panic.
#[test]
fn conformal_empty_input() {
let ranker = ConformalRanker::new();
let ranked = ranker.rank(Vec::new());
assert_eq!(ranked.items.len(), 0);
assert_eq!(ranked.summary.count, 0);
assert_eq!(ranked.summary.stable_count, 0);
assert_eq!(ranked.summary.tie_group_count, 0);
assert_eq!(ranked.summary.median_gap, 0.0);
}
// A lone item has nothing to be displaced by: confidence is exactly 1.0.
#[test]
fn conformal_single_item() {
let scorer = BayesianScorer::new();
let results = vec![scorer.score("set", "Settings")];
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
assert_eq!(ranked.items.len(), 1);
assert_eq!(ranked.items[0].rank_confidence.confidence, 1.0);
assert_eq!(ranked.summary.count, 1);
}
// Ranked output is sorted by score, descending.
#[test]
fn conformal_sorted_descending() {
let scorer = BayesianScorer::new();
let results = vec![
scorer.score("set", "Settings"),
scorer.score("set", "Asset Manager"),
scorer.score("set", "Reset Configuration Panel"),
];
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
for w in ranked.items.windows(2) {
let a = w[0].result.score;
let b = w[1].result.score;
assert!(a >= b, "Items should be sorted descending: {a} >= {b}");
}
}
// Confidence stays in [0, 1] and gap-to-next is never negative.
#[test]
fn conformal_confidence_bounded() {
let scorer = BayesianScorer::new();
let titles = [
"Settings",
"Set Theme",
"Asset Manager",
"Reset View",
"Offset Tool",
"Test Suite",
];
let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
for item in &ranked.items {
assert!(
item.rank_confidence.confidence >= 0.0 && item.rank_confidence.confidence <= 1.0,
"Confidence must be in [0, 1], got {}",
item.rank_confidence.confidence
);
assert!(
item.rank_confidence.gap_to_next >= 0.0,
"Gap must be non-negative"
);
}
}
// Same inputs rank identically on every run (order and confidence).
#[test]
fn conformal_deterministic() {
let scorer = BayesianScorer::new();
let titles = ["Settings", "Set Theme", "Asset", "Reset"];
let results1: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let results2: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let ranker = ConformalRanker::new();
let ranked1 = ranker.rank(results1);
let ranked2 = ranker.rank(results2);
for (a, b) in ranked1.items.iter().zip(ranked2.items.iter()) {
assert!(
(a.rank_confidence.confidence - b.rank_confidence.confidence).abs() < f64::EPSILON,
"Confidence should be deterministic"
);
assert_eq!(
a.original_index, b.original_index,
"Rank order should be deterministic"
);
}
}
// Identical scores form a tie group and are flagged Unstable.
#[test]
fn conformal_ties_detected() {
let mut r1 = MatchResult::no_match();
r1.score = 0.8;
r1.match_type = MatchType::Prefix;
let mut r2 = MatchResult::no_match();
r2.score = 0.8;
r2.match_type = MatchType::Prefix;
let mut r3 = MatchResult::no_match();
r3.score = 0.5;
r3.match_type = MatchType::Substring;
let ranker = ConformalRanker::new();
let ranked = ranker.rank(vec![r1, r2, r3]);
assert_eq!(
ranked.items[0].rank_confidence.stability,
RankStability::Unstable,
"Tied items should be Unstable"
);
assert!(
ranked.summary.tie_group_count >= 1,
"Should detect at least one tie group"
);
}
// When every score ties, every position is Unstable.
#[test]
fn conformal_all_identical_scores() {
let mut results = Vec::new();
for _ in 0..5 {
let mut r = MatchResult::no_match();
r.score = 0.5;
r.match_type = MatchType::Fuzzy;
results.push(r);
}
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
for item in &ranked.items {
assert_eq!(item.rank_confidence.stability, RankStability::Unstable);
}
}
// Large score gaps (0.2–0.3 apart) should produce mostly stable positions.
#[test]
fn conformal_well_separated_scores_are_stable() {
let mut results = Vec::new();
for &s in &[0.9, 0.6, 0.3, 0.1] {
let mut r = MatchResult::no_match();
r.score = s;
r.match_type = MatchType::Prefix;
results.push(r);
}
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
assert!(
ranked.summary.stable_count >= 2,
"Well-separated scores should yield stable positions, got {}",
ranked.summary.stable_count
);
}
// rank_top_k truncates to k items while still computing confidences.
#[test]
fn conformal_top_k_truncates() {
let scorer = BayesianScorer::new();
let titles = [
"Settings",
"Set Theme",
"Asset Manager",
"Reset View",
"Offset Tool",
];
let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let ranker = ConformalRanker::new();
let ranked = ranker.rank_top_k(results, 2);
assert_eq!(ranked.items.len(), 2);
assert!(ranked.items[0].rank_confidence.confidence > 0.0);
}
// Ranking reorders items but preserves each item's index into the input.
#[test]
fn conformal_original_indices_preserved() {
let scorer = BayesianScorer::new();
let titles = ["Zebra Tool", "Settings", "Apple"];
let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
assert_eq!(
ranked.items[0].original_index, 1,
"Settings should be first; original_index should be 1"
);
}
// Summary Display mentions the item count.
#[test]
fn conformal_summary_display() {
let scorer = BayesianScorer::new();
let results = vec![
scorer.score("set", "Settings"),
scorer.score("set", "Set Theme"),
];
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
let display = format!("{}", ranked.summary);
assert!(display.contains("2 items"));
}
// RankConfidence Display shows the confidence value and stability label.
#[test]
fn conformal_rank_confidence_display() {
let rc = RankConfidence {
confidence: 0.85,
gap_to_next: 0.1234,
stability: RankStability::Stable,
};
let display = format!("{}", rc);
assert!(display.contains("0.850"));
assert!(display.contains("stable"));
}
// Stability labels are fixed lowercase strings.
#[test]
fn conformal_stability_labels() {
assert_eq!(RankStability::Stable.label(), "stable");
assert_eq!(RankStability::Marginal.label(), "marginal");
assert_eq!(RankStability::Unstable.label(), "unstable");
}
// The last-ranked item has no successor, so its gap_to_next is 0.
#[test]
fn conformal_last_item_gap_zero() {
let scorer = BayesianScorer::new();
let results = vec![
scorer.score("set", "Settings"),
scorer.score("set", "Asset"),
scorer.score("set", "Reset"),
];
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
let last = ranked.items.last().unwrap();
assert_eq!(
last.rank_confidence.gap_to_next, 0.0,
"Last item gap should be 0"
);
}
// Summary median gap is never negative for a real corpus.
#[test]
fn conformal_median_gap_non_negative() {
let scorer = BayesianScorer::new();
let titles = [
"Settings",
"Set Theme",
"Asset Manager",
"Reset View",
"Offset Tool",
"Test Suite",
"System Settings",
"Reset Defaults",
];
let results: Vec<MatchResult> = titles.iter().map(|t| scorer.score("set", t)).collect();
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
assert!(
ranked.summary.median_gap >= 0.0,
"Median gap must be non-negative"
);
}
// Shared fixture: a small, representative command corpus for the incremental
// scorer tests (three "Git:" entries, several "File"/"Files" entries).
fn test_corpus() -> Vec<&'static str> {
[
"Open File",
"Save File",
"Close Tab",
"Git: Commit",
"Git: Push",
"Git: Pull",
"Go to Line",
"Find in Files",
"Toggle Terminal",
"Format Document",
]
.to_vec()
}
// First query on a fresh scorer must take the full-scan path.
#[test]
fn incremental_full_scan_on_first_call() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
let results = scorer.score_corpus("git", &corpus, None);
assert!(!results.is_empty(), "Should find matches for 'git'");
assert_eq!(scorer.stats().full_scans, 1);
assert_eq!(scorer.stats().incremental_scans, 0);
}
// Extending the query char-by-char reuses the cache and monotonically
// shrinks the candidate set.
#[test]
fn incremental_prunes_on_query_extension() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
let r1 = scorer.score_corpus("g", &corpus, None);
assert_eq!(scorer.stats().full_scans, 1);
let r2 = scorer.score_corpus("gi", &corpus, None);
assert_eq!(scorer.stats().incremental_scans, 1);
assert!(r2.len() <= r1.len(), "Extended query should match <= items");
let r3 = scorer.score_corpus("git", &corpus, None);
assert_eq!(scorer.stats().incremental_scans, 2);
assert!(r3.len() <= r2.len());
}
// The incremental path must be an optimization only: indices and scores
// match a from-scratch full scan exactly.
#[test]
fn incremental_correctness_matches_full_scan() {
let corpus = test_corpus();
let mut inc = IncrementalScorer::new();
inc.score_corpus("g", &corpus, None);
let inc_results = inc.score_corpus("git", &corpus, None);
let mut full = IncrementalScorer::new();
let full_results = full.score_corpus("git", &corpus, None);
assert_eq!(
inc_results.len(),
full_results.len(),
"Incremental and full scan should return same count"
);
for (a, b) in inc_results.iter().zip(full_results.iter()) {
assert_eq!(a.0, b.0, "Same corpus indices");
assert!(
(a.1.score - b.1.score).abs() < f64::EPSILON,
"Same scores: {} vs {}",
a.1.score,
b.1.score
);
}
}
// A query that is not an extension of the previous one forces a full scan.
#[test]
fn incremental_falls_back_on_non_extension() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
scorer.score_corpus("git", &corpus, None);
assert_eq!(scorer.stats().full_scans, 1);
scorer.score_corpus("fi", &corpus, None);
assert_eq!(scorer.stats().full_scans, 2);
}
// Explicit invalidate() discards the cache even for an extending query.
#[test]
fn incremental_invalidate_forces_full_scan() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
scorer.score_corpus("g", &corpus, None);
scorer.invalidate();
scorer.score_corpus("gi", &corpus, None);
assert_eq!(scorer.stats().full_scans, 2);
assert_eq!(scorer.stats().incremental_scans, 0);
}
// A changed corpus generation token invalidates the cache.
#[test]
fn incremental_generation_change_invalidates() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
scorer.score_corpus("g", &corpus, Some(1));
scorer.score_corpus("gi", &corpus, Some(2));
assert_eq!(scorer.stats().full_scans, 2);
}
// A corpus of a different length invalidates the cache.
#[test]
fn incremental_corpus_size_change_invalidates() {
let mut scorer = IncrementalScorer::new();
let corpus1 = test_corpus();
let corpus2 = &corpus1[..5];
scorer.score_corpus("g", &corpus1, None);
scorer.score_corpus("gi", corpus2, None);
assert_eq!(scorer.stats().full_scans, 2);
}
// Stale cache entries whose index exceeds the corpus length are skipped
// (not a panic) on every incremental variant.
#[test]
fn incremental_skips_out_of_bounds_cache_entries() {
let mut scorer = IncrementalScorer::new();
scorer.cache = vec![
CachedEntry { corpus_index: 0 },
CachedEntry { corpus_index: 999 },
];
let corpus = ["a"];
let results = scorer.score_incremental("a", "a", &corpus);
assert_eq!(results.len(), 1);
assert_eq!(results[0].0, 0);
let corpus_lower = ["a"];
let results = scorer.score_incremental_lowered("a", "a", &corpus, &corpus_lower);
assert_eq!(results.len(), 1);
assert_eq!(results[0].0, 0);
let corpus_s = vec!["a".to_string()];
let corpus_lower_s = vec!["a".to_string()];
let word_starts = vec![vec![0]];
let results = scorer.score_incremental_lowered_with_words(
"a",
"a",
&corpus_s,
&corpus_lower_s,
&word_starts,
);
assert_eq!(results.len(), 1);
assert_eq!(results[0].0, 0);
}
// An empty query matches every corpus entry.
#[test]
fn incremental_empty_query() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
let results = scorer.score_corpus("", &corpus, None);
assert_eq!(results.len(), corpus.len());
}
// A query matching nothing returns an empty result set.
#[test]
fn incremental_no_match_query() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
let results = scorer.score_corpus("zzz", &corpus, None);
assert!(results.is_empty());
}
// After incremental scans, the prune counters must show work was skipped.
#[test]
fn incremental_stats_prune_ratio() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
scorer.score_corpus("g", &corpus, None);
scorer.score_corpus("gi", &corpus, None);
scorer.score_corpus("git", &corpus, None);
let stats = scorer.stats();
assert!(
stats.prune_ratio() > 0.0,
"Prune ratio should be > 0 after incremental scans"
);
assert!(stats.total_pruned > 0, "Should have pruned some items");
}
// Corpus results come back sorted by score, descending.
#[test]
fn incremental_results_sorted_descending() {
let mut scorer = IncrementalScorer::new();
let corpus = test_corpus();
let results = scorer.score_corpus("o", &corpus, None);
for w in results.windows(2) {
let a = w[0].1.score;
let b = w[1].1.score;
assert!(a >= b, "Results should be sorted descending: {a} >= {b}");
}
}
// The pre-lowered + precomputed-word-starts path must agree exactly with
// the plain path, including on a multibyte (emoji) title.
#[test]
fn incremental_lowered_matches_full() {
let corpus = vec![
"Open File".to_string(),
"Save File".to_string(),
"Close".to_string(),
"Launch 🚀".to_string(),
];
let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
let lower: Vec<String> = corpus.iter().map(|s| s.to_lowercase()).collect();
// Word starts: byte index 0, or any char whose preceding byte is a
// space/hyphen/underscore. For a multibyte char the preceding byte is a
// UTF-8 continuation byte, never one of those separators, so this is safe.
let word_starts: Vec<Vec<usize>> = lower
.iter()
.map(|title_lower| {
let bytes = title_lower.as_bytes();
title_lower
.char_indices()
.filter_map(|(i, _)| {
let is_word_start = i == 0 || {
let prev = bytes.get(i.saturating_sub(1)).copied().unwrap_or(b' ');
prev == b' ' || prev == b'-' || prev == b'_'
};
is_word_start.then_some(i)
})
.collect()
})
.collect();
let mut full = IncrementalScorer::new();
let mut lowered = IncrementalScorer::new();
let a = full.score_corpus("fi", &corpus_refs, None);
let b =
lowered.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
assert_eq!(a.len(), b.len());
for ((ia, ra), (ib, rb)) in a.iter().zip(b.iter()) {
assert_eq!(ia, ib);
assert_eq!(ra.match_type, rb.match_type);
assert_eq!(ra.match_positions, rb.match_positions);
assert!(
(ra.score - rb.score).abs() < 1e-12,
"score mismatch: {} vs {}",
ra.score,
rb.score
);
}
}
// Two fresh scorers produce identical results for identical input.
#[test]
fn incremental_deterministic() {
let corpus = test_corpus();
let mut s1 = IncrementalScorer::new();
let r1 = s1.score_corpus("git", &corpus, None);
let mut s2 = IncrementalScorer::new();
let r2 = s2.score_corpus("git", &corpus, None);
assert_eq!(r1.len(), r2.len());
for (a, b) in r1.iter().zip(r2.iter()) {
assert_eq!(a.0, b.0);
assert!((a.1.score - b.1.score).abs() < f64::EPSILON);
}
}
// Human-readable descriptions are pinned for every MatchType variant.
#[test]
fn match_type_descriptions() {
assert_eq!(MatchType::Exact.description(), "exact match");
assert_eq!(MatchType::Prefix.description(), "prefix match");
assert_eq!(MatchType::WordStart.description(), "word-start match");
assert_eq!(MatchType::Substring.description(), "contiguous substring");
assert_eq!(MatchType::Fuzzy.description(), "fuzzy match");
assert_eq!(MatchType::NoMatch.description(), "no match");
}
// NoMatch contributes zero prior odds (kills the posterior outright).
#[test]
fn match_type_no_match_prior_is_zero() {
assert_eq!(MatchType::NoMatch.prior_odds(), 0.0);
}
// Display tests for each EvidenceDescription variant.
#[test]
fn evidence_description_static_display() {
let d = EvidenceDescription::Static("test message");
assert_eq!(format!("{d}"), "test message");
}
#[test]
fn evidence_description_title_length_display() {
let d = EvidenceDescription::TitleLengthChars { len: 42 };
assert_eq!(format!("{d}"), "title length 42 chars");
}
#[test]
fn evidence_description_first_match_pos_display() {
let d = EvidenceDescription::FirstMatchPos { pos: 5 };
assert_eq!(format!("{d}"), "first match at position 5");
}
#[test]
fn evidence_description_word_boundary_count_display() {
let d = EvidenceDescription::WordBoundaryCount { count: 3 };
assert_eq!(format!("{d}"), "3 word boundary matches");
}
#[test]
fn evidence_description_gap_total_display() {
let d = EvidenceDescription::GapTotal { total: 7 };
assert_eq!(format!("{d}"), "total gap of 7 characters");
}
// Percentages are rendered rounded to the nearest integer (83.5 -> 84%).
#[test]
fn evidence_description_coverage_percent_display() {
let d = EvidenceDescription::CoveragePercent { percent: 83.5 };
let s = format!("{d}");
assert!(s.contains("84%"), "Should round: {s}");
}
// BF > 1.0 is labelled "supports" in the entry's Display output.
#[test]
fn evidence_entry_display_supports() {
let entry = EvidenceEntry {
kind: EvidenceKind::Position,
bayes_factor: 1.5,
description: EvidenceDescription::FirstMatchPos { pos: 0 },
};
let s = format!("{entry}");
assert!(s.contains("supports"));
assert!(s.contains("Position"));
}
// BF < 1.0 is labelled "opposes".
#[test]
fn evidence_entry_display_opposes() {
let entry = EvidenceEntry {
kind: EvidenceKind::GapPenalty,
bayes_factor: 0.5,
description: EvidenceDescription::GapTotal { total: 10 },
};
let s = format!("{entry}");
assert!(s.contains("opposes"));
}
// BF == 1.0 is labelled "neutral".
#[test]
fn evidence_entry_display_neutral() {
let entry = EvidenceEntry {
kind: EvidenceKind::TitleLength,
bayes_factor: 1.0,
description: EvidenceDescription::Static("neutral factor"),
};
let s = format!("{entry}");
assert!(s.contains("neutral"));
}
// Independent Bayes factors multiply: 2.0 * 3.0 = 6.0.
#[test]
fn ledger_combined_bayes_factor() {
let mut ledger = EvidenceLedger::new();
ledger.add(
EvidenceKind::Position,
2.0,
EvidenceDescription::Static("a"),
);
ledger.add(
EvidenceKind::WordBoundary,
3.0,
EvidenceDescription::Static("b"),
);
assert!((ledger.combined_bayes_factor() - 6.0).abs() < f64::EPSILON);
}
// The empty product is 1.0 (no evidence leaves odds unchanged).
#[test]
fn ledger_combined_bayes_factor_empty() {
let ledger = EvidenceLedger::new();
assert!((ledger.combined_bayes_factor() - 1.0).abs() < f64::EPSILON);
}
// The MatchType entry's factor doubles as the prior odds.
#[test]
fn ledger_prior_odds_present() {
let mut ledger = EvidenceLedger::new();
ledger.add(
EvidenceKind::MatchType,
9.0,
EvidenceDescription::Static("prefix"),
);
assert_eq!(ledger.prior_odds(), Some(9.0));
}
// No MatchType entry means no prior odds.
#[test]
fn ledger_prior_odds_absent() {
let ledger = EvidenceLedger::new();
assert_eq!(ledger.prior_odds(), None);
}
// Odds 2.0 converts to probability 2/(2+1) = 2/3.
#[test]
fn ledger_posterior_probability_no_prior() {
let mut ledger = EvidenceLedger::new();
ledger.add(
EvidenceKind::Position,
2.0,
EvidenceDescription::Static("pos"),
);
let prob = ledger.posterior_probability();
assert!((prob - 2.0 / 3.0).abs() < 1e-9);
}
// Infinite odds must saturate to probability 1.0, not NaN.
#[test]
fn ledger_posterior_probability_infinite_odds() {
let mut ledger = EvidenceLedger::new();
ledger.add(
EvidenceKind::MatchType,
f64::INFINITY,
EvidenceDescription::Static("inf"),
);
assert_eq!(ledger.posterior_probability(), 1.0);
}
// The fast scorer skips evidence tracking entirely.
#[test]
fn fast_scorer_no_evidence() {
let scorer = BayesianScorer::fast();
let result = scorer.score("set", "Settings");
assert!(result.score > 0.0);
assert!(result.evidence.entries().is_empty());
}
// Fast-path scores remain valid probabilities.
#[test]
fn fast_scorer_score_matches_probability_range() {
let scorer = BayesianScorer::fast();
let result = scorer.score("set", "Settings");
assert!(result.score >= 0.0 && result.score <= 1.0);
}
// Empty query on the fast path: positive score, still no evidence.
#[test]
fn fast_scorer_empty_query() {
let scorer = BayesianScorer::fast();
let result = scorer.score("", "Test");
assert!(result.score > 0.0);
assert!(result.evidence.entries().is_empty());
}
// Gap penalties apply on the fast path without zeroing the score.
#[test]
fn fast_scorer_fuzzy_with_gap_penalty() {
let scorer = BayesianScorer::fast();
let result = scorer.score("ace", "a...b...c...d...e");
assert_eq!(result.match_type, MatchType::Fuzzy);
assert!(result.score > 0.0);
}
// Tag boosting also works on the fast path.
#[test]
fn fast_scorer_tag_boost() {
let scorer = BayesianScorer::fast();
let without = scorer.score("set", "Settings");
let with = scorer.score_with_tags("set", "Settings", &["setup"]);
assert!(with.score > without.score);
}
// Tag boost on an already-maximal score must not push past 1.0.
#[test]
fn fast_scorer_tag_no_boost_at_score_one() {
let scorer = BayesianScorer::fast();
let result = scorer.score_with_tags("Settings", "Settings", &["settings"]);
assert!(result.score <= 1.0);
}
// Match classification works on multibyte (CJK/Greek) text.
#[test]
fn unicode_exact_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("日本語", "日本語");
assert_eq!(result.match_type, MatchType::Exact);
assert!(result.score > 0.9);
}
#[test]
fn unicode_prefix_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("日本", "日本語テスト");
assert_eq!(result.match_type, MatchType::Prefix);
}
#[test]
fn unicode_substring_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("本語", "日本語テスト");
assert_eq!(result.match_type, MatchType::Substring);
}
#[test]
fn unicode_fuzzy_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("日テ", "日本語テスト");
assert_eq!(result.match_type, MatchType::Fuzzy);
}
#[test]
fn unicode_no_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("ωψξ", "αβγ");
assert_eq!(result.match_type, MatchType::NoMatch);
}
#[test]
fn unicode_word_start_match() {
let scorer = BayesianScorer::new();
let result = scorer.score("gd", "go dashboard");
assert_eq!(result.match_type, MatchType::WordStart);
}
// Passing a pre-lowered query must give the same result as score().
#[test]
fn score_with_query_lower_matches_score() {
let scorer = BayesianScorer::new();
let direct = scorer.score("Set", "Settings");
let pre = scorer.score_with_query_lower("Set", "set", "Settings");
assert!((direct.score - pre.score).abs() < f64::EPSILON);
assert_eq!(direct.match_type, pre.match_type);
}
// Pre-lowered title variant must agree with score().
#[test]
fn score_with_lowered_title_matches_score() {
let scorer = BayesianScorer::new();
let direct = scorer.score("set", "Settings");
let pre = scorer.score_with_lowered_title("set", "set", "Settings", "settings");
assert!((direct.score - pre.score).abs() < f64::EPSILON);
}
// Pre-lowered title + precomputed word starts must agree with score().
#[test]
fn score_with_lowered_title_and_words_matches() {
let scorer = BayesianScorer::new();
let direct = scorer.score("gd", "Go Dashboard");
let pre = scorer.score_with_lowered_title_and_words(
"gd",
"gd",
"Go Dashboard",
"go dashboard",
Some(&[0, 3]),
);
assert_eq!(direct.match_type, pre.match_type);
assert!((direct.score - pre.score).abs() < f64::EPSILON);
}
// A tag match alone cannot rescue a title that does not match at all.
#[test]
fn tag_match_no_title_match_returns_nomatch() {
let scorer = BayesianScorer::new();
let result = scorer.score_with_tags("xyz", "Settings", &["xyz"]);
assert_eq!(result.match_type, MatchType::NoMatch);
assert_eq!(result.score, 0.0);
}
// Non-matching tags leave the score exactly unchanged.
#[test]
fn tag_match_no_matching_tag() {
let scorer = BayesianScorer::new();
let without = scorer.score("set", "Settings");
let with = scorer.score_with_tags("set", "Settings", &["foo", "bar"]);
assert!((without.score - with.score).abs() < f64::EPSILON);
}
// Tag comparison ignores case ("SETUP" still boosts query "set").
#[test]
fn tag_match_case_insensitive() {
let scorer = BayesianScorer::new();
let result = scorer.score_with_tags("set", "Settings", &["SETUP"]);
let without = scorer.score("set", "Settings");
assert!(result.score > without.score);
}
// no_match() carries a single zero-factor MatchType evidence entry.
#[test]
fn no_match_result_has_evidence() {
let r = MatchResult::no_match();
assert_eq!(r.score, 0.0);
assert_eq!(r.match_type, MatchType::NoMatch);
assert!(r.match_positions.is_empty());
assert_eq!(r.evidence.entries().len(), 1);
assert_eq!(r.evidence.entries()[0].kind, EvidenceKind::MatchType);
assert_eq!(r.evidence.entries()[0].bayes_factor, 0.0);
}
// Position 0 always counts as a word boundary.
#[test]
fn count_word_boundaries_at_start() {
let scorer = BayesianScorer::new();
let count = scorer.count_word_boundaries(&[0], "hello");
assert_eq!(count, 1);
}
// Positions after '-', '_', and ' ' all count as boundaries.
#[test]
fn count_word_boundaries_after_separators() {
let scorer = BayesianScorer::new();
let count = scorer.count_word_boundaries(&[0, 2, 4, 6], "a-b_c d");
assert_eq!(count, 4);
}
// A mid-word position is not a boundary.
#[test]
fn count_word_boundaries_mid_word() {
let scorer = BayesianScorer::new();
let count = scorer.count_word_boundaries(&[2], "hello");
assert_eq!(count, 0);
}
// total_gap edge cases: empty, single, contiguous, and gapped positions.
#[test]
fn total_gap_empty() {
let scorer = BayesianScorer::new();
assert_eq!(scorer.total_gap(&[]), 0);
}
#[test]
fn total_gap_single() {
let scorer = BayesianScorer::new();
assert_eq!(scorer.total_gap(&[5]), 0);
}
#[test]
fn total_gap_contiguous() {
let scorer = BayesianScorer::new();
assert_eq!(scorer.total_gap(&[0, 1, 2]), 0);
}
// Gaps sum across all adjacent pairs: (3-0-1) + (7-3-1) = 5.
#[test]
fn total_gap_with_gaps() {
let scorer = BayesianScorer::new();
assert_eq!(scorer.total_gap(&[0, 3, 7]), 5);
}
// Custom thresholds are honored: a gap (0.05) below tie_epsilon (0.1)
// makes the pair count as a tie and the top item Unstable.
#[test]
fn conformal_custom_thresholds() {
let ranker = ConformalRanker {
tie_epsilon: 0.1,
stable_threshold: 0.9,
marginal_threshold: 0.5,
};
let mut r1 = MatchResult::no_match();
r1.score = 0.5;
r1.match_type = MatchType::Prefix;
let mut r2 = MatchResult::no_match();
r2.score = 0.45;
r2.match_type = MatchType::Prefix;
let ranked = ranker.rank(vec![r1, r2]);
assert_eq!(
ranked.items[0].rank_confidence.stability,
RankStability::Unstable,
"Gap 0.05 < epsilon 0.1 → tie"
);
}
// k larger than the input size just returns everything.
#[test]
fn conformal_rank_top_k_larger_than_count() {
let scorer = BayesianScorer::new();
let results = vec![scorer.score("set", "Settings")];
let ranker = ConformalRanker::new();
let ranked = ranker.rank_top_k(results, 100);
assert_eq!(ranked.items.len(), 1);
}
// k == 0 yields an empty ranking with a zeroed summary.
#[test]
fn conformal_rank_top_k_zero() {
let scorer = BayesianScorer::new();
let results = vec![
scorer.score("set", "Settings"),
scorer.score("set", "Asset"),
];
let ranker = ConformalRanker::new();
let ranked = ranker.rank_top_k(results, 0);
assert!(ranked.items.is_empty());
assert_eq!(ranked.summary.stable_count, 0);
}
// Display includes the stability label for the marginal variant...
#[test]
fn conformal_rank_confidence_display_marginal() {
let rc = RankConfidence {
confidence: 0.5,
gap_to_next: 0.05,
stability: RankStability::Marginal,
};
let s = format!("{rc}");
assert!(s.contains("marginal"));
}
// ...and for the unstable variant.
#[test]
fn conformal_rank_confidence_display_unstable() {
let rc = RankConfidence {
confidence: 0.1,
gap_to_next: 0.001,
stability: RankStability::Unstable,
};
let s = format!("{rc}");
assert!(s.contains("unstable"));
}
// Five evenly spaced scores give four gaps of 0.2; median ≈ 0.2.
#[test]
fn conformal_median_gap_even_count() {
let mut results = Vec::new();
for &s in &[0.9, 0.7, 0.5, 0.3, 0.1] {
let mut r = MatchResult::no_match();
r.score = s;
r.match_type = MatchType::Prefix;
results.push(r);
}
let ranker = ConformalRanker::new();
let ranked = ranker.rank(results);
assert!(
(ranked.summary.median_gap - 0.2).abs() < 0.01,
"median_gap={}, expected ~0.2",
ranked.summary.median_gap
);
}
// `with_scorer` must actually use the supplied evidence-tracking scorer, so
// every returned result carries a non-empty evidence ledger.
#[test]
fn incremental_with_scorer_uses_provided_scorer() {
let mut inc = IncrementalScorer::with_scorer(BayesianScorer::new());
let corpus = test_corpus();
let results = inc.score_corpus("git", &corpus, None);
assert!(!results.is_empty());
assert!(!results[0].1.evidence.entries().is_empty());
}
// Default-constructed scorer starts with zeroed stats.
#[test]
fn incremental_default_trait() {
let inc = IncrementalScorer::default();
assert_eq!(inc.stats().full_scans, 0);
assert_eq!(inc.stats().incremental_scans, 0);
}
// prune_ratio on fresh stats is 0.0, not a divide-by-zero.
#[test]
fn incremental_stats_prune_ratio_zero_when_empty() {
let stats = IncrementalStats::default();
assert_eq!(stats.prune_ratio(), 0.0);
}
// Pre-lowered corpus variant: results exist and indices stay in bounds.
#[test]
fn incremental_score_corpus_with_lowered() {
let corpus = vec!["Open File", "Save File", "Close Tab"];
let lower = vec!["open file", "save file", "close tab"];
let mut inc = IncrementalScorer::new();
let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
assert!(!results.is_empty());
for (idx, _) in &results {
assert!(*idx < corpus.len());
}
}
// Pre-lowered variant also takes the incremental path on query extension.
#[test]
fn incremental_score_corpus_with_lowered_incremental_path() {
let corpus = vec!["Open File", "Save File", "Close Tab"];
let lower = vec!["open file", "save file", "close tab"];
let mut inc = IncrementalScorer::new();
inc.score_corpus_with_lowered("f", &corpus, &lower, None);
let results = inc.score_corpus_with_lowered("fi", &corpus, &lower, None);
assert_eq!(inc.stats().incremental_scans, 1);
assert!(!results.is_empty());
}
// Pre-lowered + precomputed word-starts variant: full-scan path.
#[test]
fn incremental_score_corpus_with_lowered_and_words() {
let corpus = vec![
"Open File".to_string(),
"Save File".to_string(),
"Close Tab".to_string(),
];
let lower = vec![
"open file".to_string(),
"save file".to_string(),
"close tab".to_string(),
];
let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
let mut inc = IncrementalScorer::new();
let results =
inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
assert!(!results.is_empty());
}
// Same variant, incremental path on query extension.
#[test]
fn incremental_score_corpus_with_lowered_and_words_incremental_path() {
let corpus = vec![
"Open File".to_string(),
"Save File".to_string(),
"Close Tab".to_string(),
];
let lower = vec![
"open file".to_string(),
"save file".to_string(),
"close tab".to_string(),
];
let word_starts = vec![vec![0, 5], vec![0, 5], vec![0, 6]];
let mut inc = IncrementalScorer::new();
inc.score_corpus_with_lowered_and_words("f", &corpus, &lower, &word_starts, None);
let results =
inc.score_corpus_with_lowered_and_words("fi", &corpus, &lower, &word_starts, None);
assert_eq!(inc.stats().incremental_scans, 1);
assert!(!results.is_empty());
}
// Default constructor disables evidence tracking (the fast configuration).
#[test]
fn bayesian_scorer_default_has_evidence_off() {
let scorer = BayesianScorer::default();
assert!(!scorer.track_evidence);
}
// Hyphens delimit words for WordStart classification.
#[test]
fn word_start_match_with_hyphens() {
let scorer = BayesianScorer::new();
let result = scorer.score("fc", "file-commander");
assert_eq!(result.match_type, MatchType::WordStart);
assert_eq!(result.match_positions, vec![0, 5]);
}
// Underscores delimit words for WordStart classification, like hyphens.
#[test]
fn word_start_match_with_underscores() {
let scorer = BayesianScorer::new();
let result = scorer.score("fc", "file_commander");
assert_eq!(result.match_type, MatchType::WordStart);
assert_eq!(result.match_positions, vec![0, 5]);
}
// The derived Ord on MatchType sorts weakest (NoMatch) to strongest (Exact),
// matching declaration order.
#[test]
fn match_type_ord() {
let mut types = vec![
MatchType::Exact,
MatchType::NoMatch,
MatchType::Fuzzy,
MatchType::Prefix,
MatchType::Substring,
MatchType::WordStart,
];
types.sort();
assert_eq!(
types,
vec![
MatchType::NoMatch,
MatchType::Fuzzy,
MatchType::Substring,
MatchType::WordStart,
MatchType::Prefix,
MatchType::Exact,
]
);
}
// EvidenceKind derives PartialEq correctly.
#[test]
fn evidence_kind_equality() {
assert_eq!(EvidenceKind::MatchType, EvidenceKind::MatchType);
assert_ne!(EvidenceKind::Position, EvidenceKind::GapPenalty);
}
// An earlier first-match position beats a later one at equal match length.
#[test]
fn earlier_substring_scores_higher() {
let scorer = BayesianScorer::new();
let early = scorer.score("set", "set in stone");
let late = scorer.score("set", "the asset");
assert!(
early.score > late.score,
"Earlier match should score higher: {} vs {}",
early.score,
late.score
);
}
}
#[cfg(test)]
mod perf_tests {
use super::*;
use std::time::Instant;
// Latency summary for a batch of timed iterations; times in microseconds.
#[derive(Debug, Clone, Copy)]
struct PerfStats {
// Median latency.
p50_us: u64,
// 95th-percentile latency (nearest-rank on the sorted samples).
p95_us: u64,
// 99th-percentile latency.
p99_us: u64,
// Arithmetic mean latency.
mean_us: f64,
// Population variance of the latencies (µs²).
variance_us: f64,
// Number of timed samples (warm-up runs are excluded).
samples: usize,
}
// Latency budgets (microseconds) per perf scenario, one per line so budget
// tuning shows up as a one-line diff.
const SINGLE_SCORE_BUDGET_US: u64 = 10;
const CORPUS_100_BUDGET_US: u64 = 500;
const CORPUS_1000_BUDGET_US: u64 = 5_000;
const CORPUS_5000_BUDGET_US: u64 = 25_000;
const INCREMENTAL_7KEY_100_BUDGET_US: u64 = 5_000;
const INCREMENTAL_7KEY_1000_BUDGET_US: u64 = 15_000;
// Coverage-instrumented builds run slower; budgets scale by this factor.
const COVERAGE_BUDGET_MULTIPLIER: u64 = 5;
// Detect a coverage run via the env vars set by llvm-cov tooling.
fn is_coverage_run() -> bool {
std::env::var("LLVM_PROFILE_FILE").is_ok() || std::env::var("CARGO_LLVM_COV").is_ok()
}
// Pure core of budget scaling, separated from env detection so it can be
// unit-tested deterministically. Saturates instead of overflowing.
fn coverage_budget_us_with_mode(base: u64, coverage_run: bool) -> u64 {
match coverage_run {
true => base.saturating_mul(COVERAGE_BUDGET_MULTIPLIER),
false => base,
}
}
// Budget for the current process, scaled when running under coverage.
fn coverage_budget_us(base: u64) -> u64 {
coverage_budget_us_with_mode(base, is_coverage_run())
}
#[test]
fn coverage_budget_us_with_mode_respects_flag() {
assert_eq!(coverage_budget_us_with_mode(123, false), 123);
assert_eq!(
coverage_budget_us_with_mode(123, true),
123 * COVERAGE_BUDGET_MULTIPLIER
);
}
fn make_corpus(size: usize) -> Vec<String> {
let base = [
"Open File",
"Save File",
"Close Tab",
"Split Editor Right",
"Split Editor Down",
"Toggle Terminal",
"Go to Line",
"Find in Files",
"Replace in Files",
"Git: Commit",
"Git: Push",
"Git: Pull",
"Debug: Start",
"Debug: Stop",
"Debug: Step Over",
"Format Document",
"Rename Symbol",
"Go to Definition",
"Find All References",
"Toggle Sidebar",
];
base.iter()
.cycle()
.take(size)
.enumerate()
.map(|(i, cmd)| {
if i < base.len() {
(*cmd).to_string()
} else {
format!("{} ({})", cmd, i)
}
})
.collect()
}
fn measure_stats_us(iterations: usize, mut f: impl FnMut()) -> PerfStats {
let mut times = Vec::with_capacity(iterations);
for _ in 0..3 {
f();
}
for _ in 0..iterations {
let start = Instant::now();
f();
times.push(start.elapsed().as_micros() as u64);
}
times.sort_unstable();
let len = times.len();
let p50 = times[len / 2];
let p95 = times[((len as f64 * 0.95) as usize).min(len.saturating_sub(1))];
let p99 = times[((len as f64 * 0.99) as usize).min(len.saturating_sub(1))];
let mean = times.iter().copied().map(|value| value as f64).sum::<f64>() / len as f64;
let variance = times
.iter()
.map(|value| {
let delta = *value as f64 - mean;
delta * delta
})
.sum::<f64>()
/ len as f64;
PerfStats {
p50_us: p50,
p95_us: p95,
p99_us: p99,
mean_us: mean,
variance_us: variance,
samples: len,
}
}
#[test]
fn perf_single_score_under_budget() {
let scorer = BayesianScorer::fast();
let stats = measure_stats_us(200, || {
std::hint::black_box(scorer.score("git co", "Git: Commit"));
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_score_single\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(SINGLE_SCORE_BUDGET_US);
assert!(
stats.p50_us <= budget,
"Single score p50 = {}µs exceeds budget {}µs",
stats.p50_us,
budget,
);
}
#[test]
fn perf_corpus_100_under_budget() {
let scorer = BayesianScorer::fast();
let corpus = make_corpus(100);
let stats = measure_stats_us(50, || {
let mut results: Vec<_> = corpus
.iter()
.map(|t| scorer.score("git co", t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(CORPUS_100_BUDGET_US);
assert!(
stats.p95_us <= budget,
"100-item corpus p95 = {}µs exceeds budget {}µs",
stats.p95_us,
budget,
);
}
#[test]
fn perf_corpus_1000_under_budget() {
let scorer = BayesianScorer::fast();
let corpus = make_corpus(1_000);
let stats = measure_stats_us(20, || {
let mut results: Vec<_> = corpus
.iter()
.map(|t| scorer.score("git co", t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(CORPUS_1000_BUDGET_US);
assert!(
stats.p95_us <= budget,
"1000-item corpus p95 = {}µs exceeds budget {}µs",
stats.p95_us,
budget,
);
}
#[test]
fn perf_corpus_5000_under_budget() {
let scorer = BayesianScorer::fast();
let corpus = make_corpus(5_000);
let stats = measure_stats_us(10, || {
let mut results: Vec<_> = corpus
.iter()
.map(|t| scorer.score("git co", t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_corpus_5000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(CORPUS_5000_BUDGET_US);
assert!(
stats.p95_us <= budget,
"5000-item corpus p95 = {}µs exceeds budget {}µs",
stats.p95_us,
budget,
);
}
#[test]
fn perf_incremental_7key_100_under_budget() {
let corpus = make_corpus(100);
let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
let stats = measure_stats_us(30, || {
let mut inc = IncrementalScorer::new();
for query in &queries {
let results = inc.score_corpus(query, &corpus_refs, None);
std::hint::black_box(&results);
}
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_100\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(INCREMENTAL_7KEY_100_BUDGET_US);
assert!(
stats.p95_us <= budget,
"Incremental 7-key 100-item p95 = {}µs exceeds budget {}µs",
stats.p95_us,
budget,
);
}
#[test]
fn perf_incremental_7key_1000_under_budget() {
let corpus = make_corpus(1_000);
let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
let stats = measure_stats_us(10, || {
let mut inc = IncrementalScorer::new();
for query in &queries {
let results = inc.score_corpus(query, &corpus_refs, None);
std::hint::black_box(&results);
}
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_7key_1000\",\"samples\":{},\"p50_us\":{},\"p95_us\":{},\"p99_us\":{},\"mean_us\":{:.2},\"variance_us\":{:.2}}}",
stats.samples,
stats.p50_us,
stats.p95_us,
stats.p99_us,
stats.mean_us,
stats.variance_us
);
let budget = coverage_budget_us(INCREMENTAL_7KEY_1000_BUDGET_US);
assert!(
stats.p95_us <= budget,
"Incremental 7-key 1000-item p95 = {}µs exceeds budget {}µs",
stats.p95_us,
budget,
);
}
#[test]
fn perf_incremental_faster_than_naive() {
let corpus = make_corpus(100);
let corpus_refs: Vec<&str> = corpus.iter().map(|s| s.as_str()).collect();
let scorer = BayesianScorer::fast();
let queries = ["g", "gi", "git", "git ", "git c", "git co", "git com"];
let naive_stats = measure_stats_us(30, || {
for query in &queries {
let mut results: Vec<_> = corpus
.iter()
.map(|t| scorer.score(query, t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
}
});
let inc_stats = measure_stats_us(30, || {
let mut inc = IncrementalScorer::new();
for query in &queries {
let results = inc.score_corpus(query, &corpus_refs, None);
std::hint::black_box(&results);
}
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_incremental_vs_naive\",\"samples\":{},\"naive_p50_us\":{},\"naive_p95_us\":{},\"inc_p50_us\":{},\"inc_p95_us\":{}}}",
naive_stats.samples,
naive_stats.p50_us,
naive_stats.p95_us,
inc_stats.p50_us,
inc_stats.p95_us
);
assert!(
inc_stats.p50_us <= naive_stats.p50_us * 2 + 50, "Incremental p50 = {}µs is >2x slower than naive p50 = {}µs",
inc_stats.p50_us,
naive_stats.p50_us,
);
}
fn scaling_ratio_us(p50_small_us: u64, p50_large_us: u64) -> f64 {
if p50_small_us > 0 {
p50_large_us as f64 / p50_small_us as f64
} else {
0.0
}
}
#[test]
fn scaling_ratio_handles_zero_divisor() {
assert_eq!(scaling_ratio_us(0, 123), 0.0);
assert_eq!(scaling_ratio_us(10, 25), 2.5);
}
#[test]
fn perf_scaling_sublinear() {
let scorer = BayesianScorer::fast();
let corpus_100 = make_corpus(100);
let corpus_1000 = make_corpus(1_000);
let stats_100 = measure_stats_us(30, || {
let mut results: Vec<_> = corpus_100
.iter()
.map(|t| scorer.score("git", t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
});
let stats_1000 = measure_stats_us(20, || {
let mut results: Vec<_> = corpus_1000
.iter()
.map(|t| scorer.score("git", t))
.filter(|r| r.score > 0.0)
.collect();
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
std::hint::black_box(&results);
});
eprintln!(
"{{\"ts\":\"2026-02-04T00:00:00Z\",\"event\":\"palette_scaling\",\"samples_100\":{},\"p50_100_us\":{},\"samples_1000\":{},\"p50_1000_us\":{}}}",
stats_100.samples, stats_100.p50_us, stats_1000.samples, stats_1000.p50_us
);
let ratio = scaling_ratio_us(stats_100.p50_us, stats_1000.p50_us);
assert!(
ratio < 25.0,
"1000/100 scaling ratio = {:.1}x exceeds 25x threshold (100: {}µs, 1000: {}µs)",
ratio,
stats_100.p50_us,
stats_1000.p50_us,
);
}
}