#![cfg_attr(not(feature = "std"), no_std)]
/// Fixed-point scale factor for log-domain scores.
///
/// `log_prior_from_freq` and `derive_log_likelihood` multiply a natural-log
/// value by this constant and round to the nearest integer, so one unit of a
/// stored score corresponds to `1 / Q4` in natural-log space.
// NOTE(review): the name suggests "4 fractional bits" (16 = 2^4) — confirm.
pub const Q4: i32 = 16;
/// Origin tag stored in [`Candidate::source`].
///
/// The enum is `#[repr(u8)]` with explicit discriminants, so each variant has
/// a fixed one-byte value (`0`, `1`, `2`) that can be obtained with `as u8`.
// NOTE(review): the variant names suggest input-method engines; confirm the
// exact meaning against the code that produces candidates.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Source {
// Discriminant 0.
Wubi = 0,
// Discriminant 1.
Pinyin = 1,
// Discriminant 2.
Japanese = 2,
}
/// How a candidate was matched; consumed by `derive_log_likelihood` to pick
/// the decay applied to a base log-likelihood.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum MatchType {
// No decay: the base log-likelihood is used unchanged.
Exact,
// Payload is a proximity in thousandths (`derive_log_likelihood` divides it
// by 1000); 1000 yields no decay, smaller values decay more.
Prefix(u16),
// Payload is a cost in thousandths (`derive_log_likelihood` divides it by
// 1000); 0 yields no decay, larger values decay more.
Fuzzy(u16),
// `bigram_links` counts links in the composition; `derive_log_likelihood`
// applies decay only for links beyond the first.
Composed { bigram_links: u8 },
}
/// A scored candidate word borrowed for lifetime `'a`.
///
/// The ranking score is `log_prior + log_likelihood` (see `score`). Equality
/// is implemented by hand and compares only `word`, `source` and
/// `match_type`; the two log fields do not take part in it.
#[derive(Copy, Clone, Debug)]
pub struct Candidate<'a> {
// The candidate text itself (borrowed, not owned).
pub word: &'a str,
// Log prior; presumably Q4-scaled like the output of `log_prior_from_freq`
// — TODO confirm for values produced elsewhere.
pub log_prior: i32,
// Log likelihood; presumably Q4-scaled like the output of
// `derive_log_likelihood` — TODO confirm for values produced elsewhere.
pub log_likelihood: i32,
// How this candidate was matched.
pub match_type: MatchType,
// Origin tag for this candidate.
pub source: Source,
}
/// Identity-based equality: two candidates are equal when they carry the same
/// word, come from the same source and were matched the same way. The
/// `log_prior` and `log_likelihood` fields are deliberately left out.
impl<'a> PartialEq for Candidate<'a> {
    fn eq(&self, other: &Self) -> bool {
        // All three fields are `Copy`, so they can be gathered into tuples
        // and compared element-wise in the same order as before.
        let lhs = (self.word, self.source, self.match_type);
        let rhs = (other.word, other.source, other.match_type);
        lhs == rhs
    }
}
// Marker impl with no methods. The hand-written `eq` compares only `word`
// (`&str`), `source` and `match_type`, all of which are themselves `Eq`.
impl<'a> Eq for Candidate<'a> {}
/// Returns the ranking score of a candidate: `log_prior + log_likelihood`.
///
/// The sum saturates at `i32::MIN` / `i32::MAX` instead of overflowing, so an
/// extreme prior or likelihood can neither panic (debug builds) nor wrap
/// around to the opposite sign (release builds) and invert the ranking.
#[inline]
pub fn score(c: &Candidate<'_>) -> i32 {
    c.log_prior.saturating_add(c.log_likelihood)
}
/// Converts a raw frequency count into a `Q4`-scaled log prior.
///
/// Computes `round(ln(freq + 1) * Q4)`. Adding one before taking the
/// logarithm maps `freq == 0` to `0` rather than negative infinity.
///
/// Only available with the `std` feature.
#[cfg(feature = "std")]
#[inline]
pub fn log_prior_from_freq(freq: u64) -> i32 {
    let smoothed = freq as f64 + 1.0;
    let scaled = smoothed.ln() * f64::from(Q4);
    scaled.round() as i32
}
/// Applies the decay implied by a match type to a base log-likelihood.
///
/// `base_log_q4` and the return value are `Q4`-scaled natural-log values.
/// The decay is computed in `f64`, scaled by `Q4`, rounded to the nearest
/// integer and added to the base:
///
/// * `Exact` — no decay; returns `base_log_q4`.
/// * `Prefix(p)` — adds `3 * ln(p / 1000)`; `p` is clamped to `1..=1000`, so
///   `1000` means no decay and out-of-range values can never turn the decay
///   into a bonus that outranks an exact match.
/// * `Fuzzy(c)` — adds `ln(1 - c / 1000)`; `c` is clamped to at most `999`
///   to keep the logarithm finite.
/// * `Composed { bigram_links }` — adds `ln(0.7)` for every link beyond the
///   first; zero or one link means no decay.
///
/// The decay is therefore always `<= 0`, and the final addition saturates at
/// `i32::MIN` instead of overflowing.
///
/// Only available with the `std` feature.
#[cfg(feature = "std")]
pub fn derive_log_likelihood(base_log_q4: i32, mt: MatchType) -> i32 {
    // Exponent applied to prefix proximity (likelihood scales as prox^K).
    const K: f64 = 3.0;
    // ln(0.7): log-space penalty per bigram link beyond the first.
    const LN_COMPOSED_PER_LINK: f64 = -0.356_674_943_938_732_4;

    let q4 = f64::from(Q4);
    let decay = match mt {
        MatchType::Exact => 0.0,
        MatchType::Prefix(prox_milli) => {
            // Lower bound avoids ln(0); upper bound keeps the decay <= 0.
            let prox = f64::from(prox_milli.clamp(1, 1000)) / 1000.0;
            K * prox.ln() * q4
        }
        MatchType::Fuzzy(cost_milli) => {
            // Cap below 1.0 so that ln(1 - cost) stays finite.
            let cost = f64::from(cost_milli.min(999)) / 1000.0;
            (1.0 - cost).ln() * q4
        }
        MatchType::Composed { bigram_links } => {
            let extra_links = f64::from(bigram_links.saturating_sub(1));
            extra_links * LN_COMPOSED_PER_LINK * q4
        }
    };
    // `decay` is small and finite here, so the cast is exact after rounding.
    base_log_q4.saturating_add(decay.round() as i32)
}
// Unit tests for the scoring types and helpers in this file. Tests that call
// `log_prior_from_freq` or `derive_log_likelihood` are gated on the `std`
// feature because those functions are.
#[cfg(test)]
mod tests {
use super::*;
// Pins the fixed-point scale so an accidental change to `Q4` is caught.
#[test]
fn q4_anchored_to_16() {
assert_eq!(Q4, 16);
}
// Pins the `u8` discriminant of every `Source` variant.
#[test]
fn source_repr_u8_stable() {
assert_eq!(Source::Wubi as u8, 0);
assert_eq!(Source::Pinyin as u8, 1);
assert_eq!(Source::Japanese as u8, 2);
}
// `score` must equal the sum of prior and likelihood. The expected value is
// written with `saturating_add`; none of the listed pairs overflows, so it
// coincides with plain addition here.
#[test]
fn bayes_additive_in_log_space() {
let cases: &[(i32, i32)] = &[
(0, 0),
(10 * Q4, 5 * Q4),
(100 * Q4, -30 * Q4),
(-50 * Q4, 100 * Q4),
(i32::MAX / 4, i32::MIN / 4),
];
for &(lp, ll) in cases {
let c = Candidate {
word: "x",
log_prior: lp,
log_likelihood: ll,
match_type: MatchType::Exact,
source: Source::Pinyin,
};
assert_eq!(score(&c), lp.saturating_add(ll));
}
}
// Derived equality on `MatchType` must take the payload into account.
#[test]
fn match_type_round_trip_variants() {
// Constructed only to show the unit variant exists; not asserted on.
let _exact = MatchType::Exact;
let prefix = MatchType::Prefix(875);
let fuzzy = MatchType::Fuzzy(300);
let composed = MatchType::Composed { bigram_links: 2 };
assert_eq!(prefix, MatchType::Prefix(875));
assert_eq!(fuzzy, MatchType::Fuzzy(300));
assert_eq!(composed, MatchType::Composed { bigram_links: 2 });
assert_ne!(prefix, MatchType::Prefix(874));
assert_ne!(composed, MatchType::Composed { bigram_links: 1 });
}
// `Candidate` equality looks at word, source and match type only; differing
// log fields must not make two candidates unequal.
#[test]
fn candidate_equality_ignores_score() {
let a = Candidate {
word: "继续",
log_prior: 100 * Q4,
log_likelihood: 50 * Q4,
match_type: MatchType::Exact,
source: Source::Pinyin,
};
// Same identity fields as `a`, different scores.
let b = Candidate {
word: "继续",
log_prior: 999 * Q4,
log_likelihood: -10 * Q4,
match_type: MatchType::Exact,
source: Source::Pinyin,
};
// Each of these differs from `a` in exactly one identity field.
let c_word_diff = Candidate { word: "继续。", ..a };
let c_source_diff = Candidate { source: Source::Wubi, ..a };
let c_mt_diff = Candidate { match_type: MatchType::Prefix(500), ..a };
assert_eq!(a, b, "score should not affect identity");
assert_ne!(a, c_word_diff);
assert_ne!(a, c_source_diff);
assert_ne!(a, c_mt_diff);
}
// Spot-checks `log_prior_from_freq` at 0, 1 and 1000 and checks that a
// larger frequency yields a larger prior.
#[cfg(feature = "std")]
#[test]
fn log_prior_from_freq_baseline_values() {
assert_eq!(log_prior_from_freq(0), 0);
let f1 = log_prior_from_freq(1);
assert!((10..=12).contains(&f1), "ln(2)·16 ≈ 11; got {f1}");
let f1000 = log_prior_from_freq(1000);
assert!((110..=112).contains(&f1000), "ln(1001)·16 ≈ 110; got {f1000}");
let f50000 = log_prior_from_freq(50_000);
assert!(f50000 > log_prior_from_freq(1000), "monotone in freq");
}
// An exact match must return the base value untouched.
#[cfg(feature = "std")]
#[test]
fn derive_log_likelihood_exact_is_base() {
for base in [0, Q4, -10 * Q4, 100 * Q4] {
assert_eq!(derive_log_likelihood(base, MatchType::Exact), base);
}
}
// Prefix decay: proximity 1000 gives no decay, and lower proximity gives a
// strictly lower result.
#[cfg(feature = "std")]
#[test]
fn derive_log_likelihood_prefix_monotonic() {
let base = 100 * Q4;
let high = derive_log_likelihood(base, MatchType::Prefix(875));
let mid = derive_log_likelihood(base, MatchType::Prefix(500));
let low = derive_log_likelihood(base, MatchType::Prefix(100));
let full = derive_log_likelihood(base, MatchType::Prefix(1000));
assert_eq!(full, base, "prox=1000 collapses to base (ln 1 = 0)");
assert!(high < base && mid < high && low < mid,
"prefix decay must be monotone-decreasing in proximity; got full={full} high={high} mid={mid} low={low}");
}
// Fuzzy decay: cost 0 gives no decay, and higher cost gives a strictly
// lower result.
#[cfg(feature = "std")]
#[test]
fn derive_log_likelihood_fuzzy_monotonic_in_cost() {
let base = 100 * Q4;
let cheap = derive_log_likelihood(base, MatchType::Fuzzy(100));
let expensive = derive_log_likelihood(base, MatchType::Fuzzy(700));
let zero = derive_log_likelihood(base, MatchType::Fuzzy(0));
assert_eq!(zero, base, "cost=0 collapses to base (ln 1 = 0)");
assert!(expensive < cheap && cheap < base,
"fuzzy decay must be monotone-decreasing in cost; got zero={zero} cheap={cheap} expensive={expensive}");
}
// Composed decay: zero or one link gives no decay; every further link
// lowers the result.
#[cfg(feature = "std")]
#[test]
fn derive_log_likelihood_composed_per_link_decay() {
let base = 200 * Q4;
let one = derive_log_likelihood(base, MatchType::Composed { bigram_links: 1 });
let two = derive_log_likelihood(base, MatchType::Composed { bigram_links: 2 });
let three = derive_log_likelihood(base, MatchType::Composed { bigram_links: 3 });
let zero = derive_log_likelihood(base, MatchType::Composed { bigram_links: 0 });
assert_eq!(zero, base, "0 links — no chain — no decay");
assert_eq!(one, base, "1 link is the first segment; (links − 1) = 0, no decay");
assert!(two < one && three < two,
"composed decay must drop per extra link; got one={one} two={two} three={three}");
}
// End-to-end ordering check combining prior and likelihood: with the same
// frequency an exact match beats prefix and fuzzy matches, and a frequent
// prefix match still beats a rare exact match.
#[cfg(feature = "std")]
#[test]
fn derive_log_likelihood_bayes_additivity_round_trip() {
let base = 160 * Q4;
// Builds a candidate whose prior comes from `freq` and whose likelihood
// comes from applying `mt` to the shared base.
let make = |freq: u64, mt: MatchType| Candidate {
word: "x",
log_prior: log_prior_from_freq(freq),
log_likelihood: derive_log_likelihood(base, mt),
match_type: mt,
source: Source::Pinyin,
};
let exact_common = make(50_000, MatchType::Exact);
let exact_rare = make(10, MatchType::Exact);
let prefix_common = make(50_000, MatchType::Prefix(875));
let fuzzy_common = make(50_000, MatchType::Fuzzy(300));
assert!(score(&exact_common) > score(&prefix_common));
assert!(score(&prefix_common) > score(&exact_rare),
"common prefix prior wins rare exact; got pref={} rare={}",
score(&prefix_common), score(&exact_rare));
assert!(score(&exact_common) > score(&fuzzy_common));
}
// Sorting by `score` in descending order must put the largest sum first.
#[test]
fn score_orders_candidates_by_log_sum_desc() {
let mk = |lp: i32, ll: i32| Candidate {
word: "w",
log_prior: lp,
log_likelihood: ll,
match_type: MatchType::Exact,
source: Source::Pinyin,
};
let mut cands = [mk(10, 20), mk(50, -10), mk(0, 0), mk(100, 100)];
// Comparator arguments are swapped (`b` vs `a`) to sort descending.
cands.sort_by(|a, b| score(b).cmp(&score(a)));
assert_eq!(score(&cands[0]), 200);
assert_eq!(score(&cands[1]), 40);
assert_eq!(score(&cands[2]), 30);
assert_eq!(score(&cands[3]), 0);
}
}