use cortex_core::{CortexError, CortexResult, MemoryId};
use cortex_store::repo::MemoryRepo;
/// Share of the blended score contributed by the fuzzy signal in
/// [`compose_fuzzy_boost`]; the lexical signal receives the remaining
/// `1.0 - FUZZY_BOOST_WEIGHT`.
pub const FUZZY_BOOST_WEIGHT: f32 = 0.25;
/// One result row from an FTS5 lookup, carrying both the rank as received and
/// its normalised form.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Fts5Hit {
/// Identifier of the memory this hit refers to.
pub memory_id: MemoryId,
/// Rank value exactly as it was handed to [`Fts5Hit::from_raw`].
pub raw_rank: f32,
/// Score in `[0.0, 1.0]`; [`Fts5Hit::from_raw`] fills it with
/// `normalize_bm25(raw_rank)`.
pub normalized_score: f32,
}
impl Fts5Hit {
    /// Builds a hit from a memory id and its raw rank.
    ///
    /// The raw rank is stored untouched in `raw_rank`, and its
    /// [`normalize_bm25`] image is stored in `normalized_score`.
    #[must_use]
    pub fn from_raw(memory_id: MemoryId, raw_rank: f32) -> Self {
        let normalized_score = normalize_bm25(raw_rank);
        Self {
            memory_id,
            raw_rank,
            normalized_score,
        }
    }
}
/// Runs `query` through the repo's FTS5 search and wraps every returned
/// `(memory_id, rank)` pair in an [`Fts5Hit`].
///
/// `query` is forwarded to the repo untrimmed; trimming is only used to detect
/// blank input. `limit` is passed to `MemoryRepo::fts5_search` unchanged
/// (presumably the maximum number of rows — confirm against the repo).
///
/// # Errors
///
/// Returns [`CortexError::Validation`] when `query` is empty or contains only
/// whitespace, and also when the underlying `fts5_search` call fails (the
/// original error is embedded in the message).
pub fn query_fts5(repo: &MemoryRepo<'_>, query: &str, limit: usize) -> CortexResult<Vec<Fts5Hit>> {
    let is_blank = query.trim().is_empty();
    if is_blank {
        return Err(CortexError::Validation(
            "fts5 query must not be empty".into(),
        ));
    }

    let ranked_rows = match repo.fts5_search(query, limit) {
        Ok(rows) => rows,
        Err(err) => {
            return Err(CortexError::Validation(format!(
                "fts5 mirror lookup failed: {err}"
            )));
        }
    };

    let hits: Vec<Fts5Hit> = ranked_rows
        .into_iter()
        .map(|(id, bm25_rank)| Fts5Hit::from_raw(id, bm25_rank))
        .collect();
    Ok(hits)
}
/// Maps a raw rank onto `[0.0, 1.0]` using `exp(rank)`.
///
/// * A rank of `0.0` yields `1.0`.
/// * Negative ranks decay towards `0.0` the further they are from zero.
/// * Positive ranks saturate at `1.0`.
/// * `NaN` and both infinities yield `0.0`.
// NOTE(review): SQLite's FTS5 `bm25()` reports *better* matches as *more
// negative* numbers, whereas this mapping scores ranks nearer zero higher.
// Confirm the sign convention of the ranks supplied by the repo.
#[must_use]
pub fn normalize_bm25(rank: f32) -> f32 {
    if rank.is_finite() {
        let decayed = rank.exp();
        // `exp` of a large positive finite rank overflows to +inf; the clamp
        // folds that (and anything else above 1) back to 1.0.
        decayed.clamp(0.0, 1.0)
    } else {
        0.0
    }
}
/// Blends a lexical score with a fuzzy score into one value in `[0.0, 1.0]`.
///
/// Each input is first sanitised: finite values are clamped into
/// `[0.0, 1.0]`, while `NaN` and infinities are treated as `0.0`. The result
/// is `lex * (1.0 - FUZZY_BOOST_WEIGHT) + fuzz * FUZZY_BOOST_WEIGHT`.
#[must_use]
pub fn compose_fuzzy_boost(lexical_match: f32, fuzzy_score: f32) -> f32 {
    // Shared sanitiser: non-finite inputs contribute nothing rather than
    // poisoning the blend.
    let into_unit_band = |score: f32| -> f32 {
        if score.is_finite() {
            score.clamp(0.0, 1.0)
        } else {
            0.0
        }
    };

    let lex = into_unit_band(lexical_match);
    let fuzz = into_unit_band(fuzzy_score);
    let lexical_weight = 1.0 - FUZZY_BOOST_WEIGHT;

    lex * lexical_weight + fuzz * FUZZY_BOOST_WEIGHT
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A rank of zero is the top of the normalised scale.
    #[test]
    fn normalize_bm25_maps_best_rank_to_one() {
        assert_eq!(normalize_bm25(0.0), 1.0);
    }

    /// Ranks further below zero must score lower while staying in band.
    #[test]
    fn normalize_bm25_decays_for_weaker_ranks() {
        let strong = normalize_bm25(-0.5);
        let weak = normalize_bm25(-3.0);
        assert!(strong > weak);
        assert!((0.0..=1.0).contains(&strong));
        assert!((0.0..=1.0).contains(&weak));
    }

    /// NaN and both infinities collapse to zero instead of propagating.
    #[test]
    fn normalize_bm25_handles_non_finite_rank() {
        assert_eq!(normalize_bm25(f32::NAN), 0.0);
        assert_eq!(normalize_bm25(f32::INFINITY), 0.0);
        assert_eq!(normalize_bm25(f32::NEG_INFINITY), 0.0);
    }

    /// A perfect lexical hit must outrank a perfect fuzzy-only hit.
    #[test]
    fn compose_fuzzy_keeps_exact_lexical_dominant() {
        let exact = compose_fuzzy_boost(1.0, 0.0);
        let fuzzy_only = compose_fuzzy_boost(0.0, 1.0);
        assert!(
            exact > fuzzy_only,
            "exact lexical hit must dominate fuzzy-only hit; got exact={exact}, fuzzy_only={fuzzy_only}"
        );
        assert!((exact - 0.75).abs() < f32::EPSILON);
        assert!((fuzzy_only - FUZZY_BOOST_WEIGHT).abs() < f32::EPSILON);
    }

    /// With no fuzzy signal the result is the lexical score times its weight.
    #[test]
    fn compose_fuzzy_pure_lexical_stays_in_band() {
        let pure = compose_fuzzy_boost(0.8, 0.0);
        assert!((0.0..=1.0).contains(&pure));
        assert!((pure - 0.8 * (1.0 - FUZZY_BOOST_WEIGHT)).abs() < f32::EPSILON);
    }

    /// Inputs outside `[0, 1]` are clamped before blending.
    #[test]
    fn compose_fuzzy_clamps_out_of_band_inputs() {
        let huge = compose_fuzzy_boost(5.0, -2.0);
        assert!((0.0..=1.0).contains(&huge));
        assert!((huge - 0.75).abs() < f32::EPSILON);
    }

    /// NaN inputs are treated as zero contributions.
    #[test]
    fn compose_fuzzy_handles_nan_inputs() {
        let composed = compose_fuzzy_boost(f32::NAN, f32::NAN);
        assert_eq!(composed, 0.0);
    }

    /// Pins the default weight and checks that, with no fuzzy contribution,
    /// the lexical score passes through scaled by the lexical share.
    #[test]
    fn compose_fuzzy_at_default_baseline_passes_lexical_through_at_weighted_fraction() {
        assert_eq!(FUZZY_BOOST_WEIGHT, 0.25);

        // Previously this test stopped at the constant check and never
        // exercised `compose_fuzzy_boost`, despite what its name promises.
        for lexical in [0.0_f32, 0.25, 0.5, 1.0] {
            let composed = compose_fuzzy_boost(lexical, 0.0);
            let expected = lexical * (1.0 - FUZZY_BOOST_WEIGHT);
            assert!(
                (composed - expected).abs() < f32::EPSILON,
                "lexical={lexical}: composed={composed}, expected={expected}"
            );
        }
    }

    /// `from_raw` keeps the id and raw rank and stores `exp(rank)` as the score.
    #[test]
    fn fts5_hit_from_raw_normalises_bm25_via_exp_transform() {
        let memory_id: MemoryId = "mem_01ARZ3NDEKTSV4RRFFQ69G5FAV"
            .parse()
            .expect("parse memory id");
        let hit = Fts5Hit::from_raw(memory_id, -1.0);
        let expected = (-1.0_f32).exp();
        assert!((hit.normalized_score - expected).abs() < f32::EPSILON);
        assert_eq!(hit.memory_id, memory_id);
        assert_eq!(hit.raw_rank, -1.0);
    }
}