use std::collections::HashSet;
use crate::fts5::compose_fuzzy_boost;
// Weights applied by `score` to each signal. The six positive weights add up to
// 1.00 on paper; the two negative weights are penalties that subtract from the
// final score.

/// Weight of the lexical-match signal in `score`.
pub const LEXICAL_MATCH_WEIGHT: f32 = 0.30;
/// Weight of the semantic-similarity signal in `score`.
///
/// `score` currently feeds this component a raw value of `0.0`, so it
/// contributes nothing to the final score.
pub const SEMANTIC_SIMILARITY_WEIGHT: f32 = 0.25;
/// Weight of the brightness signal in `score`.
pub const BRIGHTNESS_WEIGHT: f32 = 0.15;
/// Weight of the domain-overlap signal in `score`.
pub const DOMAIN_OVERLAP_WEIGHT: f32 = 0.10;
/// Weight of the validation signal in `score`.
pub const VALIDATION_WEIGHT: f32 = 0.10;
/// Weight of the authority signal in `score`.
pub const AUTHORITY_WEIGHT: f32 = 0.10;
/// Penalty weight of the contradiction-risk signal in `score` (negative, so it lowers the score).
pub const CONTRADICTION_RISK_WEIGHT: f32 = -0.25;
/// Penalty weight of the staleness signal in `score` (negative, so it lowers the score).
pub const STALENESS_PENALTY_WEIGHT: f32 = -0.10;
// Weights used by `compose_lexical_semantic` when a semantic similarity value is
// supplied (`Some`). They are not used on the `None` path.

/// Weight of the lexical axis when a semantic value is present.
pub const LEX_WEIGHT_WITH_SEM: f32 = 0.65;
/// Weight of the FTS5 axis when a semantic value is present.
pub const FTS5_WEIGHT_WITH_SEM: f32 = 0.25;
/// Weight of the semantic axis when a semantic value is present.
pub const SEM_WEIGHT_WITH_SEM: f32 = 0.10;
// Compile-time guard: the three `*_WITH_SEM` weights, expressed as whole
// percentages, must add up to exactly 100. Each product is truncated to an
// integer with `as i32` before summing, so the comparison is exact.
const _: () = {
    let lex_pct = (LEX_WEIGHT_WITH_SEM * 100.0) as i32;
    let fts5_pct = (FTS5_WEIGHT_WITH_SEM * 100.0) as i32;
    let sem_pct = (SEM_WEIGHT_WITH_SEM * 100.0) as i32;
    assert!(
        lex_pct + fts5_pct + sem_pct == 100,
        "Phase 4.C weights must sum to 1.0"
    );
};
/// Raw signals consumed by [`score`].
///
/// `score` clamps out-of-range values into `[0.0, 1.0]` before applying the
/// matching weight constant. There is no semantic-similarity field: `score`
/// always uses `0.0` for that component.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreInputs {
/// Lexical match signal; weighted by `LEXICAL_MATCH_WEIGHT`.
pub lexical_match: f32,
/// Brightness signal; weighted by `BRIGHTNESS_WEIGHT`.
pub brightness: f32,
/// Domain overlap signal; weighted by `DOMAIN_OVERLAP_WEIGHT`.
pub domain_overlap: f32,
/// Validation signal; weighted by `VALIDATION_WEIGHT`.
pub validation: f32,
/// Authority signal; weighted by `AUTHORITY_WEIGHT`.
pub authority_weight: f32,
/// Contradiction risk; weighted by the negative `CONTRADICTION_RISK_WEIGHT`.
pub contradiction_risk: f32,
/// Staleness penalty; weighted by the negative `STALENESS_PENALTY_WEIGHT`.
pub staleness_penalty: f32,
}
/// One term of the hybrid score: the sanitised signal, the weight applied to
/// it, and the resulting contribution to the final score.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreComponent {
    /// Input signal after sanitising; always within `[0.0, 1.0]`.
    pub raw: f32,
    /// Signed weight applied to `raw` (negative for penalty components).
    pub weight: f32,
    /// `raw * weight`, i.e. what this component adds to the final score.
    pub contribution: f32,
}

impl ScoreComponent {
    /// Builds a component from an unsanitised signal and its weight.
    ///
    /// `raw` is forced into `[0.0, 1.0]`:
    /// * values below `0.0` (including `-inf`) become `0.0`;
    /// * values above `1.0` (including `+inf`) become `1.0`;
    /// * `NaN` becomes `0.0`, so a single bad signal contributes nothing
    ///   instead of turning the whole score into `NaN`.
    fn new(raw: f32, weight: f32) -> Self {
        // `f32::clamp` returns NaN for a NaN receiver, so NaN must be handled
        // before clamping or it would leak into `contribution` and, through
        // the sum, into the final score.
        let raw = if raw.is_nan() {
            0.0
        } else {
            raw.clamp(0.0, 1.0)
        };
        Self {
            raw,
            weight,
            contribution: raw * weight,
        }
    }
}
/// Per-component breakdown of a hybrid score as produced by [`score`].
///
/// Every component carries its clamped raw value, its weight and the resulting
/// contribution; `final_score` is the sum of all eight contributions.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HybridScoreExplanation {
/// Lexical match component (`LEXICAL_MATCH_WEIGHT`).
pub lexical_match: ScoreComponent,
/// Semantic similarity component (`SEMANTIC_SIMILARITY_WEIGHT`); `score` sets its raw value to `0.0`.
pub semantic_similarity: ScoreComponent,
/// Brightness component (`BRIGHTNESS_WEIGHT`).
pub brightness: ScoreComponent,
/// Domain overlap component (`DOMAIN_OVERLAP_WEIGHT`).
pub domain_overlap: ScoreComponent,
/// Validation component (`VALIDATION_WEIGHT`).
pub validation: ScoreComponent,
/// Authority component (`AUTHORITY_WEIGHT`).
pub authority_weight: ScoreComponent,
/// Contradiction risk component (negative `CONTRADICTION_RISK_WEIGHT`).
pub contradiction_risk: ScoreComponent,
/// Staleness component (negative `STALENESS_PENALTY_WEIGHT`).
pub staleness_penalty: ScoreComponent,
/// Sum of the contributions of all components above.
pub final_score: f32,
}
/// Combines the lexical, FTS5 and (optional) semantic axes into one value.
///
/// * `semantic == None`: the result is exactly
///   `compose_fuzzy_boost(lexical, fts5)`; the inputs are forwarded untouched.
/// * `semantic == Some(s)`: each axis is sanitised with `clamp_band` and the
///   result is the weighted sum using `LEX_WEIGHT_WITH_SEM`,
///   `FTS5_WEIGHT_WITH_SEM` and `SEM_WEIGHT_WITH_SEM`.
#[must_use]
pub fn compose_lexical_semantic(lexical: f32, fts5: f32, semantic: Option<f32>) -> f32 {
    match semantic {
        None => compose_fuzzy_boost(lexical, fts5),
        Some(similarity) => {
            let lexical_part = clamp_band(lexical) * LEX_WEIGHT_WITH_SEM;
            let fts5_part = clamp_band(fts5) * FTS5_WEIGHT_WITH_SEM;
            let semantic_part = clamp_band(similarity) * SEM_WEIGHT_WITH_SEM;
            // Summed left to right: (lexical + fts5) + semantic.
            lexical_part + fts5_part + semantic_part
        }
    }
}
/// Sanitises a signal into the `[0.0, 1.0]` band.
///
/// Non-finite inputs (`NaN`, `+inf`, `-inf`) map to `0.0`; finite inputs are
/// clamped to the band.
fn clamp_band(value: f32) -> f32 {
    if !value.is_finite() {
        return 0.0;
    }
    value.clamp(0.0, 1.0)
}
#[must_use]
pub fn score(inputs: ScoreInputs) -> HybridScoreExplanation {
let lexical_match = ScoreComponent::new(inputs.lexical_match, LEXICAL_MATCH_WEIGHT);
let semantic_similarity = ScoreComponent::new(0.0, SEMANTIC_SIMILARITY_WEIGHT);
let brightness = ScoreComponent::new(inputs.brightness, BRIGHTNESS_WEIGHT);
let domain_overlap = ScoreComponent::new(inputs.domain_overlap, DOMAIN_OVERLAP_WEIGHT);
let validation = ScoreComponent::new(inputs.validation, VALIDATION_WEIGHT);
let authority_weight = ScoreComponent::new(inputs.authority_weight, AUTHORITY_WEIGHT);
let contradiction_risk =
ScoreComponent::new(inputs.contradiction_risk, CONTRADICTION_RISK_WEIGHT);
let staleness_penalty = ScoreComponent::new(inputs.staleness_penalty, STALENESS_PENALTY_WEIGHT);
let final_score = lexical_match.contribution
+ semantic_similarity.contribution
+ brightness.contribution
+ domain_overlap.contribution
+ validation.contribution
+ authority_weight.contribution
+ contradiction_risk.contribution
+ staleness_penalty.contribution;
HybridScoreExplanation {
lexical_match,
semantic_similarity,
brightness,
domain_overlap,
validation,
authority_weight,
contradiction_risk,
staleness_penalty,
final_score,
}
}
/// Result of [`domain_overlap`]: the overlap ratio plus the normalised domain
/// lists it was computed from.
#[derive(Debug, Clone, PartialEq)]
pub struct DomainOverlapExplanation {
/// Fraction of normalised query domains that also appear among the memory
/// domains; `0.0` when there are no query domains.
pub domain_overlap: f32,
/// Query domains after normalisation (trimmed, ASCII-lowercased, empty
/// entries dropped, duplicates removed keeping the first occurrence).
pub query_domains: Vec<String>,
/// Memory domains after the same normalisation.
pub memory_domains: Vec<String>,
/// Normalised query domains that are also memory domains, in query order.
pub matched_domains: Vec<String>,
}
/// Measures how many of the query's domains are covered by a memory's domains.
///
/// Both lists are first normalised with `normalize_domains`. The ratio is
/// `matched / normalised_query_count`, or `0.0` when the normalised query list
/// is empty. Matched domains are reported in query order.
#[must_use]
pub fn domain_overlap(
    query_domains: &[impl AsRef<str>],
    memory_domains: &[impl AsRef<str>],
) -> DomainOverlapExplanation {
    let query_domains = normalize_domains(query_domains);
    let memory_domains = normalize_domains(memory_domains);

    let (domain_overlap, matched_domains) = if query_domains.is_empty() {
        // No query domains: nothing to match and no denominator to divide by.
        (0.0, Vec::new())
    } else {
        let memory_lookup: HashSet<&str> = memory_domains.iter().map(String::as_str).collect();
        let hits: Vec<String> = query_domains
            .iter()
            .filter(|candidate| memory_lookup.contains(candidate.as_str()))
            .cloned()
            .collect();
        let ratio = hits.len() as f32 / query_domains.len() as f32;
        (ratio, hits)
    };

    DomainOverlapExplanation {
        domain_overlap,
        query_domains,
        memory_domains,
        matched_domains,
    }
}
/// Normalises a list of domain names.
///
/// Each entry is trimmed and ASCII-lowercased. Entries that end up empty are
/// dropped, and duplicates (after normalisation) are removed while keeping the
/// first occurrence, so input order is preserved.
fn normalize_domains(domains: &[impl AsRef<str>]) -> Vec<String> {
    let mut already_seen = HashSet::new();
    domains
        .iter()
        .map(|entry| entry.as_ref().trim().to_ascii_lowercase())
        // `insert` returns false for a value that is already present, which
        // filters out repeats; the emptiness check runs first so blank
        // entries are never recorded.
        .filter(|name| !name.is_empty() && already_seen.insert(name.clone()))
        .collect()
}
// Unit tests for the hybrid scorer, the domain-overlap helper and the
// lexical/FTS5/semantic composition function.
#[cfg(test)]
mod tests {
use super::*;
// The final score must equal the hand-written weighted sum of the inputs, and
// the semantic component must stay at zero because `score` feeds it 0.0.
#[test]
fn score_matches_build_spec_weights() {
let explanation = score(ScoreInputs {
lexical_match: 0.8,
brightness: 0.6,
domain_overlap: 0.5,
validation: 0.7,
authority_weight: 0.4,
contradiction_risk: 0.2,
staleness_penalty: 0.3,
});
// Terms are written in the same order `score` adds them.
let expected = 0.30 * 0.8 + 0.25 * 0.0 + 0.15 * 0.6 + 0.10 * 0.5 + 0.10 * 0.7 + 0.10 * 0.4
- 0.25 * 0.2
- 0.10 * 0.3;
assert!((explanation.final_score - expected).abs() < f32::EPSILON);
assert_eq!(explanation.semantic_similarity.raw, 0.0);
assert_eq!(explanation.semantic_similarity.contribution, 0.0);
}
// A candidate with a weaker lexical match but strong supporting signals and no
// penalties must outrank a perfect lexical match that is risky and stale.
#[test]
fn score_ordering_rewards_salient_validated_memories() {
let strong = score(ScoreInputs {
lexical_match: 0.75,
brightness: 0.9,
domain_overlap: 1.0,
validation: 1.0,
authority_weight: 0.8,
contradiction_risk: 0.0,
staleness_penalty: 0.0,
});
let weak = score(ScoreInputs {
lexical_match: 1.0,
brightness: 0.1,
domain_overlap: 0.0,
validation: 0.0,
authority_weight: 0.2,
contradiction_risk: 0.8,
staleness_penalty: 0.6,
});
assert!(strong.final_score > weak.final_score);
}
// Every component must report the weight constant it was built with, and an
// out-of-range raw value (2.0) must be clamped to 1.0.
#[test]
fn explanation_contains_all_score_fields() {
let explanation = score(ScoreInputs {
lexical_match: 2.0,
brightness: 1.0,
domain_overlap: 1.0,
validation: 1.0,
authority_weight: 1.0,
contradiction_risk: 1.0,
staleness_penalty: 1.0,
});
assert_eq!(explanation.lexical_match.raw, 1.0);
assert_eq!(explanation.lexical_match.weight, LEXICAL_MATCH_WEIGHT);
assert_eq!(
explanation.semantic_similarity.weight,
SEMANTIC_SIMILARITY_WEIGHT
);
assert_eq!(explanation.brightness.weight, BRIGHTNESS_WEIGHT);
assert_eq!(explanation.domain_overlap.weight, DOMAIN_OVERLAP_WEIGHT);
assert_eq!(explanation.validation.weight, VALIDATION_WEIGHT);
assert_eq!(explanation.authority_weight.weight, AUTHORITY_WEIGHT);
assert_eq!(
explanation.contradiction_risk.weight,
CONTRADICTION_RISK_WEIGHT
);
assert_eq!(
explanation.staleness_penalty.weight,
STALENESS_PENALTY_WEIGHT
);
}
// "Retrieval" and "retrieval" collapse into one query domain, so one of the
// two normalised query domains ("store") matches: overlap is 1/2.
#[test]
fn domain_overlap_reports_matched_domains() {
let explanation =
domain_overlap(&["Retrieval", "Store", "retrieval"], &["store", "privacy"]);
assert_eq!(explanation.domain_overlap, 0.5);
assert_eq!(explanation.query_domains, ["retrieval", "store"]);
assert_eq!(explanation.memory_domains, ["store", "privacy"]);
assert_eq!(explanation.matched_domains, ["store"]);
}
// Runtime counterpart of the compile-time `const _` check on the same weights.
#[test]
fn phase_4c_weights_sum_to_one() {
let sum = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM;
assert!(
(sum - 1.0).abs() < f32::EPSILON,
"Phase 4.C composition weights must sum to 1.0, got {sum}"
);
}
// With `semantic == None` the composition must return the same value as
// calling `compose_fuzzy_boost` directly, for every sampled (lex, fts5) pair.
#[test]
fn compose_semantic_off_matches_phase_4b_baseline_exactly() {
let cells = [
(0.0_f32, 0.0_f32),
(1.0, 0.0),
(0.0, 1.0),
(0.5, 0.5),
(0.75, 0.25),
(0.25, 0.75),
(0.8, 0.2),
(0.2, 0.8),
(0.9, 0.1),
(0.1, 0.9),
];
for (lex, fts5) in cells {
let phase_4b = compose_fuzzy_boost(lex, fts5);
let phase_4c_off = compose_lexical_semantic(lex, fts5, None);
assert!(
(phase_4b - phase_4c_off).abs() < f32::EPSILON,
"Phase 4.B baseline drift at (lex={lex}, fts5={fts5}): \
phase_4b={phase_4b}, phase_4c_off={phase_4c_off}"
);
}
}
// Holding lex and fts5 fixed, moving the semantic value from 0 to 1 must raise
// the result by exactly `SEM_WEIGHT_WITH_SEM`.
#[test]
fn compose_semantic_on_includes_semantic_axis() {
let lex = 0.5;
let fts5 = 0.0;
let low_sem = compose_lexical_semantic(lex, fts5, Some(0.0));
let high_sem = compose_lexical_semantic(lex, fts5, Some(1.0));
assert!(
high_sem > low_sem,
"semantic axis must contribute positively; low_sem={low_sem}, high_sem={high_sem}"
);
assert!(
(high_sem - low_sem - SEM_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
"semantic contribution must equal SEM_WEIGHT_WITH_SEM"
);
}
// A pure lexical hit (lex = 1) must score higher than a pure semantic hit
// (sem = 1) under the `*_WITH_SEM` weights.
#[test]
fn compose_semantic_keeps_exact_lexical_dominant() {
let lexical_only = compose_lexical_semantic(1.0, 0.0, Some(0.0));
let semantic_only = compose_lexical_semantic(0.0, 0.0, Some(1.0));
assert!(
lexical_only > semantic_only,
"exact lexical hit must dominate semantic-only hit; \
lexical_only={lexical_only}, semantic_only={semantic_only}"
);
}
// NOTE(review): this evaluates the same two inputs and the same inequality as
// `compose_semantic_keeps_exact_lexical_dominant` above.
#[test]
fn compose_semantic_default_weight_does_not_displace_exact_lexical_hits() {
let lex_winner = compose_lexical_semantic(1.0, 0.0, Some(0.0));
let sem_winner = compose_lexical_semantic(0.0, 0.0, Some(1.0));
assert!(
lex_winner > sem_winner,
"default semantic weight must not displace exact lexical hits"
);
}
// Sweeps a 5 x 5 x 5 grid of in-band inputs and checks the composed value
// never leaves [0, 1].
#[test]
fn compose_semantic_stays_in_band_for_all_inputs() {
let values = [0.0_f32, 0.25, 0.5, 0.75, 1.0];
for &lex in &values {
for &fts5 in &values {
for &sem in &values {
let composed = compose_lexical_semantic(lex, fts5, Some(sem));
assert!(
(0.0..=1.0).contains(&composed),
"composed value out of band at (lex={lex}, fts5={fts5}, sem={sem}): {composed}"
);
}
}
}
}
// NaN and infinity are sanitised to 0.0 and the negative value is clamped to
// 0.0, so all three axes contribute nothing.
#[test]
fn compose_semantic_clamps_out_of_band_inputs() {
let composed = compose_lexical_semantic(f32::NAN, f32::INFINITY, Some(-5.0));
assert!((0.0..=1.0).contains(&composed));
assert_eq!(composed, 0.0);
}
// All-zero inputs must give zero on both the `None` path (which goes through
// `compose_fuzzy_boost`) and the `Some` path.
#[test]
fn compose_semantic_zero_for_all_axes_is_zero() {
assert_eq!(compose_lexical_semantic(0.0, 0.0, None), 0.0);
assert_eq!(compose_lexical_semantic(0.0, 0.0, Some(0.0)), 0.0);
}
}