crtx-retrieval 0.1.1

//! Hybrid retrieval scoring and explanations.
//!
//! Phase 4.C extends the upstream lexical-input composition with a third
//! axis (semantic similarity) so the retrieval surface can fold in
//! embeddings when the operator opts in via `--semantic`. The
//! composition lives in [`compose_lexical_semantic`]; the downstream
//! hybrid scorer in [`score`] is unchanged. See the constants
//! [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`], and
//! [`SEM_WEIGHT_WITH_SEM`] for the active weights and the rationale
//! comments for why the semantic weight is small.
//!
//! **Phase 4.B eval guardrail**: when the caller passes `None` for the
//! semantic axis, [`compose_lexical_semantic`] returns exactly the same
//! value as the Phase 4.B [`compose_fuzzy_boost`] (`lex * 0.75 +
//! fts5 * 0.25`). The default-off `--semantic` flag therefore preserves
//! byte-for-byte the Phase 4.B retrieval ordering, and the regression
//! test `compose_semantic_off_matches_phase_4b_baseline_exactly` pins
//! the property.

use std::collections::HashSet;

use crate::fts5::compose_fuzzy_boost;

/// Weight for the lexical match component.
///
/// The largest positive weight. The six positive weights in this group
/// (0.30 + 0.25 + 0.15 + 0.10 + 0.10 + 0.10) add up to 1.0.
pub const LEXICAL_MATCH_WEIGHT: f32 = 0.30;
/// Weight for the semantic similarity component.
///
/// [`score`] currently builds this component from a raw value of `0.0`,
/// so the weight is reported in explanations but adds nothing to the
/// final score.
pub const SEMANTIC_SIMILARITY_WEIGHT: f32 = 0.25;
/// Weight for the brightness component.
pub const BRIGHTNESS_WEIGHT: f32 = 0.15;
/// Weight for the domain overlap component.
pub const DOMAIN_OVERLAP_WEIGHT: f32 = 0.10;
/// Weight for the validation component.
pub const VALIDATION_WEIGHT: f32 = 0.10;
/// Weight for the authority component.
pub const AUTHORITY_WEIGHT: f32 = 0.10;
/// Weight for the contradiction-risk penalty.
///
/// Negative: a higher contradiction risk lowers the final score.
pub const CONTRADICTION_RISK_WEIGHT: f32 = -0.25;
/// Weight for the staleness penalty.
///
/// Negative: a higher staleness penalty lowers the final score.
pub const STALENESS_PENALTY_WEIGHT: f32 = -0.10;

// =============================================================================
// Phase 4.C compositional weights for the upstream lexical-input axis.
//
// These constants are active ONLY when the caller passes a non-`None`
// semantic similarity into [`compose_lexical_semantic`]. When semantic
// is `None`, [`compose_lexical_semantic`] reduces to the Phase 4.B
// [`compose_fuzzy_boost`] shape (lex * 0.75 + fts5 * 0.25) so the
// default ordering is byte-for-byte unchanged.
//
// The weights below were picked under three constraints:
//
//   1. They MUST sum to 1.0 so the composed value stays inside `[0, 1]`
//      and the downstream hybrid scorer (`score`) receives the same
//      input shape it always has.
//   2. The FTS5 weight MUST be unchanged from Phase 4.B (0.25). The
//      fuzzy-recovery invariant
//      `compose_fuzzy_keeps_exact_lexical_dominant` depends on the
//      FTS5 axis carrying the same fraction of the composed score in
//      both modes; otherwise turning on `--semantic` would weaken
//      a typo-of-one-character recovery as a side effect.
//   3. The semantic weight MUST be SMALL relative to lexical so that
//      a memory with a perfect lexical match (lex=1.0) but no
//      embedding (sem=0.0) still strictly outscores a memory with
//      no lexical match but a perfect embedding match (lex=0.0,
//      sem=1.0). The 0.10 / 0.65 ratio guarantees this:
//      `0.65 * 1.0 + 0.10 * 0.0 = 0.65 > 0.65 * 0.0 + 0.10 * 1.0 = 0.10`.
//
// The Phase 4.C SPEC mentions a notional fourth axis (`w_sal = 0.10`)
// for salience, but salience is composed DOWNSTREAM by [`score`] via
// `brightness`, `validation`, `authority_weight`, and the
// contradiction / staleness penalties — not at the lexical-input phase.
// Pulling salience up here would double-count it. The four-way
// composition shape in the SPEC is the conceptual decomposition
// operators reason about; the implementation keeps lexical-input
// composition (here) and salience composition (in `score`) separate.

/// Lexical-match weight when the semantic axis is active.
///
/// 0.65 = 0.75 (Phase 4.B baseline) - 0.10 (semantic eats into lexical).
/// Lexical stays the strongest axis so an exact lexical hit dominates a
/// semantic-only hit: with the other axes at zero, `lex = 1.0` composes
/// to 0.65 while `sem = 1.0` composes to only 0.10.
///
/// Applied by [`compose_lexical_semantic`] to the clamped lexical input
/// when its `semantic` argument is `Some`.
pub const LEX_WEIGHT_WITH_SEM: f32 = 0.65;

/// FTS5 weight when the semantic axis is active.
///
/// 0.25 is unchanged from Phase 4.B
/// ([`crate::fts5::FUZZY_BOOST_WEIGHT`]) so flipping `--semantic` on
/// does not weaken fuzzy recovery as a side effect.
///
/// Applied by [`compose_lexical_semantic`] to the clamped FTS5 input
/// when its `semantic` argument is `Some`.
pub const FTS5_WEIGHT_WITH_SEM: f32 = 0.25;

/// Semantic-similarity weight when the semantic axis is active.
///
/// 0.10 is the SMALL value the SPEC pins. Larger weights would let a
/// stub-embedder hash-collision push a semantically unrelated memory
/// above an exact lexical hit; smaller weights would make the axis
/// invisible. The compromise is 0.10 — enough to break ties between
/// two equally-lexical-matched memories using semantic signal, not
/// enough to displace a lexical hit.
///
/// Applied by [`compose_lexical_semantic`] to the clamped semantic
/// similarity when its `semantic` argument is `Some`.
pub const SEM_WEIGHT_WITH_SEM: f32 = 0.10;

// Compile-time check that the Phase 4.C weights sum to 1.0. A non-1.0
// sum would silently push the composed lexical input outside `[0, 1]`
// and break the downstream scorer's assumptions, so a bad weight tweak
// must fail the build rather than wait for a test run.
//
// The sum is compared as a float within `f32::EPSILON`, the same
// tolerance the `phase_4c_weights_sum_to_one` unit test applies at
// runtime. Comparing the float sum (instead of truncating each weight
// to a whole percent) keeps the check correct for weights finer than
// 0.01 and does not let sub-percent errors cancel out unnoticed.
const _: () = {
    let drift = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM - 1.0;
    assert!(
        drift > -f32::EPSILON && drift < f32::EPSILON,
        "Phase 4.C weights must sum to 1.0"
    );
};

/// Score inputs available for v0 retrieval.
///
/// Values are clamped to `[0, 1]` before weighting; the clamp is applied
/// when [`score`] turns each field into a [`ScoreComponent`]. Semantic
/// similarity is intentionally absent because v0 fixes it at `0` until
/// embeddings exist. When the semantic axis is enabled, the signal is
/// expected to arrive already folded into `lexical_match` via
/// [`compose_lexical_semantic`] (the caller wiring lives outside this
/// file).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreInputs {
    /// Lexical match from `lexical.rs`.
    pub lexical_match: f32,
    /// Deterministic salience brightness.
    pub brightness: f32,
    /// Domain overlap between query/task domains and memory domains.
    pub domain_overlap: f32,
    /// Outcome-bound validation signal.
    pub validation: f32,
    /// Authority weight for the memory source.
    pub authority_weight: f32,
    /// Risk from unresolved contradictions. Weighted negatively by [`score`].
    pub contradiction_risk: f32,
    /// Penalty for stale or unvalidated memory. Weighted negatively by [`score`].
    pub staleness_penalty: f32,
}

/// A weighted score component suitable for `memory search --explain`.
///
/// Holds the clamped input, the weight applied to it, and their product,
/// so an explanation can show each step of the calculation.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ScoreComponent {
    /// Raw normalized component value after clamping to `[0, 1]`.
    pub raw: f32,
    /// BUILD_SPEC §14.1 component weight.
    pub weight: f32,
    /// Weighted contribution to the final score (`raw * weight`).
    pub contribution: f32,
}

impl ScoreComponent {
    /// Builds a component from an unclamped input and its weight.
    ///
    /// `raw` is clamped to `[0, 1]`; `weight` is stored as given. NaN is
    /// mapped to `0.0` explicitly because `f32::clamp` returns NaN for a
    /// NaN input, which would otherwise leak into `contribution` and make
    /// the final score NaN (and therefore unorderable). `0.0` is the same
    /// fallback `clamp_band` uses for unusable values. Infinite inputs keep
    /// their ordinary clamp result (`+inf` becomes `1.0`, `-inf` becomes
    /// `0.0`).
    fn new(raw: f32, weight: f32) -> Self {
        let raw = if raw.is_nan() {
            0.0
        } else {
            raw.clamp(0.0, 1.0)
        };
        Self {
            raw,
            weight,
            contribution: raw * weight,
        }
    }
}

/// Full hybrid score explanation.
///
/// Produced by [`score`]: one [`ScoreComponent`] per scoring axis plus the
/// final score, which is the sum of all eight contributions.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HybridScoreExplanation {
    /// `0.30 * lexical_match`.
    pub lexical_match: ScoreComponent,
    /// `0.25 * semantic_similarity`; fixed to zero for v0.
    pub semantic_similarity: ScoreComponent,
    /// `0.15 * brightness`.
    pub brightness: ScoreComponent,
    /// `0.10 * domain_overlap`.
    pub domain_overlap: ScoreComponent,
    /// `0.10 * validation`.
    pub validation: ScoreComponent,
    /// `0.10 * authority_weight`.
    pub authority_weight: ScoreComponent,
    /// `-0.25 * contradiction_risk` (a penalty; the contribution is never positive).
    pub contradiction_risk: ScoreComponent,
    /// `-0.10 * staleness_penalty` (a penalty; the contribution is never positive).
    pub staleness_penalty: ScoreComponent,
    /// Final retrieval score: the sum of the eight contributions above.
    pub final_score: f32,
}

/// Blend the lexical, FTS5, and optional semantic axes into the single
/// lexical-input value consumed by the downstream hybrid scorer.
///
/// This is the Phase 4.C generalisation of [`compose_fuzzy_boost`] and
/// has two modes:
///
/// - `semantic == None`: the call is forwarded unchanged to
///   `compose_fuzzy_boost(lexical, fts5)` and its result is returned
///   as-is. This is the Phase 4.B eval guardrail; any input sanitising
///   on this path is whatever [`compose_fuzzy_boost`] itself performs.
/// - `semantic == Some(sim)`: each of the three inputs is first passed
///   through `clamp_band` (non-finite values become `0.0`, everything
///   else is clamped to `[0, 1]`) and the results are combined with
///   [`LEX_WEIGHT_WITH_SEM`], [`FTS5_WEIGHT_WITH_SEM`], and
///   [`SEM_WEIGHT_WITH_SEM`], which sum to 1.0.
///
/// Because of the clamp, a negative cosine similarity contributes `0.0`
/// rather than acting as a penalty — the Phase 4.C SPEC does not
/// introduce a semantic-displacement penalty axis.
///
/// **Invariants**:
///
/// - `compose_lexical_semantic(lex, fts5, None) == compose_fuzzy_boost(lex, fts5)`
///   (pinned by `compose_semantic_off_matches_phase_4b_baseline_exactly`).
/// - An exact lexical hit (`lex=1.0, fts5=0.0, sem=0.0`) outscores a
///   semantic-only hit (`lex=0.0, fts5=0.0, sem=1.0`). Pinned by
///   `compose_semantic_keeps_exact_lexical_dominant`.
#[must_use]
pub fn compose_lexical_semantic(lexical: f32, fts5: f32, semantic: Option<f32>) -> f32 {
    match semantic {
        // Semantic axis off: defer entirely to the Phase 4.B composer.
        None => compose_fuzzy_boost(lexical, fts5),
        Some(similarity) => {
            let lexical_term = clamp_band(lexical) * LEX_WEIGHT_WITH_SEM;
            let fts5_term = clamp_band(fts5) * FTS5_WEIGHT_WITH_SEM;
            let semantic_term = clamp_band(similarity) * SEM_WEIGHT_WITH_SEM;
            lexical_term + fts5_term + semantic_term
        }
    }
}

/// Forces `value` into the `[0, 1]` band.
///
/// NaN and both infinities collapse to `0.0`; every finite value is
/// clamped to the nearest bound.
fn clamp_band(value: f32) -> f32 {
    if value.is_nan() || value.is_infinite() {
        return 0.0;
    }
    value.clamp(0.0, 1.0)
}

/// Computes the hybrid retrieval score defined by BUILD_SPEC §14.1.
///
/// Every field of `inputs` becomes a [`ScoreComponent`] paired with its
/// weight constant. The semantic-similarity component is always built
/// from a raw value of `0.0`, so it is reported but contributes nothing.
/// `final_score` is the sum of the eight contributions, accumulated left
/// to right in field order.
#[must_use]
pub fn score(inputs: ScoreInputs) -> HybridScoreExplanation {
    let components = HybridScoreExplanation {
        lexical_match: ScoreComponent::new(inputs.lexical_match, LEXICAL_MATCH_WEIGHT),
        semantic_similarity: ScoreComponent::new(0.0, SEMANTIC_SIMILARITY_WEIGHT),
        brightness: ScoreComponent::new(inputs.brightness, BRIGHTNESS_WEIGHT),
        domain_overlap: ScoreComponent::new(inputs.domain_overlap, DOMAIN_OVERLAP_WEIGHT),
        validation: ScoreComponent::new(inputs.validation, VALIDATION_WEIGHT),
        authority_weight: ScoreComponent::new(inputs.authority_weight, AUTHORITY_WEIGHT),
        contradiction_risk: ScoreComponent::new(
            inputs.contradiction_risk,
            CONTRADICTION_RISK_WEIGHT,
        ),
        staleness_penalty: ScoreComponent::new(inputs.staleness_penalty, STALENESS_PENALTY_WEIGHT),
        // Placeholder; replaced below once the contributions are summed.
        final_score: 0.0,
    };

    // Seed the fold with the first contribution so the additions happen
    // in exactly the field order listed here.
    let [first, rest @ ..] = [
        components.lexical_match.contribution,
        components.semantic_similarity.contribution,
        components.brightness.contribution,
        components.domain_overlap.contribution,
        components.validation.contribution,
        components.authority_weight.contribution,
        components.contradiction_risk.contribution,
        components.staleness_penalty.contribution,
    ];
    let final_score = rest.iter().fold(first, |total, part| total + part);

    HybridScoreExplanation {
        final_score,
        ..components
    }
}

/// Explanation for domain-overlap scoring.
///
/// Returned by [`domain_overlap`]. All domain lists hold normalized names:
/// trimmed, ASCII-lowercased, with empty entries and duplicates removed,
/// in first-occurrence order.
#[derive(Debug, Clone, PartialEq)]
pub struct DomainOverlapExplanation {
    /// Normalized domain overlap in `[0, 1]`: the fraction of query
    /// domains that also appear on the memory, or `0.0` when there are no
    /// query domains.
    pub domain_overlap: f32,
    /// Normalized query/task domains considered.
    pub query_domains: Vec<String>,
    /// Normalized memory domains considered.
    pub memory_domains: Vec<String>,
    /// Query domains also present on the memory, in query order.
    pub matched_domains: Vec<String>,
}

/// Measures how many of the query/task domains a memory covers.
///
/// Both lists are normalized first (trimmed, ASCII-lowercased, empties
/// and duplicates dropped). The overlap is the number of normalized query
/// domains that also appear among the memory domains, divided by the
/// number of normalized query domains. With no query domains the overlap
/// is `0.0` and no domains are reported as matched.
#[must_use]
pub fn domain_overlap(
    query_domains: &[impl AsRef<str>],
    memory_domains: &[impl AsRef<str>],
) -> DomainOverlapExplanation {
    let query_domains = normalize_domains(query_domains);
    let memory_domains = normalize_domains(memory_domains);

    let (matched_domains, domain_overlap): (Vec<String>, f32) = if query_domains.is_empty() {
        // Nothing to match against; also avoids a 0 / 0 division.
        (Vec::new(), 0.0)
    } else {
        let on_memory: HashSet<&str> = memory_domains.iter().map(String::as_str).collect();
        let hits: Vec<String> = query_domains
            .iter()
            .filter(|name| on_memory.contains(name.as_str()))
            .cloned()
            .collect();
        let ratio = hits.len() as f32 / query_domains.len() as f32;
        (hits, ratio)
    };

    DomainOverlapExplanation {
        domain_overlap,
        query_domains,
        memory_domains,
        matched_domains,
    }
}

/// Canonicalizes a list of domain names.
///
/// Each name is trimmed and ASCII-lowercased. Names that end up empty are
/// dropped, and only the first occurrence of each name is kept, so the
/// output preserves first-seen order.
fn normalize_domains(domains: &[impl AsRef<str>]) -> Vec<String> {
    let mut already_seen: HashSet<String> = HashSet::new();
    domains
        .iter()
        .map(|raw| raw.as_ref().trim().to_ascii_lowercase())
        // `insert` returns false for a repeat, which filters it out.
        .filter(|name| !name.is_empty() && already_seen.insert(name.clone()))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Pins `score` against the BUILD_SPEC weights written out by hand.
    #[test]
    fn score_matches_build_spec_weights() {
        let explanation = score(ScoreInputs {
            lexical_match: 0.8,
            brightness: 0.6,
            domain_overlap: 0.5,
            validation: 0.7,
            authority_weight: 0.4,
            contradiction_risk: 0.2,
            staleness_penalty: 0.3,
        });

        // The `0.25 * 0.0` term mirrors the semantic component, which
        // `score` always builds from a raw value of zero. The two penalty
        // terms are subtracted because their weights are negative.
        let expected = 0.30 * 0.8 + 0.25 * 0.0 + 0.15 * 0.6 + 0.10 * 0.5 + 0.10 * 0.7 + 0.10 * 0.4
            - 0.25 * 0.2
            - 0.10 * 0.3;
        // Compared with a tolerance: the hand-written expression and
        // `score` do not perform the same sequence of f32 operations.
        assert!((explanation.final_score - expected).abs() < f32::EPSILON);
        assert_eq!(explanation.semantic_similarity.raw, 0.0);
        assert_eq!(explanation.semantic_similarity.contribution, 0.0);
    }

    /// A well-supported memory with a weaker lexical match must rank
    /// above an exact lexical match that is dim, unvalidated, contested,
    /// and stale.
    #[test]
    fn score_ordering_rewards_salient_validated_memories() {
        let well_supported = ScoreInputs {
            lexical_match: 0.75,
            brightness: 0.9,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 0.8,
            contradiction_risk: 0.0,
            staleness_penalty: 0.0,
        };
        let risky_exact_match = ScoreInputs {
            lexical_match: 1.0,
            brightness: 0.1,
            domain_overlap: 0.0,
            validation: 0.0,
            authority_weight: 0.2,
            contradiction_risk: 0.8,
            staleness_penalty: 0.6,
        };

        let strong_score = score(well_supported).final_score;
        let weak_score = score(risky_exact_match).final_score;

        assert!(weak_score < strong_score);
    }

    /// Every component in the explanation must carry its own weight
    /// constant, and an over-range input must be reported clamped.
    #[test]
    fn explanation_contains_all_score_fields() {
        // `lexical_match` is deliberately above 1.0 to exercise the clamp.
        let saturated = ScoreInputs {
            lexical_match: 2.0,
            brightness: 1.0,
            domain_overlap: 1.0,
            validation: 1.0,
            authority_weight: 1.0,
            contradiction_risk: 1.0,
            staleness_penalty: 1.0,
        };
        let explanation = score(saturated);

        assert_eq!(explanation.lexical_match.raw, 1.0);

        let reported_vs_expected = [
            (explanation.lexical_match.weight, LEXICAL_MATCH_WEIGHT),
            (
                explanation.semantic_similarity.weight,
                SEMANTIC_SIMILARITY_WEIGHT,
            ),
            (explanation.brightness.weight, BRIGHTNESS_WEIGHT),
            (explanation.domain_overlap.weight, DOMAIN_OVERLAP_WEIGHT),
            (explanation.validation.weight, VALIDATION_WEIGHT),
            (explanation.authority_weight.weight, AUTHORITY_WEIGHT),
            (
                explanation.contradiction_risk.weight,
                CONTRADICTION_RISK_WEIGHT,
            ),
            (
                explanation.staleness_penalty.weight,
                STALENESS_PENALTY_WEIGHT,
            ),
        ];
        for (reported, expected) in reported_vs_expected {
            assert_eq!(reported, expected);
        }
    }

    /// Query domains are case-folded and de-duplicated before matching,
    /// so one of the two distinct query domains matching gives 0.5.
    #[test]
    fn domain_overlap_reports_matched_domains() {
        let DomainOverlapExplanation {
            domain_overlap: ratio,
            query_domains,
            memory_domains,
            matched_domains,
        } = domain_overlap(&["Retrieval", "Store", "retrieval"], &["store", "privacy"]);

        assert_eq!(ratio, 0.5);
        assert_eq!(query_domains, ["retrieval", "store"]);
        assert_eq!(memory_domains, ["store", "privacy"]);
        assert_eq!(matched_domains, ["store"]);
    }

    // =========================================================================
    // Phase 4.C compositional axis tests.
    //
    // The non-negotiable invariant is the eval guardrail
    // `compose_semantic_off_matches_phase_4b_baseline_exactly` — when the
    // caller passes `None` for the semantic axis the composer MUST return
    // exactly the same value as the Phase 4.B `compose_fuzzy_boost` shape.
    // A failure here means that leaving `--semantic` OFF (the default
    // path) still changed the retrieval ordering, which is a Phase 4.B
    // regression.

    /// The three Phase 4.C composition weights must add up to 1.0 so the
    /// composed value stays inside `[0, 1]`.
    #[test]
    fn phase_4c_weights_sum_to_one() {
        let sum = LEX_WEIGHT_WITH_SEM + FTS5_WEIGHT_WITH_SEM + SEM_WEIGHT_WITH_SEM;
        let drift = (sum - 1.0).abs();
        assert!(
            drift < f32::EPSILON,
            "Phase 4.C composition weights must sum to 1.0, got {sum}"
        );
    }

    #[test]
    fn compose_semantic_off_matches_phase_4b_baseline_exactly() {
        // The Phase 4.B eval guardrail: passing `None` for semantic
        // MUST produce byte-for-byte the same value as the Phase 4.B
        // `compose_fuzzy_boost` helper. Iterate over a grid of
        // representative `(lex, fts5)` pairs so a future change that
        // sneaks a non-zero semantic contribution into the OFF path
        // trips this assertion on at least one cell.
        let cells = [
            (0.0_f32, 0.0_f32),
            (1.0, 0.0),
            (0.0, 1.0),
            (0.5, 0.5),
            (0.75, 0.25),
            (0.25, 0.75),
            (0.8, 0.2),
            (0.2, 0.8),
            (0.9, 0.1),
            (0.1, 0.9),
        ];
        for (lex, fts5) in cells {
            let phase_4b = compose_fuzzy_boost(lex, fts5);
            let phase_4c_off = compose_lexical_semantic(lex, fts5, None);
            // Compare bit patterns rather than using a tolerance: the
            // guardrail is exact equality, and even a one-ULP drift could
            // reorder results that were previously tied.
            assert_eq!(
                phase_4b.to_bits(),
                phase_4c_off.to_bits(),
                "Phase 4.B baseline drift at (lex={lex}, fts5={fts5}): \
                 phase_4b={phase_4b}, phase_4c_off={phase_4c_off}"
            );
        }
    }

    #[test]
    fn compose_semantic_on_includes_semantic_axis() {
        // Hold lexical (0.5) and fts5 (0.0) fixed and vary only the
        // semantic similarity: with the axis ON, the composed value MUST
        // move with it.
        let with_semantic = |sem: f32| compose_lexical_semantic(0.5, 0.0, Some(sem));
        let low_sem = with_semantic(0.0);
        let high_sem = with_semantic(1.0);
        assert!(
            high_sem > low_sem,
            "semantic axis must contribute positively; low_sem={low_sem}, high_sem={high_sem}"
        );

        // Going from sem=0 to sem=1 must add exactly the semantic weight.
        let gap_error = (high_sem - low_sem - SEM_WEIGHT_WITH_SEM).abs();
        assert!(
            gap_error < f32::EPSILON,
            "semantic contribution must equal SEM_WEIGHT_WITH_SEM"
        );
    }

    #[test]
    fn compose_semantic_keeps_exact_lexical_dominant() {
        // Pins the SPEC's "small semantic weight does not displace exact
        // lexical hits" property: a perfect lexical hit with no semantic
        // signal (e.g. no embedding row in the side table) MUST outscore
        // a memory whose only signal is a perfect semantic match. FTS5 is
        // held at zero for both.
        let single_axis = |lex: f32, sem: f32| compose_lexical_semantic(lex, 0.0, Some(sem));
        let lexical_only = single_axis(1.0, 0.0);
        let semantic_only = single_axis(0.0, 1.0);
        assert!(
            semantic_only < lexical_only,
            "exact lexical hit must dominate semantic-only hit; \
             lexical_only={lexical_only}, semantic_only={semantic_only}"
        );
    }

    #[test]
    fn compose_semantic_default_weight_does_not_displace_exact_lexical_hits() {
        // Even with worst-case semantic noise (semantic = 1.0 for an
        // unrelated memory, semantic = 0.0 for the lexically-matching
        // memory), the lexical hit MUST still win the composed score.
        // Pinned by SPEC: "small semantic weight" — the 0.10 weight is
        // small enough that lex=1.0,sem=0.0 (0.65) > lex=0.0,sem=1.0
        // (0.10).
        let lex_winner = compose_lexical_semantic(1.0, 0.0, Some(0.0));
        let sem_winner = compose_lexical_semantic(0.0, 0.0, Some(1.0));
        assert!(
            lex_winner > sem_winner,
            "default semantic weight must not displace exact lexical hits"
        );

        // Also pin the two composed values quoted above, so this test
        // checks the documented arithmetic and not just the ordering:
        // each single-axis hit must compose to exactly that axis's weight.
        assert!(
            (lex_winner - LEX_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "exact lexical hit must compose to LEX_WEIGHT_WITH_SEM, got {lex_winner}"
        );
        assert!(
            (sem_winner - SEM_WEIGHT_WITH_SEM).abs() < f32::EPSILON,
            "semantic-only hit must compose to SEM_WEIGHT_WITH_SEM, got {sem_winner}"
        );
    }

    #[test]
    fn compose_semantic_stays_in_band_for_all_inputs() {
        // Sweep a coarse 5x5x5 grid over the three axes: whatever the
        // combination, the composed value MUST land in [0, 1].
        let grid = [0.0_f32, 0.25, 0.5, 0.75, 1.0];
        let band = 0.0..=1.0;
        for lex in grid {
            for fts5 in grid {
                for sem in grid {
                    let composed = compose_lexical_semantic(lex, fts5, Some(sem));
                    assert!(
                        band.contains(&composed),
                        "composed value out of band at (lex={lex}, fts5={fts5}, sem={sem}): {composed}"
                    );
                }
            }
        }
    }

    #[test]
    fn compose_semantic_clamps_out_of_band_inputs() {
        // One unusable value per axis: NaN lexical, infinite fts5, and a
        // negative semantic similarity. Each MUST degrade to 0.0 instead
        // of propagating into the composed score.
        let (lexical, fts5, semantic) = (f32::NAN, f32::INFINITY, Some(-5.0));
        let composed = compose_lexical_semantic(lexical, fts5, semantic);
        assert!((0.0..=1.0).contains(&composed));
        assert_eq!(composed, 0.0);
    }

    #[test]
    fn compose_semantic_zero_for_all_axes_is_zero() {
        // A memory that matched nothing must compose to 0.0 in both
        // modes: semantic axis off (`None`) and on (`Some(0.0)`).
        for semantic in [None, Some(0.0)] {
            assert_eq!(compose_lexical_semantic(0.0, 0.0, semantic), 0.0);
        }
    }
}