// maproom 0.1.0
//
// Semantic code search powered by embeddings and SQLite.
// Documentation
//! Score fusion for hybrid search results.
//!
//! This module implements score fusion strategies that combine results from
//! multiple search strategies (FTS, vector, graph, signals) into a single
//! ranked result set.
//!
//! # Fusion Strategies
//!
//! - **BasicWeightedFusion**: Simple weighted average (Phase 2 baseline)
//! - **RRFFusion**: Reciprocal Rank Fusion (Phase 3 sophisticated approach)
//!
//! # Score Normalization
//!
//! All scores are normalized to the 0.0-1.0 range before fusion to ensure
//! fair combination across different search types with different score ranges.

mod basic;
mod rrf;

pub use basic::{BasicWeightedFusion, FusionWeights};
pub use rrf::RRFFusion;

use crate::search::executor_types::{RankedResults, SearchSource};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Trait for score fusion strategies.
///
/// Implementations combine results from multiple search strategies into
/// a single ranked result set with fused scores.
///
/// The `Send + Sync` supertraits mean every implementor must be safe to
/// move to and share between threads.
pub trait ScoreFusion: Send + Sync {
    /// Fuse multiple result sets into a single ranked list.
    ///
    /// # Parameters
    /// - `results`: Vector of RankedResults from different search strategies;
    ///   taken by value, so the caller hands over ownership of the result sets
    /// - `weights`: Weights for each search type (borrowed, not modified
    ///   through this reference)
    /// - `limit`: Maximum number of results to return
    ///
    /// # Returns
    /// Vector of FusedResult with combined scores, sorted by score descending
    fn fuse(
        &self,
        results: Vec<RankedResults>,
        weights: &FusionWeights,
        limit: usize,
    ) -> Vec<FusedResult>;
}

/// Detailed breakdown of score contributions from each signal.
///
/// This is used in debug mode to understand how each signal contributed
/// to the final fused score.
///
/// Every field is an `f32` contribution for one signal. The type derives
/// `Serialize`/`Deserialize`, which lets it be carried inside
/// `FusedResult::breakdown` when results are serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreBreakdown {
    /// Contribution from full-text search (weighted)
    pub fts: f32,
    /// Contribution from vector similarity (weighted)
    pub vector: f32,
    /// Contribution from graph importance (weighted)
    pub graph: f32,
    /// Contribution from recency signal (weighted)
    pub recency: f32,
    /// Contribution from churn signal (weighted and inverted)
    pub churn: f32,
}

impl ScoreBreakdown {
    /// Create a breakdown with all zero contributions.
    pub fn zero() -> Self {
        Self {
            fts: 0.0,
            vector: 0.0,
            graph: 0.0,
            recency: 0.0,
            churn: 0.0,
        }
    }

    /// Format the breakdown as a human-readable string for debug output.
    ///
    /// Each contribution is printed with three decimal places, in the order
    /// FTS, Vec, Graph, Recency, Churn.
    pub fn format_debug(&self) -> String {
        format!(
            "FTS:{:.3} Vec:{:.3} Graph:{:.3} Recency:{:.3} Churn:{:.3}",
            self.fts, self.vector, self.graph, self.recency, self.churn
        )
    }

    /// Calculate what percentage each signal contributed to the total score.
    ///
    /// # Returns
    /// One `(label, percentage)` pair per signal, in the fixed order
    /// `FTS`, `Vector`, `Graph`, `Recency`, `Churn`.
    ///
    /// An empty vector is returned when the summed contributions are below
    /// `0.0001` (nothing meaningful to divide by) or are not finite (NaN or
    /// infinite), since dividing by such a total would only produce NaN or
    /// otherwise meaningless percentages.
    pub fn as_percentages(&self) -> Vec<(String, f32)> {
        let total = self.fts + self.vector + self.graph + self.recency + self.churn;

        // `NaN < 0.0001` is false, so non-finite totals need their own check.
        if !total.is_finite() || total < 0.0001 {
            return Vec::new();
        }

        let contributions = [
            ("FTS", self.fts),
            ("Vector", self.vector),
            ("Graph", self.graph),
            ("Recency", self.recency),
            ("Churn", self.churn),
        ];

        contributions
            .iter()
            .map(|&(label, value)| (label.to_string(), (value / total) * 100.0))
            .collect()
    }
}

/// A single search result with fused score from multiple sources.
///
/// The two `Option` fields are annotated with
/// `skip_serializing_if = "Option::is_none"`, so they are left out of the
/// serialized form entirely when they hold `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FusedResult {
    /// Chunk ID from maproom.chunks table
    pub chunk_id: i64,

    /// Combined score after fusion (0.0-1.0)
    pub score: f32,

    /// Individual scores from each search source that found this chunk,
    /// keyed by the source that produced them
    pub source_scores: HashMap<SearchSource, f32>,

    /// Optional detailed breakdown of score contributions (for debug mode).
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub breakdown: Option<ScoreBreakdown>,

    /// Exact match multiplier applied during FTS scoring (3.0 for exact matches, 1.0 otherwise).
    /// Always computed (not debug-only) to enable confidence scoring.
    /// None if result did not come from FTS or exact match detection was not performed.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub exact_match_multiplier: Option<f32>,
}

impl FusedResult {
    /// Build a result that carries only the fused score and the per-source
    /// scores; `breakdown` and `exact_match_multiplier` are both `None`.
    pub fn new(chunk_id: i64, score: f32, source_scores: HashMap<SearchSource, f32>) -> Self {
        Self {
            exact_match_multiplier: None,
            breakdown: None,
            source_scores,
            score,
            chunk_id,
        }
    }

    /// Build a result that records the exact match multiplier but has no
    /// score breakdown.
    pub fn with_exact_match(
        chunk_id: i64,
        score: f32,
        source_scores: HashMap<SearchSource, f32>,
        exact_match_multiplier: Option<f32>,
    ) -> Self {
        // Start from the plain result and override the one field that differs.
        Self {
            exact_match_multiplier,
            ..Self::new(chunk_id, score, source_scores)
        }
    }

    /// Build a result that records a score breakdown but has no exact match
    /// multiplier.
    pub fn with_breakdown(
        chunk_id: i64,
        score: f32,
        source_scores: HashMap<SearchSource, f32>,
        breakdown: ScoreBreakdown,
    ) -> Self {
        Self {
            breakdown: Some(breakdown),
            ..Self::new(chunk_id, score, source_scores)
        }
    }

    /// Build a result that records both a score breakdown and the exact match
    /// multiplier.
    pub fn with_all(
        chunk_id: i64,
        score: f32,
        source_scores: HashMap<SearchSource, f32>,
        breakdown: ScoreBreakdown,
        exact_match_multiplier: Option<f32>,
    ) -> Self {
        Self {
            breakdown: Some(breakdown),
            exact_match_multiplier,
            ..Self::new(chunk_id, score, source_scores)
        }
    }
}