// oxirs_vec/advanced_result_merging.rs
1//! Advanced Result Merging and Score Combination System
2//!
3//! This module provides sophisticated result merging capabilities for combining
4//! vector search results from multiple sources, algorithms, and modalities.
5
6use crate::Vector;
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet};
10
/// Configuration for result merging.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultMergingConfig {
    /// Strategy used to combine per-source scores into a final score.
    pub combination_strategy: ScoreCombinationStrategy,
    /// Normalization applied to each source's raw scores before fusion.
    pub normalization_method: ScoreNormalizationMethod,
    /// Fusion algorithm for rank-based combination of source result lists.
    pub fusion_algorithm: RankFusionAlgorithm,
    /// Per-source weights keyed by source id; sources missing from this
    /// map default to a weight of 1.0 during fusion.
    pub source_weights: HashMap<String, f32>,
    /// When true, attach a 95% confidence interval to results backed by
    /// more than one source.
    pub confidence_intervals: bool,
    /// When true, attach a per-result explanation of its ranking factors.
    pub enable_explanations: bool,
    /// Optional diversity enhancement; `None` disables diversification.
    pub diversity_config: Option<DiversityConfig>,
}
29
30impl Default for ResultMergingConfig {
31    fn default() -> Self {
32        let mut source_weights = HashMap::new();
33        source_weights.insert("primary".to_string(), 1.0);
34
35        Self {
36            combination_strategy: ScoreCombinationStrategy::WeightedSum,
37            normalization_method: ScoreNormalizationMethod::MinMax,
38            fusion_algorithm: RankFusionAlgorithm::CombSUM,
39            source_weights,
40            confidence_intervals: true,
41            enable_explanations: false,
42            diversity_config: None,
43        }
44    }
45}
46
/// Score combination strategies.
///
/// NOTE(review): `apply_score_combination` currently implements `Average`,
/// `WeightedSum` (handled inside the fusion algorithms), `Maximum`,
/// `Minimum`, and `GeometricMean`; the remaining variants fall through and
/// leave the fused score unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreCombinationStrategy {
    /// Simple average of scores
    Average,
    /// Weighted sum of scores
    WeightedSum,
    /// Maximum score across sources
    Maximum,
    /// Minimum score across sources
    Minimum,
    /// Geometric mean
    GeometricMean,
    /// Harmonic mean
    HarmonicMean,
    /// Product of scores
    Product,
    /// Borda count method
    BordaCount,
    /// Custom combination function, identified by name
    Custom(String),
}
69
/// Score normalization methods applied per source before fusion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreNormalizationMethod {
    /// No normalization
    None,
    /// Min-max normalization to [0, 1]
    MinMax,
    /// Z-score normalization (zero mean, unit variance)
    ZScore,
    /// Rank-based normalization
    RankBased,
    /// Softmax normalization (simplified per-score form in this module)
    Softmax,
    /// Sigmoid normalization
    Sigmoid,
}
86
/// Rank fusion algorithms for merging ranked lists from multiple sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RankFusionAlgorithm {
    /// CombSUM - sum of scores
    CombSUM,
    /// CombMNZ - multiply sum by number of non-zero scores
    CombMNZ,
    /// Reciprocal Rank Fusion
    ReciprocalRankFusion,
    /// Borda fusion
    BordaFusion,
    /// Condorcet fusion (currently delegates to Borda fusion)
    CondorcetFusion,
}
101
/// Diversity configuration for result enhancement.
///
/// NOTE(review): `metric` is not yet consulted by `enhance_diversity`,
/// which always applies an MMR-style greedy selection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityConfig {
    /// Enable diversity enhancement
    pub enable: bool,
    /// Diversity metric
    pub metric: DiversityMetric,
    /// Diversity weight (0.0 = no diversity, 1.0 = maximum diversity)
    pub diversity_weight: f32,
    /// Maximum results to consider for diversity; merged result lists
    /// longer than this are truncated to this many diverse entries
    pub max_diverse_results: usize,
}
114
/// Metric used to measure diversity between results.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DiversityMetric {
    /// Maximum Marginal Relevance
    MMR,
    /// Angular diversity
    Angular,
    /// Clustering-based diversity
    ClusterBased,
    /// Content-based diversity
    ContentBased,
}
126
/// Ranked result list from a single source, together with source metadata.
#[derive(Debug, Clone)]
pub struct SourceResult {
    // Identifier used to look up this source's weight in the config.
    pub source_id: String,
    // Ranked results produced by this source.
    pub results: Vec<ScoredResult>,
    // Provenance and quality information about how the results were produced.
    pub metadata: ResultMetadata,
}
134
/// Individual scored result within a source's ranked list.
#[derive(Debug, Clone)]
pub struct ScoredResult {
    // Identifier of the retrieved item; items are matched across sources by id.
    pub item_id: String,
    // Raw (or, after normalization, normalized) relevance score.
    pub score: f32,
    // Position in the source's ranking; the RRF and Borda fusion algorithms
    // treat smaller values as better.
    pub rank: usize,
    // Optional embedding vector for the item.
    pub vector: Option<Vector>,
    // Optional free-form key/value metadata.
    pub metadata: Option<HashMap<String, String>>,
}
144
/// Metadata describing how a source produced its results.
#[derive(Debug, Clone)]
pub struct ResultMetadata {
    // Kind of retrieval system that produced the results.
    pub source_type: SourceType,
    // Name of the algorithm the source used.
    pub algorithm_used: String,
    // Number of candidates the source considered before ranking.
    pub total_candidates: usize,
    // Wall-clock time the source spent producing its results.
    pub processing_time: std::time::Duration,
    // Source-defined quality metrics, keyed by metric name.
    pub quality_metrics: HashMap<String, f32>,
}
154
/// Kind of retrieval system a result source represents.
#[derive(Debug, Clone)]
pub enum SourceType {
    VectorSearch,
    TextSearch,
    KnowledgeGraph,
    MultiModal,
    Hybrid,
}
163
/// Merged result with optional explanation and diagnostics.
#[derive(Debug, Clone)]
pub struct MergedResult {
    // Identifier of the merged item (matches `ScoredResult::item_id`).
    pub item_id: String,
    // Fused score after fusion, combination, and optional post-processing.
    pub final_score: f32,
    // 95% confidence interval; populated only when enabled and the item
    // was contributed by more than one source.
    pub confidence_interval: Option<ConfidenceInterval>,
    // Per-source breakdown of how this item was scored.
    pub source_contributions: Vec<SourceContribution>,
    // Ranking explanation; populated only when explanations are enabled.
    pub explanation: Option<ResultExplanation>,
    // Diversity score; populated only when diversity enhancement runs.
    pub diversity_score: Option<f32>,
}
174
/// Confidence interval for a result's score.
#[derive(Debug, Clone)]
pub struct ConfidenceInterval {
    // Lower bound, clamped to 0.0 by the interval calculation.
    pub lower_bound: f32,
    // Upper bound, clamped to 1.0 by the interval calculation.
    pub upper_bound: f32,
    // Confidence level of the interval (0.95 as produced by this module).
    pub confidence_level: f32,
}
182
/// Contribution of a single source to a merged result.
#[derive(Debug, Clone)]
pub struct SourceContribution {
    // Id of the contributing source.
    pub source_id: String,
    // Score as seen when fusion ran (post source-level normalization).
    pub original_score: f32,
    // Score after the fusion algorithm's per-source transform
    // (e.g. the RRF or Borda contribution).
    pub normalized_score: f32,
    // Weight applied to this source (1.0 when not configured).
    pub weight: f32,
    // Rank the item held in this source's list.
    pub rank: usize,
}
192
/// Explanation for a result's ranking.
#[derive(Debug, Clone)]
pub struct ResultExplanation {
    // Factors that influenced the ranking, one per contributing source.
    pub ranking_factors: Vec<RankingFactor>,
    // Per-source score contributions, keyed by source id.
    pub score_breakdown: HashMap<String, f32>,
    // Related items; currently left empty by `generate_explanations`.
    pub similar_items: Vec<String>,
    // Distinguishing features; currently left empty by `generate_explanations`.
    pub differentiating_features: Vec<String>,
}
201
/// A single factor contributing to a result's ranking.
#[derive(Debug, Clone)]
pub struct RankingFactor {
    // Human-readable name of the factor (e.g. "Source: primary").
    pub factor_name: String,
    // Relative importance of the factor.
    pub importance: f32,
    // Human-readable description of the factor's contribution.
    pub description: String,
}
208
/// Advanced result merging engine.
pub struct AdvancedResultMerger {
    // Merging configuration (fusion algorithm, weights, post-processing).
    config: ResultMergingConfig,
    // Most recent normalization parameters, keyed by source id.
    normalization_cache: HashMap<String, NormalizationParams>,
    // Running statistics over all merges performed by this instance.
    fusion_stats: FusionStatistics,
}
215
/// Summary statistics of a source's scores, used for normalization.
#[derive(Debug, Clone)]
struct NormalizationParams {
    // Minimum observed score.
    min_score: f32,
    // Maximum observed score.
    max_score: f32,
    // Arithmetic mean of the scores.
    mean_score: f32,
    // Population standard deviation of the scores.
    std_dev: f32,
}
224
/// Statistics for fusion operations.
#[derive(Debug, Clone, Default)]
pub struct FusionStatistics {
    // Number of merge operations performed.
    pub total_merges: usize,
    // Running average of sources per merge, updated incrementally.
    pub average_sources_per_merge: f32,
    // Score distribution metrics; not yet populated by this module.
    pub score_distribution: HashMap<String, f32>,
    // Fusion quality metrics; not yet populated by this module.
    pub fusion_quality_metrics: HashMap<String, f32>,
}
233
234impl AdvancedResultMerger {
235    /// Create new result merger
236    pub fn new(config: ResultMergingConfig) -> Self {
237        Self {
238            config,
239            normalization_cache: HashMap::new(),
240            fusion_stats: FusionStatistics::default(),
241        }
242    }
243
244    /// Merge results from multiple sources
245    pub fn merge_results(&mut self, sources: Vec<SourceResult>) -> Result<Vec<MergedResult>> {
246        if sources.is_empty() {
247            return Ok(Vec::new());
248        }
249
250        // Update statistics
251        self.fusion_stats.total_merges += 1;
252        self.fusion_stats.average_sources_per_merge = (self.fusion_stats.average_sources_per_merge
253            * (self.fusion_stats.total_merges - 1) as f32
254            + sources.len() as f32)
255            / self.fusion_stats.total_merges as f32;
256
257        // Step 1: Normalize scores from each source
258        let normalized_sources = self.normalize_sources(&sources)?;
259
260        // Step 2: Collect all unique items
261        let all_items = self.collect_unique_items(&normalized_sources);
262
263        // Step 3: Apply fusion algorithm
264        let mut merged_results = match self.config.fusion_algorithm {
265            RankFusionAlgorithm::CombSUM => self.apply_combsum(&normalized_sources, &all_items)?,
266            RankFusionAlgorithm::CombMNZ => self.apply_combmnz(&normalized_sources, &all_items)?,
267            RankFusionAlgorithm::ReciprocalRankFusion => {
268                self.apply_rrf(&normalized_sources, &all_items)?
269            }
270            RankFusionAlgorithm::BordaFusion => {
271                self.apply_borda(&normalized_sources, &all_items)?
272            }
273            RankFusionAlgorithm::CondorcetFusion => {
274                self.apply_condorcet(&normalized_sources, &all_items)?
275            }
276        };
277
278        // Step 4: Apply score combination strategy
279        merged_results = self.apply_score_combination(merged_results, &normalized_sources)?;
280
281        // Step 5: Calculate confidence intervals if enabled
282        if self.config.confidence_intervals {
283            merged_results =
284                self.calculate_confidence_intervals(merged_results, &normalized_sources)?;
285        }
286
287        // Step 6: Generate explanations if enabled
288        if self.config.enable_explanations {
289            merged_results = self.generate_explanations(merged_results, &normalized_sources)?;
290        }
291
292        // Step 7: Apply diversity enhancement if configured
293        if let Some(diversity_config) = &self.config.diversity_config {
294            if diversity_config.enable {
295                merged_results = self.enhance_diversity(merged_results, diversity_config)?;
296            }
297        }
298
299        // Step 8: Sort by final score
300        merged_results.sort_by(|a, b| {
301            b.final_score
302                .partial_cmp(&a.final_score)
303                .unwrap_or(std::cmp::Ordering::Equal)
304        });
305
306        Ok(merged_results)
307    }
308
309    /// Normalize scores from all sources
310    fn normalize_sources(&mut self, sources: &[SourceResult]) -> Result<Vec<SourceResult>> {
311        let mut normalized = Vec::new();
312
313        for source in sources {
314            let normalized_source = self.normalize_source(source)?;
315            normalized.push(normalized_source);
316        }
317
318        Ok(normalized)
319    }
320
321    /// Normalize a single source
322    fn normalize_source(&mut self, source: &SourceResult) -> Result<SourceResult> {
323        if source.results.is_empty() {
324            return Ok(source.clone());
325        }
326
327        let scores: Vec<f32> = source.results.iter().map(|r| r.score).collect();
328        let normalization_params = self.calculate_normalization_params(&scores);
329
330        // Cache normalization parameters
331        self.normalization_cache
332            .insert(source.source_id.clone(), normalization_params.clone());
333
334        let normalized_results: Vec<ScoredResult> = source
335            .results
336            .iter()
337            .map(|result| {
338                let normalized_score = self.normalize_score(result.score, &normalization_params);
339                ScoredResult {
340                    item_id: result.item_id.clone(),
341                    score: normalized_score,
342                    rank: result.rank,
343                    vector: result.vector.clone(),
344                    metadata: result.metadata.clone(),
345                }
346            })
347            .collect();
348
349        Ok(SourceResult {
350            source_id: source.source_id.clone(),
351            results: normalized_results,
352            metadata: source.metadata.clone(),
353        })
354    }
355
356    /// Calculate normalization parameters
357    fn calculate_normalization_params(&self, scores: &[f32]) -> NormalizationParams {
358        let min_score = scores.iter().fold(f32::INFINITY, |a, &b| a.min(b));
359        let max_score = scores.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
360        let mean_score = scores.iter().sum::<f32>() / scores.len() as f32;
361
362        let variance = scores
363            .iter()
364            .map(|&x| (x - mean_score).powi(2))
365            .sum::<f32>()
366            / scores.len() as f32;
367        let std_dev = variance.sqrt();
368
369        NormalizationParams {
370            min_score,
371            max_score,
372            mean_score,
373            std_dev,
374        }
375    }
376
377    /// Normalize a single score
378    fn normalize_score(&self, score: f32, params: &NormalizationParams) -> f32 {
379        match self.config.normalization_method {
380            ScoreNormalizationMethod::None => score,
381            ScoreNormalizationMethod::MinMax => {
382                if params.max_score == params.min_score {
383                    0.5 // Default to middle value if no variance
384                } else {
385                    (score - params.min_score) / (params.max_score - params.min_score)
386                }
387            }
388            ScoreNormalizationMethod::ZScore => {
389                if params.std_dev == 0.0 {
390                    0.0 // Default to zero if no variance
391                } else {
392                    (score - params.mean_score) / params.std_dev
393                }
394            }
395            ScoreNormalizationMethod::Softmax => {
396                // For softmax, we need all scores, so this is a simplified version
397                (score - params.min_score).exp()
398            }
399            ScoreNormalizationMethod::Sigmoid => 1.0 / (1.0 + (-score).exp()),
400            ScoreNormalizationMethod::RankBased => {
401                // This would require rank information
402                score / params.max_score
403            }
404        }
405    }
406
407    /// Collect all unique items from sources
408    fn collect_unique_items(&self, sources: &[SourceResult]) -> HashSet<String> {
409        let mut items = HashSet::new();
410        for source in sources {
411            for result in &source.results {
412                items.insert(result.item_id.clone());
413            }
414        }
415        items
416    }
417
418    /// Apply CombSUM fusion algorithm
419    fn apply_combsum(
420        &self,
421        sources: &[SourceResult],
422        items: &HashSet<String>,
423    ) -> Result<Vec<MergedResult>> {
424        let mut merged_results = Vec::new();
425
426        for item_id in items {
427            let mut total_score = 0.0;
428            let mut source_contributions = Vec::new();
429
430            for source in sources {
431                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
432                    let weight = self
433                        .config
434                        .source_weights
435                        .get(&source.source_id)
436                        .copied()
437                        .unwrap_or(1.0);
438                    let weighted_score = result.score * weight;
439                    total_score += weighted_score;
440
441                    source_contributions.push(SourceContribution {
442                        source_id: source.source_id.clone(),
443                        original_score: result.score,
444                        normalized_score: result.score,
445                        weight,
446                        rank: result.rank,
447                    });
448                }
449            }
450
451            merged_results.push(MergedResult {
452                item_id: item_id.clone(),
453                final_score: total_score,
454                confidence_interval: None,
455                source_contributions,
456                explanation: None,
457                diversity_score: None,
458            });
459        }
460
461        Ok(merged_results)
462    }
463
464    /// Apply CombMNZ fusion algorithm
465    fn apply_combmnz(
466        &self,
467        sources: &[SourceResult],
468        items: &HashSet<String>,
469    ) -> Result<Vec<MergedResult>> {
470        let mut merged_results = Vec::new();
471
472        for item_id in items {
473            let mut total_score = 0.0;
474            let mut non_zero_count = 0;
475            let mut source_contributions = Vec::new();
476
477            for source in sources {
478                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
479                    let weight = self
480                        .config
481                        .source_weights
482                        .get(&source.source_id)
483                        .copied()
484                        .unwrap_or(1.0);
485                    let weighted_score = result.score * weight;
486
487                    if weighted_score > 0.0 {
488                        total_score += weighted_score;
489                        non_zero_count += 1;
490                    }
491
492                    source_contributions.push(SourceContribution {
493                        source_id: source.source_id.clone(),
494                        original_score: result.score,
495                        normalized_score: result.score,
496                        weight,
497                        rank: result.rank,
498                    });
499                }
500            }
501
502            let final_score = if non_zero_count > 0 {
503                total_score * non_zero_count as f32
504            } else {
505                0.0
506            };
507
508            merged_results.push(MergedResult {
509                item_id: item_id.clone(),
510                final_score,
511                confidence_interval: None,
512                source_contributions,
513                explanation: None,
514                diversity_score: None,
515            });
516        }
517
518        Ok(merged_results)
519    }
520
521    /// Apply Reciprocal Rank Fusion
522    fn apply_rrf(
523        &self,
524        sources: &[SourceResult],
525        items: &HashSet<String>,
526    ) -> Result<Vec<MergedResult>> {
527        let k = 60.0; // RRF constant
528        let mut merged_results = Vec::new();
529
530        for item_id in items {
531            let mut rrf_score = 0.0;
532            let mut source_contributions = Vec::new();
533
534            for source in sources {
535                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
536                    let weight = self
537                        .config
538                        .source_weights
539                        .get(&source.source_id)
540                        .copied()
541                        .unwrap_or(1.0);
542                    let rrf_contribution = weight / (k + result.rank as f32);
543                    rrf_score += rrf_contribution;
544
545                    source_contributions.push(SourceContribution {
546                        source_id: source.source_id.clone(),
547                        original_score: result.score,
548                        normalized_score: rrf_contribution,
549                        weight,
550                        rank: result.rank,
551                    });
552                }
553            }
554
555            merged_results.push(MergedResult {
556                item_id: item_id.clone(),
557                final_score: rrf_score,
558                confidence_interval: None,
559                source_contributions,
560                explanation: None,
561                diversity_score: None,
562            });
563        }
564
565        Ok(merged_results)
566    }
567
568    /// Apply Borda fusion
569    fn apply_borda(
570        &self,
571        sources: &[SourceResult],
572        items: &HashSet<String>,
573    ) -> Result<Vec<MergedResult>> {
574        let mut merged_results = Vec::new();
575
576        for item_id in items {
577            let mut borda_score = 0.0;
578            let mut source_contributions = Vec::new();
579
580            for source in sources {
581                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
582                    let weight = self
583                        .config
584                        .source_weights
585                        .get(&source.source_id)
586                        .copied()
587                        .unwrap_or(1.0);
588                    let max_rank = source.results.len() as f32;
589                    let borda_contribution = weight * (max_rank - result.rank as f32);
590                    borda_score += borda_contribution;
591
592                    source_contributions.push(SourceContribution {
593                        source_id: source.source_id.clone(),
594                        original_score: result.score,
595                        normalized_score: borda_contribution,
596                        weight,
597                        rank: result.rank,
598                    });
599                }
600            }
601
602            merged_results.push(MergedResult {
603                item_id: item_id.clone(),
604                final_score: borda_score,
605                confidence_interval: None,
606                source_contributions,
607                explanation: None,
608                diversity_score: None,
609            });
610        }
611
612        Ok(merged_results)
613    }
614
615    /// Apply Condorcet fusion (simplified)
616    fn apply_condorcet(
617        &self,
618        sources: &[SourceResult],
619        items: &HashSet<String>,
620    ) -> Result<Vec<MergedResult>> {
621        // For simplicity, we'll use a vote-based approach
622        // In a full implementation, this would involve pairwise comparisons
623        self.apply_borda(sources, items)
624    }
625
626    /// Apply score combination strategy
627    fn apply_score_combination(
628        &self,
629        mut results: Vec<MergedResult>,
630        _sources: &[SourceResult],
631    ) -> Result<Vec<MergedResult>> {
632        match self.config.combination_strategy {
633            ScoreCombinationStrategy::Average => {
634                for result in &mut results {
635                    if !result.source_contributions.is_empty() {
636                        result.final_score = result
637                            .source_contributions
638                            .iter()
639                            .map(|c| c.normalized_score)
640                            .sum::<f32>()
641                            / result.source_contributions.len() as f32;
642                    }
643                }
644            }
645            ScoreCombinationStrategy::WeightedSum => {
646                // Already handled in fusion algorithms
647            }
648            ScoreCombinationStrategy::Maximum => {
649                for result in &mut results {
650                    result.final_score = result
651                        .source_contributions
652                        .iter()
653                        .map(|c| c.normalized_score)
654                        .fold(0.0, f32::max);
655                }
656            }
657            ScoreCombinationStrategy::Minimum => {
658                for result in &mut results {
659                    result.final_score = result
660                        .source_contributions
661                        .iter()
662                        .map(|c| c.normalized_score)
663                        .fold(f32::INFINITY, f32::min);
664                }
665            }
666            ScoreCombinationStrategy::GeometricMean => {
667                for result in &mut results {
668                    let product: f32 = result
669                        .source_contributions
670                        .iter()
671                        .map(|c| c.normalized_score.max(0.001)) // Avoid zero values
672                        .product();
673                    result.final_score =
674                        product.powf(1.0 / result.source_contributions.len() as f32);
675                }
676            }
677            _ => {
678                // Other strategies would be implemented here
679            }
680        }
681
682        Ok(results)
683    }
684
685    /// Calculate confidence intervals
686    fn calculate_confidence_intervals(
687        &self,
688        mut results: Vec<MergedResult>,
689        _sources: &[SourceResult],
690    ) -> Result<Vec<MergedResult>> {
691        for result in &mut results {
692            if result.source_contributions.len() > 1 {
693                let scores: Vec<f32> = result
694                    .source_contributions
695                    .iter()
696                    .map(|c| c.normalized_score)
697                    .collect();
698
699                let mean = scores.iter().sum::<f32>() / scores.len() as f32;
700                let variance =
701                    scores.iter().map(|&x| (x - mean).powi(2)).sum::<f32>() / scores.len() as f32;
702                let std_dev = variance.sqrt();
703
704                // 95% confidence interval (approximation)
705                let margin = 1.96 * std_dev / (scores.len() as f32).sqrt();
706
707                result.confidence_interval = Some(ConfidenceInterval {
708                    lower_bound: (mean - margin).max(0.0),
709                    upper_bound: (mean + margin).min(1.0),
710                    confidence_level: 0.95,
711                });
712            }
713        }
714
715        Ok(results)
716    }
717
718    /// Generate explanations for results
719    fn generate_explanations(
720        &self,
721        mut results: Vec<MergedResult>,
722        _sources: &[SourceResult],
723    ) -> Result<Vec<MergedResult>> {
724        for result in &mut results {
725            let mut ranking_factors = Vec::new();
726            let mut score_breakdown = HashMap::new();
727
728            // Analyze source contributions
729            for contribution in &result.source_contributions {
730                ranking_factors.push(RankingFactor {
731                    factor_name: format!("Source: {}", contribution.source_id),
732                    importance: contribution.normalized_score,
733                    description: format!(
734                        "Contribution from {} with weight {}",
735                        contribution.source_id, contribution.weight
736                    ),
737                });
738
739                score_breakdown.insert(
740                    contribution.source_id.clone(),
741                    contribution.normalized_score,
742                );
743            }
744
745            result.explanation = Some(ResultExplanation {
746                ranking_factors,
747                score_breakdown,
748                similar_items: Vec::new(), // Would be populated in a full implementation
749                differentiating_features: Vec::new(), // Would be populated in a full implementation
750            });
751        }
752
753        Ok(results)
754    }
755
756    /// Enhance diversity of results
757    fn enhance_diversity(
758        &self,
759        results: Vec<MergedResult>,
760        diversity_config: &DiversityConfig,
761    ) -> Result<Vec<MergedResult>> {
762        if results.len() <= diversity_config.max_diverse_results {
763            return Ok(results);
764        }
765
766        // Simple diversity enhancement using Maximum Marginal Relevance (MMR)
767        let mut selected = Vec::new();
768        let mut remaining = results;
769
770        // Always select the top result first
771        if !remaining.is_empty() {
772            let top_result = remaining.remove(0);
773            selected.push(top_result);
774        }
775
776        // Select remaining results balancing relevance and diversity
777        while selected.len() < diversity_config.max_diverse_results && !remaining.is_empty() {
778            let mut best_idx = 0;
779            let mut best_mmr = f32::NEG_INFINITY;
780
781            for (i, candidate) in remaining.iter().enumerate() {
782                // Calculate MMR score
783                let relevance = candidate.final_score;
784                let max_similarity =
785                    self.calculate_max_similarity_to_selected(candidate, &selected);
786                let mmr = diversity_config.diversity_weight * relevance
787                    - (1.0 - diversity_config.diversity_weight) * max_similarity;
788
789                if mmr > best_mmr {
790                    best_mmr = mmr;
791                    best_idx = i;
792                }
793            }
794
795            let selected_result = remaining.remove(best_idx);
796            selected.push(selected_result);
797        }
798
799        // Add diversity scores
800        for result in &mut selected {
801            result.diversity_score = Some(0.8); // Placeholder - would be calculated properly
802        }
803
804        Ok(selected)
805    }
806
807    /// Calculate maximum similarity to already selected results
808    fn calculate_max_similarity_to_selected(
809        &self,
810        candidate: &MergedResult,
811        selected: &[MergedResult],
812    ) -> f32 {
813        if selected.is_empty() {
814            return 0.0;
815        }
816
817        // Simplified similarity calculation
818        // In a full implementation, this would use actual vector similarities
819        let mut max_similarity: f32 = 0.0;
820
821        for selected_result in selected {
822            // Simple similarity based on score difference
823            let similarity: f32 = 1.0 - (candidate.final_score - selected_result.final_score).abs();
824            max_similarity = max_similarity.max(similarity);
825        }
826
827        max_similarity
828    }
829
830    /// Get fusion statistics
831    pub fn get_statistics(&self) -> &FusionStatistics {
832        &self.fusion_stats
833    }
834
835    /// Reset statistics
836    pub fn reset_statistics(&mut self) {
837        self.fusion_stats = FusionStatistics::default();
838    }
839}
840
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    // Build a SourceResult from (item_id, score, rank) triples with fixed
    // vector-search metadata.
    fn create_test_source(source_id: &str, results: Vec<(String, f32, usize)>) -> SourceResult {
        let scored_results = results
            .into_iter()
            .map(|(id, score, rank)| ScoredResult {
                item_id: id,
                score,
                rank,
                vector: None,
                metadata: None,
            })
            .collect();

        SourceResult {
            source_id: source_id.to_string(),
            results: scored_results,
            metadata: ResultMetadata {
                source_type: SourceType::VectorSearch,
                algorithm_used: "test".to_string(),
                total_candidates: 100,
                processing_time: Duration::from_millis(10),
                quality_metrics: HashMap::new(),
            },
        }
    }

    // CombSUM over two overlapping sources: the union of items is returned
    // and items found by both sources outscore single-source items.
    #[test]
    fn test_combsum_fusion() {
        let config = ResultMergingConfig::default();
        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source(
            "source1",
            vec![("doc1".to_string(), 0.9, 1), ("doc2".to_string(), 0.8, 2)],
        );

        let source2 = create_test_source(
            "source2",
            vec![("doc1".to_string(), 0.7, 1), ("doc3".to_string(), 0.6, 2)],
        );

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 3); // doc1, doc2, doc3

        // doc1 should have the highest score (appears in both sources);
        // min-max normalization maps each source's top score to 1.0, so
        // doc1's CombSUM score is 1.0 + 1.0 = 2.0.
        let doc1_result = merged.iter().find(|r| r.item_id == "doc1").unwrap();
        assert!(doc1_result.final_score > 1.0); // Should be sum of normalized scores
    }

    // RRF: both documents appear in both sources, so each gets two
    // reciprocal-rank contributions.
    #[test]
    fn test_reciprocal_rank_fusion() {
        let config = ResultMergingConfig {
            fusion_algorithm: RankFusionAlgorithm::ReciprocalRankFusion,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source(
            "source1",
            vec![("doc1".to_string(), 0.9, 1), ("doc2".to_string(), 0.8, 2)],
        );

        let source2 = create_test_source(
            "source2",
            vec![("doc2".to_string(), 0.7, 1), ("doc1".to_string(), 0.6, 2)],
        );

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 2);

        // Both documents appear in both sources, so both should have RRF scores
        for result in &merged {
            assert!(result.final_score > 0.0);
            assert_eq!(result.source_contributions.len(), 2);
        }
    }

    // A result with two source contributions gets a well-formed 95%
    // confidence interval.
    #[test]
    fn test_confidence_intervals() {
        let config = ResultMergingConfig {
            confidence_intervals: true,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source("source1", vec![("doc1".to_string(), 0.9, 1)]);

        let source2 = create_test_source("source2", vec![("doc1".to_string(), 0.7, 1)]);

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 1);

        let result = &merged[0];
        assert!(result.confidence_interval.is_some());

        let ci = result.confidence_interval.as_ref().unwrap();
        assert!(ci.lower_bound <= ci.upper_bound);
        assert_eq!(ci.confidence_level, 0.95);
    }

    // Min-max normalization maps arbitrary raw scores into [0, 1].
    #[test]
    fn test_score_normalization() {
        let config = ResultMergingConfig {
            normalization_method: ScoreNormalizationMethod::MinMax,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source = create_test_source(
            "source1",
            vec![
                ("doc1".to_string(), 10.0, 1),
                ("doc2".to_string(), 5.0, 2),
                ("doc3".to_string(), 0.0, 3),
            ],
        );

        let normalized = merger.normalize_source(&source).unwrap();

        // After min-max normalization, scores should be in [0, 1]
        for result in &normalized.results {
            assert!(result.score >= 0.0 && result.score <= 1.0);
        }
    }
}
975}