oxirs_vec/
advanced_result_merging.rs

//! Advanced Result Merging and Score Combination System
//!
//! This module provides sophisticated result merging capabilities for combining
//! vector search results from multiple sources, algorithms, and modalities.
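//!
//! # Example
//!
//! A minimal, illustrative sketch of merging two sources with the default
//! configuration. The import path assumes this module is reachable as
//! `oxirs_vec::advanced_result_merging`; the source names, scores, and timings
//! below are made up, and the snippet is marked `ignore` rather than run as a
//! doc test.
//!
//! ```ignore
//! use oxirs_vec::advanced_result_merging::*;
//! use std::collections::HashMap;
//! use std::time::Duration;
//!
//! // Helper that wraps a single scored item as a `SourceResult`.
//! let make_source = |source_id: &str, item_id: &str, score: f32| SourceResult {
//!     source_id: source_id.to_string(),
//!     results: vec![ScoredResult {
//!         item_id: item_id.to_string(),
//!         score,
//!         rank: 1,
//!         vector: None,
//!         metadata: None,
//!     }],
//!     metadata: ResultMetadata {
//!         source_type: SourceType::VectorSearch,
//!         algorithm_used: "example".to_string(),
//!         total_candidates: 100,
//!         processing_time: Duration::from_millis(5),
//!         quality_metrics: HashMap::new(),
//!     },
//! };
//!
//! let mut merger = AdvancedResultMerger::new(ResultMergingConfig::default());
//! let merged = merger
//!     .merge_results(vec![
//!         make_source("vector", "doc1", 0.92),
//!         make_source("text", "doc1", 0.75),
//!     ])
//!     .unwrap();
//! assert_eq!(merged[0].item_id, "doc1");
//! ```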

use crate::Vector;
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};

/// Configuration for result merging
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultMergingConfig {
    /// Score combination strategy
    pub combination_strategy: ScoreCombinationStrategy,
    /// Normalization method for scores
    pub normalization_method: ScoreNormalizationMethod,
    /// Fusion algorithm for rank-based combination
    pub fusion_algorithm: RankFusionAlgorithm,
    /// Weights for different result sources
    pub source_weights: HashMap<String, f32>,
    /// Confidence interval calculation
    pub confidence_intervals: bool,
    /// Enable explanation generation
    pub enable_explanations: bool,
    /// Result diversity enhancement
    pub diversity_config: Option<DiversityConfig>,
}

impl Default for ResultMergingConfig {
    fn default() -> Self {
        let mut source_weights = HashMap::new();
        source_weights.insert("primary".to_string(), 1.0);

        Self {
            combination_strategy: ScoreCombinationStrategy::WeightedSum,
            normalization_method: ScoreNormalizationMethod::MinMax,
            fusion_algorithm: RankFusionAlgorithm::CombSUM,
            source_weights,
            confidence_intervals: true,
            enable_explanations: false,
            diversity_config: None,
        }
    }
}

/// Score combination strategies
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreCombinationStrategy {
    /// Simple average of scores
    Average,
    /// Weighted sum of scores
    WeightedSum,
    /// Maximum score across sources
    Maximum,
    /// Minimum score across sources
    Minimum,
    /// Geometric mean
    GeometricMean,
    /// Harmonic mean
    HarmonicMean,
    /// Product of scores
    Product,
    /// Borda count method
    BordaCount,
    /// Custom combination function
    Custom(String),
}

/// Score normalization methods
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ScoreNormalizationMethod {
    /// No normalization
    None,
    /// Min-max normalization to [0, 1]
    MinMax,
    /// Z-score normalization
    ZScore,
    /// Rank-based normalization
    RankBased,
    /// Softmax normalization
    Softmax,
    /// Sigmoid normalization
    Sigmoid,
}

/// Rank fusion algorithms
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RankFusionAlgorithm {
    /// CombSUM - sum of scores
    CombSUM,
    /// CombMNZ - multiply sum by number of non-zero scores
    CombMNZ,
    /// Reciprocal Rank Fusion
    ReciprocalRankFusion,
    /// Borda fusion
    BordaFusion,
    /// Condorcet fusion
    CondorcetFusion,
}

/// Diversity configuration for result enhancement
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiversityConfig {
    /// Enable diversity enhancement
    pub enable: bool,
    /// Diversity metric
    pub metric: DiversityMetric,
    /// Diversity weight (0.0 = no diversity, 1.0 = maximum diversity)
    pub diversity_weight: f32,
    /// Maximum results to consider for diversity
    pub max_diverse_results: usize,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DiversityMetric {
    /// Maximum Marginal Relevance
    MMR,
    /// Angular diversity
    Angular,
    /// Clustering-based diversity
    ClusterBased,
    /// Content-based diversity
    ContentBased,
}

/// Result from a single source with metadata
#[derive(Debug, Clone)]
pub struct SourceResult {
    pub source_id: String,
    pub results: Vec<ScoredResult>,
    pub metadata: ResultMetadata,
}

/// Individual scored result
#[derive(Debug, Clone)]
pub struct ScoredResult {
    pub item_id: String,
    pub score: f32,
    pub rank: usize,
    pub vector: Option<Vector>,
    pub metadata: Option<HashMap<String, String>>,
}

/// Metadata for result source
#[derive(Debug, Clone)]
pub struct ResultMetadata {
    pub source_type: SourceType,
    pub algorithm_used: String,
    pub total_candidates: usize,
    pub processing_time: std::time::Duration,
    pub quality_metrics: HashMap<String, f32>,
}

#[derive(Debug, Clone)]
pub enum SourceType {
    VectorSearch,
    TextSearch,
    KnowledgeGraph,
    MultiModal,
    Hybrid,
}

/// Merged result with explanation
#[derive(Debug, Clone)]
pub struct MergedResult {
    pub item_id: String,
    pub final_score: f32,
    pub confidence_interval: Option<ConfidenceInterval>,
    pub source_contributions: Vec<SourceContribution>,
    pub explanation: Option<ResultExplanation>,
    pub diversity_score: Option<f32>,
}

/// Confidence interval for a result
#[derive(Debug, Clone)]
pub struct ConfidenceInterval {
    pub lower_bound: f32,
    pub upper_bound: f32,
    pub confidence_level: f32,
}

/// Contribution from each source
#[derive(Debug, Clone)]
pub struct SourceContribution {
    pub source_id: String,
    pub original_score: f32,
    pub normalized_score: f32,
    pub weight: f32,
    pub rank: usize,
}

/// Explanation for result ranking
#[derive(Debug, Clone)]
pub struct ResultExplanation {
    pub ranking_factors: Vec<RankingFactor>,
    pub score_breakdown: HashMap<String, f32>,
    pub similar_items: Vec<String>,
    pub differentiating_features: Vec<String>,
}

#[derive(Debug, Clone)]
pub struct RankingFactor {
    pub factor_name: String,
    pub importance: f32,
    pub description: String,
}

/// Advanced result merging engine
pub struct AdvancedResultMerger {
    config: ResultMergingConfig,
    normalization_cache: HashMap<String, NormalizationParams>,
    fusion_stats: FusionStatistics,
}

/// Parameters for score normalization
#[derive(Debug, Clone)]
struct NormalizationParams {
    min_score: f32,
    max_score: f32,
    mean_score: f32,
    std_dev: f32,
}

/// Statistics for fusion operations
#[derive(Debug, Clone, Default)]
pub struct FusionStatistics {
    pub total_merges: usize,
    pub average_sources_per_merge: f32,
    pub score_distribution: HashMap<String, f32>,
    pub fusion_quality_metrics: HashMap<String, f32>,
}

impl AdvancedResultMerger {
    /// Create new result merger
    pub fn new(config: ResultMergingConfig) -> Self {
        Self {
            config,
            normalization_cache: HashMap::new(),
            fusion_stats: FusionStatistics::default(),
        }
    }

    /// Merge results from multiple sources
    pub fn merge_results(&mut self, sources: Vec<SourceResult>) -> Result<Vec<MergedResult>> {
        if sources.is_empty() {
            return Ok(Vec::new());
        }

        // Update statistics
        self.fusion_stats.total_merges += 1;
        self.fusion_stats.average_sources_per_merge = (self.fusion_stats.average_sources_per_merge
            * (self.fusion_stats.total_merges - 1) as f32
            + sources.len() as f32)
            / self.fusion_stats.total_merges as f32;

        // Step 1: Normalize scores from each source
        let normalized_sources = self.normalize_sources(&sources)?;

        // Step 2: Collect all unique items
        let all_items = self.collect_unique_items(&normalized_sources);

        // Step 3: Apply fusion algorithm
        let mut merged_results = match self.config.fusion_algorithm {
            RankFusionAlgorithm::CombSUM => self.apply_combsum(&normalized_sources, &all_items)?,
            RankFusionAlgorithm::CombMNZ => self.apply_combmnz(&normalized_sources, &all_items)?,
            RankFusionAlgorithm::ReciprocalRankFusion => {
                self.apply_rrf(&normalized_sources, &all_items)?
            }
            RankFusionAlgorithm::BordaFusion => {
                self.apply_borda(&normalized_sources, &all_items)?
            }
            RankFusionAlgorithm::CondorcetFusion => {
                self.apply_condorcet(&normalized_sources, &all_items)?
            }
        };

        // Step 4: Apply score combination strategy
        merged_results = self.apply_score_combination(merged_results, &normalized_sources)?;

        // Step 5: Calculate confidence intervals if enabled
        if self.config.confidence_intervals {
            merged_results =
                self.calculate_confidence_intervals(merged_results, &normalized_sources)?;
        }

        // Step 6: Generate explanations if enabled
        if self.config.enable_explanations {
            merged_results = self.generate_explanations(merged_results, &normalized_sources)?;
        }

        // Step 7: Apply diversity enhancement if configured
        if let Some(diversity_config) = &self.config.diversity_config {
            if diversity_config.enable {
                merged_results = self.enhance_diversity(merged_results, diversity_config)?;
            }
        }

        // Step 8: Sort by final score (descending); non-comparable (NaN) scores
        // are treated as equal rather than panicking
        merged_results.sort_by(|a, b| {
            b.final_score
                .partial_cmp(&a.final_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(merged_results)
    }

    /// Normalize scores from all sources
    fn normalize_sources(&mut self, sources: &[SourceResult]) -> Result<Vec<SourceResult>> {
        let mut normalized = Vec::new();

        for source in sources {
            let normalized_source = self.normalize_source(source)?;
            normalized.push(normalized_source);
        }

        Ok(normalized)
    }

    /// Normalize a single source
    fn normalize_source(&mut self, source: &SourceResult) -> Result<SourceResult> {
        if source.results.is_empty() {
            return Ok(source.clone());
        }

        let scores: Vec<f32> = source.results.iter().map(|r| r.score).collect();
        let normalization_params = self.calculate_normalization_params(&scores);

        // Cache normalization parameters
        self.normalization_cache
            .insert(source.source_id.clone(), normalization_params.clone());

        let normalized_results: Vec<ScoredResult> = source
            .results
            .iter()
            .map(|result| {
                let normalized_score = self.normalize_score(result.score, &normalization_params);
                ScoredResult {
                    item_id: result.item_id.clone(),
                    score: normalized_score,
                    rank: result.rank,
                    vector: result.vector.clone(),
                    metadata: result.metadata.clone(),
                }
            })
            .collect();

        Ok(SourceResult {
            source_id: source.source_id.clone(),
            results: normalized_results,
            metadata: source.metadata.clone(),
        })
    }

    /// Calculate normalization parameters
    fn calculate_normalization_params(&self, scores: &[f32]) -> NormalizationParams {
        let min_score = scores.iter().fold(f32::INFINITY, |a, &b| a.min(b));
        let max_score = scores.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
        let mean_score = scores.iter().sum::<f32>() / scores.len() as f32;

        let variance = scores
            .iter()
            .map(|&x| (x - mean_score).powi(2))
            .sum::<f32>()
            / scores.len() as f32;
        let std_dev = variance.sqrt();

        NormalizationParams {
            min_score,
            max_score,
            mean_score,
            std_dev,
        }
    }

    /// Normalize a single score
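    ///
    /// Min-max maps a score to `[0, 1]` via `(x - min) / (max - min)`; z-score
    /// uses `(x - mean) / std_dev`. The softmax and rank-based branches below
    /// are simplified per-score approximations, since the full versions need
    /// the complete score list rather than a single value.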
    fn normalize_score(&self, score: f32, params: &NormalizationParams) -> f32 {
        match self.config.normalization_method {
            ScoreNormalizationMethod::None => score,
            ScoreNormalizationMethod::MinMax => {
                if params.max_score == params.min_score {
                    0.5 // Default to middle value if no variance
                } else {
                    (score - params.min_score) / (params.max_score - params.min_score)
                }
            }
            ScoreNormalizationMethod::ZScore => {
                if params.std_dev == 0.0 {
                    0.0 // Default to zero if no variance
                } else {
                    (score - params.mean_score) / params.std_dev
                }
            }
            ScoreNormalizationMethod::Softmax => {
                // For softmax, we need all scores, so this is a simplified version
                (score - params.min_score).exp()
            }
            ScoreNormalizationMethod::Sigmoid => 1.0 / (1.0 + (-score).exp()),
            ScoreNormalizationMethod::RankBased => {
                // Simplified: full rank-based normalization would require rank
                // information; guard against division by zero
                if params.max_score == 0.0 {
                    0.0
                } else {
                    score / params.max_score
                }
            }
        }
    }

    /// Collect all unique items from sources
    fn collect_unique_items(&self, sources: &[SourceResult]) -> HashSet<String> {
        let mut items = HashSet::new();
        for source in sources {
            for result in &source.results {
                items.insert(result.item_id.clone());
            }
        }
        items
    }

    /// Apply CombSUM fusion algorithm
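    ///
    /// For each item, the fused score is the weighted sum of its scores across
    /// all sources that returned it (scores have already been normalized by
    /// `normalize_sources` at this point): `score(d) = sum_s(w_s * score_s(d))`.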
    fn apply_combsum(
        &self,
        sources: &[SourceResult],
        items: &HashSet<String>,
    ) -> Result<Vec<MergedResult>> {
        let mut merged_results = Vec::new();

        for item_id in items {
            let mut total_score = 0.0;
            let mut source_contributions = Vec::new();

            for source in sources {
                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
                    let weight = self
                        .config
                        .source_weights
                        .get(&source.source_id)
                        .copied()
                        .unwrap_or(1.0);
                    let weighted_score = result.score * weight;
                    total_score += weighted_score;

                    source_contributions.push(SourceContribution {
                        source_id: source.source_id.clone(),
                        original_score: result.score,
                        normalized_score: result.score,
                        weight,
                        rank: result.rank,
                    });
                }
            }

            merged_results.push(MergedResult {
                item_id: item_id.clone(),
                final_score: total_score,
                confidence_interval: None,
                source_contributions,
                explanation: None,
                diversity_score: None,
            });
        }

        Ok(merged_results)
    }

    /// Apply CombMNZ fusion algorithm
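    ///
    /// CombMNZ multiplies the CombSUM score by the number of sources that
    /// assigned the item a non-zero weighted score:
    /// `score(d) = |{s : score_s(d) > 0}| * sum_s(w_s * score_s(d))`.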
    fn apply_combmnz(
        &self,
        sources: &[SourceResult],
        items: &HashSet<String>,
    ) -> Result<Vec<MergedResult>> {
        let mut merged_results = Vec::new();

        for item_id in items {
            let mut total_score = 0.0;
            let mut non_zero_count = 0;
            let mut source_contributions = Vec::new();

            for source in sources {
                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
                    let weight = self
                        .config
                        .source_weights
                        .get(&source.source_id)
                        .copied()
                        .unwrap_or(1.0);
                    let weighted_score = result.score * weight;

                    if weighted_score > 0.0 {
                        total_score += weighted_score;
                        non_zero_count += 1;
                    }

                    source_contributions.push(SourceContribution {
                        source_id: source.source_id.clone(),
                        original_score: result.score,
                        normalized_score: result.score,
                        weight,
                        rank: result.rank,
                    });
                }
            }

            let final_score = if non_zero_count > 0 {
                total_score * non_zero_count as f32
            } else {
                0.0
            };

            merged_results.push(MergedResult {
                item_id: item_id.clone(),
                final_score,
                confidence_interval: None,
                source_contributions,
                explanation: None,
                diversity_score: None,
            });
        }

        Ok(merged_results)
    }

    /// Apply Reciprocal Rank Fusion
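    ///
    /// Each source contributes `w_s / (k + rank_s(d))` for every item it
    /// returned, where `rank_s(d)` is the item's rank in that source and `k`
    /// is a smoothing constant (60 here, the value used in the original RRF
    /// formulation by Cormack et al.).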
    fn apply_rrf(
        &self,
        sources: &[SourceResult],
        items: &HashSet<String>,
    ) -> Result<Vec<MergedResult>> {
        let k = 60.0; // RRF constant
        let mut merged_results = Vec::new();

        for item_id in items {
            let mut rrf_score = 0.0;
            let mut source_contributions = Vec::new();

            for source in sources {
                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
                    let weight = self
                        .config
                        .source_weights
                        .get(&source.source_id)
                        .copied()
                        .unwrap_or(1.0);
                    let rrf_contribution = weight / (k + result.rank as f32);
                    rrf_score += rrf_contribution;

                    source_contributions.push(SourceContribution {
                        source_id: source.source_id.clone(),
                        original_score: result.score,
                        normalized_score: rrf_contribution,
                        weight,
                        rank: result.rank,
                    });
                }
            }

            merged_results.push(MergedResult {
                item_id: item_id.clone(),
                final_score: rrf_score,
                confidence_interval: None,
                source_contributions,
                explanation: None,
                diversity_score: None,
            });
        }

        Ok(merged_results)
    }

    /// Apply Borda fusion
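    ///
    /// Each source awards an item `n - rank` points, where `n` is the number
    /// of results in that source (so the top-ranked item receives `n - 1`
    /// points and the last-ranked item receives 0); points are weighted per
    /// source and summed across sources.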
    fn apply_borda(
        &self,
        sources: &[SourceResult],
        items: &HashSet<String>,
    ) -> Result<Vec<MergedResult>> {
        let mut merged_results = Vec::new();

        for item_id in items {
            let mut borda_score = 0.0;
            let mut source_contributions = Vec::new();

            for source in sources {
                if let Some(result) = source.results.iter().find(|r| r.item_id == *item_id) {
                    let weight = self
                        .config
                        .source_weights
                        .get(&source.source_id)
                        .copied()
                        .unwrap_or(1.0);
                    let max_rank = source.results.len() as f32;
                    let borda_contribution = weight * (max_rank - result.rank as f32);
                    borda_score += borda_contribution;

                    source_contributions.push(SourceContribution {
                        source_id: source.source_id.clone(),
                        original_score: result.score,
                        normalized_score: borda_contribution,
                        weight,
                        rank: result.rank,
                    });
                }
            }

            merged_results.push(MergedResult {
                item_id: item_id.clone(),
                final_score: borda_score,
                confidence_interval: None,
                source_contributions,
                explanation: None,
                diversity_score: None,
            });
        }

        Ok(merged_results)
    }

    /// Apply Condorcet fusion (simplified)
    fn apply_condorcet(
        &self,
        sources: &[SourceResult],
        items: &HashSet<String>,
    ) -> Result<Vec<MergedResult>> {
        // For simplicity, we'll use a vote-based approach
        // In a full implementation, this would involve pairwise comparisons
        self.apply_borda(sources, items)
    }

    /// Apply score combination strategy
    fn apply_score_combination(
        &self,
        mut results: Vec<MergedResult>,
        _sources: &[SourceResult],
    ) -> Result<Vec<MergedResult>> {
        match self.config.combination_strategy {
            ScoreCombinationStrategy::Average => {
                for result in &mut results {
                    if !result.source_contributions.is_empty() {
                        result.final_score = result
                            .source_contributions
                            .iter()
                            .map(|c| c.normalized_score)
                            .sum::<f32>()
                            / result.source_contributions.len() as f32;
                    }
                }
            }
            ScoreCombinationStrategy::WeightedSum => {
                // Already handled in fusion algorithms
            }
            ScoreCombinationStrategy::Maximum => {
                for result in &mut results {
                    // Start from NEG_INFINITY so negative (e.g. z-score
                    // normalized) scores are handled correctly
                    result.final_score = result
                        .source_contributions
                        .iter()
                        .map(|c| c.normalized_score)
                        .fold(f32::NEG_INFINITY, f32::max);
                }
            }
            ScoreCombinationStrategy::Minimum => {
                for result in &mut results {
                    result.final_score = result
                        .source_contributions
                        .iter()
                        .map(|c| c.normalized_score)
                        .fold(f32::INFINITY, f32::min);
                }
            }
            ScoreCombinationStrategy::GeometricMean => {
                for result in &mut results {
                    let product: f32 = result
                        .source_contributions
                        .iter()
                        .map(|c| c.normalized_score.max(0.001)) // Avoid zero values
                        .product();
                    result.final_score =
                        product.powf(1.0 / result.source_contributions.len() as f32);
                }
            }
            _ => {
                // Other strategies would be implemented here
            }
        }

        Ok(results)
    }

    /// Calculate confidence intervals
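    ///
    /// Uses a normal approximation over a result's per-source normalized
    /// scores: the 95% interval is `mean ± 1.96 * std_dev / sqrt(n)`, clamped
    /// to `[0, 1]`. Results contributed by only a single source get no
    /// interval.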
    fn calculate_confidence_intervals(
        &self,
        mut results: Vec<MergedResult>,
        _sources: &[SourceResult],
    ) -> Result<Vec<MergedResult>> {
        for result in &mut results {
            if result.source_contributions.len() > 1 {
                let scores: Vec<f32> = result
                    .source_contributions
                    .iter()
                    .map(|c| c.normalized_score)
                    .collect();

                let mean = scores.iter().sum::<f32>() / scores.len() as f32;
                let variance =
                    scores.iter().map(|&x| (x - mean).powi(2)).sum::<f32>() / scores.len() as f32;
                let std_dev = variance.sqrt();

                // 95% confidence interval (approximation)
                let margin = 1.96 * std_dev / (scores.len() as f32).sqrt();

                result.confidence_interval = Some(ConfidenceInterval {
                    lower_bound: (mean - margin).max(0.0),
                    upper_bound: (mean + margin).min(1.0),
                    confidence_level: 0.95,
                });
            }
        }

        Ok(results)
    }

    /// Generate explanations for results
    fn generate_explanations(
        &self,
        mut results: Vec<MergedResult>,
        _sources: &[SourceResult],
    ) -> Result<Vec<MergedResult>> {
        for result in &mut results {
            let mut ranking_factors = Vec::new();
            let mut score_breakdown = HashMap::new();

            // Analyze source contributions
            for contribution in &result.source_contributions {
                ranking_factors.push(RankingFactor {
                    factor_name: format!("Source: {}", contribution.source_id),
                    importance: contribution.normalized_score,
                    description: format!(
                        "Contribution from {} with weight {}",
                        contribution.source_id, contribution.weight
                    ),
                });

                score_breakdown.insert(
                    contribution.source_id.clone(),
                    contribution.normalized_score,
                );
            }

            result.explanation = Some(ResultExplanation {
                ranking_factors,
                score_breakdown,
                similar_items: Vec::new(), // Would be populated in a full implementation
                differentiating_features: Vec::new(), // Would be populated in a full implementation
            });
        }

        Ok(results)
    }

    /// Enhance diversity of results
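    ///
    /// Greedy Maximum Marginal Relevance (MMR) selection: at each step the
    /// candidate maximizing
    /// `(1 - diversity_weight) * relevance - diversity_weight * max_similarity`
    /// is chosen, so `diversity_weight = 0.0` preserves the pure relevance
    /// ranking and `diversity_weight = 1.0` favors maximum dissimilarity from
    /// the already selected results (matching the semantics documented on
    /// `DiversityConfig`).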
    fn enhance_diversity(
        &self,
        results: Vec<MergedResult>,
        diversity_config: &DiversityConfig,
    ) -> Result<Vec<MergedResult>> {
        if results.len() <= diversity_config.max_diverse_results {
            return Ok(results);
        }

        // Simple diversity enhancement using Maximum Marginal Relevance (MMR);
        // sort by relevance first so the most relevant result seeds the selection
        let mut selected = Vec::new();
        let mut remaining = results;
        remaining.sort_by(|a, b| {
            b.final_score
                .partial_cmp(&a.final_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        // Always select the most relevant result first
        if !remaining.is_empty() {
            let top_result = remaining.remove(0);
            selected.push(top_result);
        }

        // Select remaining results balancing relevance and diversity
        while selected.len() < diversity_config.max_diverse_results && !remaining.is_empty() {
            let mut best_idx = 0;
            let mut best_mmr = f32::NEG_INFINITY;

            for (i, candidate) in remaining.iter().enumerate() {
                // Calculate MMR score (diversity_weight = 0.0 keeps pure relevance)
                let relevance = candidate.final_score;
                let max_similarity =
                    self.calculate_max_similarity_to_selected(candidate, &selected);
                let mmr = (1.0 - diversity_config.diversity_weight) * relevance
                    - diversity_config.diversity_weight * max_similarity;

                if mmr > best_mmr {
                    best_mmr = mmr;
                    best_idx = i;
                }
            }

            let selected_result = remaining.remove(best_idx);
            selected.push(selected_result);
        }

        // Add diversity scores
        for result in &mut selected {
            result.diversity_score = Some(0.8); // Placeholder - would be calculated properly
        }

        Ok(selected)
    }

    /// Calculate maximum similarity to already selected results
    fn calculate_max_similarity_to_selected(
        &self,
        candidate: &MergedResult,
        selected: &[MergedResult],
    ) -> f32 {
        if selected.is_empty() {
            return 0.0;
        }

        // Simplified similarity calculation
        // In a full implementation, this would use actual vector similarities
        let mut max_similarity: f32 = 0.0;

        for selected_result in selected {
            // Simple similarity based on score difference
            let similarity: f32 = 1.0 - (candidate.final_score - selected_result.final_score).abs();
            max_similarity = max_similarity.max(similarity);
        }

        max_similarity
    }

    /// Get fusion statistics
    pub fn get_statistics(&self) -> &FusionStatistics {
        &self.fusion_stats
    }

    /// Reset statistics
    pub fn reset_statistics(&mut self) {
        self.fusion_stats = FusionStatistics::default();
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    fn create_test_source(source_id: &str, results: Vec<(String, f32, usize)>) -> SourceResult {
        let scored_results = results
            .into_iter()
            .map(|(id, score, rank)| ScoredResult {
                item_id: id,
                score,
                rank,
                vector: None,
                metadata: None,
            })
            .collect();

        SourceResult {
            source_id: source_id.to_string(),
            results: scored_results,
            metadata: ResultMetadata {
                source_type: SourceType::VectorSearch,
                algorithm_used: "test".to_string(),
                total_candidates: 100,
                processing_time: Duration::from_millis(10),
                quality_metrics: HashMap::new(),
            },
        }
    }

    #[test]
    fn test_combsum_fusion() {
        let config = ResultMergingConfig::default();
        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source(
            "source1",
            vec![("doc1".to_string(), 0.9, 1), ("doc2".to_string(), 0.8, 2)],
        );

        let source2 = create_test_source(
            "source2",
            vec![("doc1".to_string(), 0.7, 1), ("doc3".to_string(), 0.6, 2)],
        );

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 3); // doc1, doc2, doc3

        // doc1 should have the highest score (appears in both sources)
        let doc1_result = merged.iter().find(|r| r.item_id == "doc1").unwrap();
        assert!(doc1_result.final_score > 1.0); // Should be sum of normalized scores
    }

    #[test]
    fn test_reciprocal_rank_fusion() {
        let config = ResultMergingConfig {
            fusion_algorithm: RankFusionAlgorithm::ReciprocalRankFusion,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source(
            "source1",
            vec![("doc1".to_string(), 0.9, 1), ("doc2".to_string(), 0.8, 2)],
        );

        let source2 = create_test_source(
            "source2",
            vec![("doc2".to_string(), 0.7, 1), ("doc1".to_string(), 0.6, 2)],
        );

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 2);

        // Both documents appear in both sources, so both should have RRF scores
        for result in &merged {
            assert!(result.final_score > 0.0);
            assert_eq!(result.source_contributions.len(), 2);
        }
    }

    #[test]
    fn test_confidence_intervals() {
        let config = ResultMergingConfig {
            confidence_intervals: true,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source1 = create_test_source("source1", vec![("doc1".to_string(), 0.9, 1)]);

        let source2 = create_test_source("source2", vec![("doc1".to_string(), 0.7, 1)]);

        let merged = merger.merge_results(vec![source1, source2]).unwrap();

        assert_eq!(merged.len(), 1);

        let result = &merged[0];
        assert!(result.confidence_interval.is_some());

        let ci = result.confidence_interval.as_ref().unwrap();
        assert!(ci.lower_bound <= ci.upper_bound);
        assert_eq!(ci.confidence_level, 0.95);
    }

    #[test]
    fn test_score_normalization() {
        let config = ResultMergingConfig {
            normalization_method: ScoreNormalizationMethod::MinMax,
            ..Default::default()
        };

        let mut merger = AdvancedResultMerger::new(config);

        let source = create_test_source(
            "source1",
            vec![
                ("doc1".to_string(), 10.0, 1),
                ("doc2".to_string(), 5.0, 2),
                ("doc3".to_string(), 0.0, 3),
            ],
        );

        let normalized = merger.normalize_source(&source).unwrap();

        // After min-max normalization, scores should be in [0, 1]
        for result in &normalized.results {
            assert!(result.score >= 0.0 && result.score <= 1.0);
        }
    }
}