// reasonkit_web/research/consensus.rs

1//! Consensus Extraction and Conflict Detection
2//!
3//! Analyzes multiple sources to extract consensus and identify discrepancies.
4
5use super::verification::{Evidence, VerificationStatus, VerifiedSource};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// A claim extracted from content
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct Claim {
12    /// The claim text
13    pub text: String,
14    /// Normalized/canonicalized form for comparison
15    pub normalized: String,
16    /// Key entities mentioned
17    pub entities: Vec<String>,
18    /// Keywords for matching
19    pub keywords: Vec<String>,
20    /// Claim category (if detected)
21    pub category: Option<ClaimCategory>,
22}
23
24/// Categories of claims
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "snake_case")]
27pub enum ClaimCategory {
28    /// Factual assertion (can be true/false)
29    Factual,
30    /// Statistical claim (numbers, percentages)
31    Statistical,
32    /// Temporal claim (dates, timelines)
33    Temporal,
34    /// Attribution (who said/did what)
35    Attribution,
36    /// Definition (what something is)
37    Definition,
38    /// Causal claim (X causes Y)
39    Causal,
40    /// Opinion/subjective
41    Opinion,
42}
43
44/// Status of a claim based on sources
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
46#[serde(rename_all = "snake_case")]
47pub enum ClaimStatus {
48    /// Supported by majority of sources
49    Supported,
50    /// Refuted by majority of sources
51    Refuted,
52    /// Sources disagree
53    Disputed,
54    /// Not enough sources to determine
55    Inconclusive,
56    /// No sources found
57    NoData,
58}
59
60impl ClaimStatus {
61    /// Get emoji representation
62    pub fn emoji(&self) -> &'static str {
63        match self {
64            Self::Supported => "\u{2705}",    // Green check
65            Self::Refuted => "\u{274c}",      // Red X
66            Self::Disputed => "\u{26a0}",     // Warning
67            Self::Inconclusive => "\u{2753}", // Question
68            Self::NoData => "\u{2796}",       // Minus
69        }
70    }
71}
72
73/// A discrepancy found between sources
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct Discrepancy {
76    /// What aspect differs
77    pub aspect: String,
78    /// Values from different sources
79    pub values: Vec<DiscrepancyValue>,
80    /// Severity (0.0 = minor, 1.0 = critical)
81    pub severity: f64,
82    /// Possible explanations
83    pub explanations: Vec<String>,
84}
85
86/// A single value in a discrepancy
87#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct DiscrepancyValue {
89    /// The value
90    pub value: String,
91    /// Source URL
92    pub source_url: String,
93    /// Source tier weight
94    pub source_weight: f64,
95}
96
97/// Result of consensus analysis
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ConsensusResult {
100    /// Original claim/query
101    pub claim: Claim,
102    /// Overall status
103    pub status: ClaimStatus,
104    /// Confidence in the consensus (0.0 - 1.0)
105    pub confidence: f64,
106    /// The consensus answer (if found)
107    pub consensus_answer: Option<String>,
108    /// Evidence supporting the claim
109    pub supporting_evidence: Vec<Evidence>,
110    /// Evidence refuting the claim
111    pub refuting_evidence: Vec<Evidence>,
112    /// Discrepancies found
113    pub discrepancies: Vec<Discrepancy>,
114    /// Summary of findings
115    pub summary: String,
116}
117
/// Consensus analyzer
///
/// Holds the two thresholds that decide when a set of sources is treated as
/// agreeing on a claim. `Debug` and `Clone` are derived so a configured
/// analyzer can be logged and reused.
#[derive(Debug, Clone)]
pub struct ConsensusAnalyzer {
    /// Minimum agreement ratio for consensus
    min_agreement_ratio: f64,
    /// Minimum sources for consensus
    min_sources: usize,
}
125
126impl ConsensusAnalyzer {
127    /// Create a new consensus analyzer
128    pub fn new() -> Self {
129        Self {
130            min_agreement_ratio: 0.7,
131            min_sources: 3,
132        }
133    }
134
135    /// Configure minimum agreement ratio
136    pub fn with_min_agreement(mut self, ratio: f64) -> Self {
137        self.min_agreement_ratio = ratio.clamp(0.0, 1.0);
138        self
139    }
140
141    /// Configure minimum sources
142    pub fn with_min_sources(mut self, count: usize) -> Self {
143        self.min_sources = count.max(1);
144        self
145    }
146
147    /// Analyze sources for consensus
148    pub fn analyze(&self, claim: Claim, sources: &[VerifiedSource]) -> ConsensusResult {
149        let usable_sources: Vec<&VerifiedSource> =
150            sources.iter().filter(|s| s.is_usable()).collect();
151
152        if usable_sources.len() < self.min_sources {
153            return ConsensusResult {
154                claim,
155                status: ClaimStatus::Inconclusive,
156                confidence: 0.0,
157                consensus_answer: None,
158                supporting_evidence: Vec::new(),
159                refuting_evidence: Vec::new(),
160                discrepancies: Vec::new(),
161                summary: format!(
162                    "Insufficient sources: {} found, {} required",
163                    usable_sources.len(),
164                    self.min_sources
165                ),
166            };
167        }
168
169        // Collect supporting and refuting evidence
170        let mut supporting: Vec<Evidence> = Vec::new();
171        let mut refuting: Vec<Evidence> = Vec::new();
172        let mut values_found: HashMap<String, Vec<(String, f64)>> = HashMap::new();
173
174        for source in &usable_sources {
175            if let Some(supports) = source.supports_claim {
176                let evidence = Evidence {
177                    source_url: source.url.clone(),
178                    quote: source
179                        .content_snippet
180                        .clone()
181                        .unwrap_or_else(|| "[No snippet]".to_string()),
182                    supports,
183                    confidence: source.weighted_confidence(),
184                    position: None,
185                };
186
187                if supports {
188                    supporting.push(evidence);
189                } else {
190                    refuting.push(evidence);
191                }
192            }
193
194            // Track values for discrepancy detection
195            if let Some(snippet) = &source.content_snippet {
196                // Simple value extraction - in production, use NLP
197                let value_key = "primary_value".to_string();
198                values_found
199                    .entry(value_key)
200                    .or_default()
201                    .push((snippet.clone(), source.quality.tier.weight()));
202            }
203        }
204
205        // Calculate agreement
206        let total_opinionated = supporting.len() + refuting.len();
207        let agreement_ratio = if total_opinionated > 0 {
208            supporting.len() as f64 / total_opinionated as f64
209        } else {
210            0.5 // Neutral if no opinions
211        };
212
213        let refutation_ratio = if total_opinionated > 0 {
214            refuting.len() as f64 / total_opinionated as f64
215        } else {
216            0.0
217        };
218
219        // Detect discrepancies
220        let discrepancies = self.detect_discrepancies(&values_found);
221
222        // Determine status
223        let status = if usable_sources.is_empty() {
224            ClaimStatus::NoData
225        } else if total_opinionated == 0 {
226            ClaimStatus::Inconclusive
227        } else if refutation_ratio > self.min_agreement_ratio {
228            ClaimStatus::Refuted
229        } else if agreement_ratio >= self.min_agreement_ratio && discrepancies.is_empty() {
230            ClaimStatus::Supported
231        } else if !supporting.is_empty() && !refuting.is_empty() {
232            ClaimStatus::Disputed
233        } else {
234            ClaimStatus::Inconclusive
235        };
236
237        // Calculate confidence
238        let base_confidence = if status == ClaimStatus::Supported {
239            agreement_ratio
240        } else if status == ClaimStatus::Refuted {
241            refutation_ratio
242        } else {
243            0.5
244        };
245
246        // Weight by source quality
247        let quality_factor: f64 = usable_sources
248            .iter()
249            .map(|s| s.quality.tier.weight())
250            .sum::<f64>()
251            / usable_sources.len() as f64;
252
253        let confidence = base_confidence * quality_factor;
254
255        // Generate consensus answer
256        let consensus_answer = if status == ClaimStatus::Supported {
257            self.extract_consensus_answer(&supporting)
258        } else if status == ClaimStatus::Refuted {
259            self.extract_consensus_answer(&refuting)
260        } else {
261            None
262        };
263
264        // Generate summary
265        let summary = self.generate_summary(&status, &usable_sources, &discrepancies);
266
267        ConsensusResult {
268            claim,
269            status,
270            confidence,
271            consensus_answer,
272            supporting_evidence: supporting,
273            refuting_evidence: refuting,
274            discrepancies,
275            summary,
276        }
277    }
278
279    /// Detect discrepancies between source values
280    fn detect_discrepancies(
281        &self,
282        values: &HashMap<String, Vec<(String, f64)>>,
283    ) -> Vec<Discrepancy> {
284        let mut discrepancies = Vec::new();
285
286        for (aspect, vals) in values {
287            if vals.len() < 2 {
288                continue;
289            }
290
291            // Simple string comparison - in production, use semantic similarity
292            let unique_values: Vec<&str> = vals
293                .iter()
294                .map(|(v, _)| v.as_str())
295                .collect::<std::collections::HashSet<_>>()
296                .into_iter()
297                .collect();
298
299            if unique_values.len() > 1 {
300                // Discrepancy found
301                let severity = if unique_values.len() == vals.len() {
302                    0.9 // All different
303                } else {
304                    0.5 // Some overlap
305                };
306
307                let values_list: Vec<DiscrepancyValue> = vals
308                    .iter()
309                    .map(|(v, w)| DiscrepancyValue {
310                        value: v.clone(),
311                        source_url: "[source]".to_string(),
312                        source_weight: *w,
313                    })
314                    .collect();
315
316                discrepancies.push(Discrepancy {
317                    aspect: aspect.clone(),
318                    values: values_list,
319                    severity,
320                    explanations: vec![
321                        "Sources report different values".to_string(),
322                        "May reflect different measurement methodologies".to_string(),
323                    ],
324                });
325            }
326        }
327
328        discrepancies
329    }
330
331    /// Extract consensus answer from evidence
332    fn extract_consensus_answer(&self, evidence: &[Evidence]) -> Option<String> {
333        if evidence.is_empty() {
334            return None;
335        }
336
337        // Find highest confidence evidence
338        let best = evidence.iter().max_by(|a, b| {
339            a.confidence
340                .partial_cmp(&b.confidence)
341                .unwrap_or(std::cmp::Ordering::Equal)
342        })?;
343
344        Some(best.quote.clone())
345    }
346
347    /// Generate summary text
348    fn generate_summary(
349        &self,
350        status: &ClaimStatus,
351        sources: &[&VerifiedSource],
352        discrepancies: &[Discrepancy],
353    ) -> String {
354        let tier1_count = sources
355            .iter()
356            .filter(|s| s.quality.tier == super::sources::SourceTier::Tier1)
357            .count();
358        let tier2_count = sources
359            .iter()
360            .filter(|s| s.quality.tier == super::sources::SourceTier::Tier2)
361            .count();
362
363        let status_text = match status {
364            ClaimStatus::Supported => "VERIFIED",
365            ClaimStatus::Refuted => "REFUTED",
366            ClaimStatus::Disputed => "DISPUTED",
367            ClaimStatus::Inconclusive => "INCONCLUSIVE",
368            ClaimStatus::NoData => "NO DATA",
369        };
370
371        let discrepancy_note = if discrepancies.is_empty() {
372            "No discrepancies found.".to_string()
373        } else {
374            format!(
375                "{} discrepancies detected (review recommended).",
376                discrepancies.len()
377            )
378        };
379
380        format!(
381            "{}: Based on {} sources ({} Tier 1, {} Tier 2). {}",
382            status_text,
383            sources.len(),
384            tier1_count,
385            tier2_count,
386            discrepancy_note
387        )
388    }
389
390    /// Determine overall verification status from consensus
391    pub fn to_verification_status(&self, consensus: &ConsensusResult) -> VerificationStatus {
392        match consensus.status {
393            ClaimStatus::Supported if consensus.confidence >= 0.7 => VerificationStatus::Verified,
394            ClaimStatus::Supported => VerificationStatus::PartiallyVerified,
395            ClaimStatus::Refuted => VerificationStatus::Refuted,
396            ClaimStatus::Disputed => VerificationStatus::Conflicting,
397            ClaimStatus::Inconclusive | ClaimStatus::NoData => VerificationStatus::Unverified,
398        }
399    }
400}
401
402impl Default for ConsensusAnalyzer {
403    fn default() -> Self {
404        Self::new()
405    }
406}
407
/// Lowercases `text` and collapses every run of whitespace into a single
/// space, dropping leading and trailing whitespace, so that strings can be
/// compared.
pub fn normalize_text(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    for token in lowered.split_whitespace() {
        // Separator goes between tokens only, never at either end.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(token);
    }

    normalized
}
417
/// Extract keywords from text (simple implementation)
///
/// The text is lowercased and split on every non-alphanumeric character. A
/// word is kept when it is longer than two characters and is not in the
/// built-in English stop-word list. Keywords are returned in order of
/// appearance; repeated words are not deduplicated.
///
/// Word length is measured in characters rather than UTF-8 bytes, so a
/// two-letter word containing a non-ASCII letter is filtered out just like a
/// two-letter ASCII word.
pub fn extract_keywords(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
        "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
        "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
        "from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
        "under", "again", "further", "then", "once", "here", "there", "when", "where", "why",
        "how", "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and", "but", "if", "or",
        "because", "until", "while", "this", "that", "these", "those", "it", "its",
    ];

    // The whole text is lowercased once up front, so each word can be checked
    // against the stop list without a further per-word allocation.
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| word.chars().count() > 2 && !STOP_WORDS.contains(word))
        .map(String::from)
        .collect()
}
437
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::sources::{SourceQuality, SourceTier};

    /// Builds the factual "Test claim" fixture shared by the analyzer tests.
    fn sample_claim() -> Claim {
        Claim {
            text: "Test claim".to_string(),
            normalized: "test claim".to_string(),
            entities: vec![],
            keywords: vec!["test".to_string()],
            category: Some(ClaimCategory::Factual),
        }
    }

    /// Builds a Tier 1 source (quality confidence 0.9, HTTP status 200) that
    /// either supports or refutes the claim.
    fn opinionated_source(index: usize, supports: bool) -> VerifiedSource {
        let mut source = VerifiedSource::new(
            format!("https://source{}.com", index),
            SourceQuality {
                tier: SourceTier::Tier1,
                confidence: 0.9,
                ..Default::default()
            },
        );
        source.supports_claim = Some(supports);
        source.http_status = Some(200);
        source
    }

    #[test]
    fn test_claim_status_emoji() {
        for status in [
            ClaimStatus::Supported,
            ClaimStatus::Refuted,
            ClaimStatus::Disputed,
        ] {
            assert!(!status.emoji().is_empty());
        }
    }

    #[test]
    fn test_normalize_text() {
        assert_eq!(normalize_text("  Hello   World  "), "hello world");
        assert_eq!(normalize_text("UPPERCASE"), "uppercase");
    }

    #[test]
    fn test_extract_keywords() {
        let text = "The Rust programming language is designed for safety and performance.";
        let keywords = extract_keywords(text);

        for expected in ["rust", "programming", "safety", "performance"] {
            assert!(keywords.contains(&expected.to_string()));
        }

        // Stop words should be filtered
        for stop_word in ["the", "is", "and"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
    }

    #[test]
    fn test_consensus_analyzer_insufficient_sources() {
        let analyzer = ConsensusAnalyzer::new();

        // A single source is below the default minimum of three.
        let sources = vec![VerifiedSource::new(
            "https://example.com".to_string(),
            SourceQuality {
                tier: SourceTier::Tier1,
                ..Default::default()
            },
        )];

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Inconclusive);
    }

    #[test]
    fn test_consensus_analyzer_supported() {
        let analyzer = ConsensusAnalyzer::new();

        // Four sources, all supporting the claim.
        let sources: Vec<VerifiedSource> = (0..4)
            .map(|index| {
                let mut source = opinionated_source(index, true);
                source.relevance_score = 0.8;
                source
            })
            .collect();

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Supported);
        assert!(result.confidence > 0.5);
    }

    #[test]
    fn test_consensus_analyzer_disputed() {
        let analyzer = ConsensusAnalyzer::new();

        // Sources 0 and 1 support the claim; sources 2 and 3 refute it.
        let sources: Vec<VerifiedSource> = (0..4)
            .map(|index| opinionated_source(index, index < 2))
            .collect();

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Disputed);
    }
}
// reasonkit_web/research/consensus.rs