use super::verification::{Evidence, VerificationStatus, VerifiedSource};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// A single claim to be checked against a set of sources.
///
/// Nothing in this file populates the derived fields outside the unit tests,
/// so the notes on them are assumptions to confirm against the code that
/// builds claims.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claim {
/// The claim text.
pub text: String,
/// Normalized form of `text` (presumably the output of `normalize_text` — TODO confirm).
pub normalized: String,
/// Entities associated with the claim (how they are extracted is not shown here).
pub entities: Vec<String>,
/// Keywords for the claim (presumably from `extract_keywords` — TODO confirm).
pub keywords: Vec<String>,
/// Optional classification of the claim; `None` when unclassified.
pub category: Option<ClaimCategory>,
}
/// Classification attached to a [`Claim`].
///
/// Serialized with snake_case variant names (`"factual"`, `"statistical"`, ...).
/// The variants are not interpreted anywhere in this file; their meaning is
/// given only by their names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClaimCategory {
Factual,
Statistical,
Temporal,
Attribution,
Definition,
Causal,
Opinion,
}
/// Verdict produced by `ConsensusAnalyzer::analyze` for a claim.
///
/// Serialized with snake_case variant names (`"supported"`, `"no_data"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ClaimStatus {
/// The share of supporting evidence reached the agreement threshold and no
/// discrepancies were detected.
Supported,
/// The share of refuting evidence exceeded the agreement threshold.
Refuted,
/// Neither verdict above was reached and both supporting and refuting
/// evidence exist.
Disputed,
/// Too few usable sources, no source took a position, or the evidence did
/// not meet any of the other verdicts.
Inconclusive,
/// No usable sources at all.
NoData,
}
impl ClaimStatus {
pub fn emoji(&self) -> &'static str {
match self {
Self::Supported => "\u{2705}", Self::Refuted => "\u{274c}", Self::Disputed => "\u{26a0}", Self::Inconclusive => "\u{2753}", Self::NoData => "\u{2796}", }
}
}
/// A disagreement between sources about one aspect of a claim.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Discrepancy {
/// Name of the aspect the sources disagree on (the analyzer in this file
/// only ever uses `"primary_value"`).
pub aspect: String,
/// Every value reported for this aspect, one entry per reporting source.
pub values: Vec<DiscrepancyValue>,
/// Severity score; the analyzer assigns `0.9` when all reported values
/// differ from one another and `0.5` when only some do.
pub severity: f64,
/// Human-readable candidate explanations for the disagreement.
pub explanations: Vec<String>,
}
/// One reported value taking part in a [`Discrepancy`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscrepancyValue {
/// The reported value (the analyzer stores the source's content snippet here).
pub value: String,
/// Identifier of the source that reported the value (intended to be its URL).
pub source_url: String,
/// Weight of the reporting source, taken from its quality tier.
pub source_weight: f64,
}
/// Outcome of analyzing one claim against a set of sources.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsensusResult {
/// The claim that was analyzed (moved in from the caller).
pub claim: Claim,
/// The verdict reached.
pub status: ClaimStatus,
/// Confidence in the verdict: the winning side's share of opinionated
/// sources (or `0.5` when there is no winner) multiplied by the mean tier
/// weight of the usable sources; `0.0` when there were too few sources.
pub confidence: f64,
/// Quote from the highest-confidence evidence on the winning side; `None`
/// unless the status is `Supported` or `Refuted`.
pub consensus_answer: Option<String>,
/// Evidence from sources that support the claim.
pub supporting_evidence: Vec<Evidence>,
/// Evidence from sources that refute the claim.
pub refuting_evidence: Vec<Evidence>,
/// Disagreements detected between the values reported by the sources.
pub discrepancies: Vec<Discrepancy>,
/// One-line human-readable summary of the verdict.
pub summary: String,
}
/// Derives a consensus verdict for a claim from a set of verified sources.
pub struct ConsensusAnalyzer {
// Share of opinionated sources that must agree for a Supported/Refuted
// verdict; kept within [0.0, 1.0] by `with_min_agreement`.
min_agreement_ratio: f64,
// Minimum number of usable sources required before any verdict is
// attempted; kept at 1 or more by `with_min_sources`.
min_sources: usize,
}
impl ConsensusAnalyzer {
    /// Creates an analyzer with the default thresholds: an agreement ratio of
    /// `0.7` and a minimum of `3` usable sources.
    pub fn new() -> Self {
        Self {
            min_agreement_ratio: 0.7,
            min_sources: 3,
        }
    }

    /// Sets the share of opinionated sources that must agree before a claim is
    /// reported as supported or refuted.
    ///
    /// The ratio is clamped to `[0.0, 1.0]`. A NaN ratio is ignored and the
    /// current threshold is kept.
    pub fn with_min_agreement(mut self, ratio: f64) -> Self {
        // `f64::clamp` passes NaN through, and a NaN threshold would make every
        // comparison in `analyze` false, silently disabling both verdicts.
        if !ratio.is_nan() {
            self.min_agreement_ratio = ratio.clamp(0.0, 1.0);
        }
        self
    }

    /// Sets the minimum number of usable sources required for a verdict.
    ///
    /// Values below `1` are raised to `1`.
    pub fn with_min_sources(mut self, count: usize) -> Self {
        self.min_sources = count.max(1);
        self
    }

    /// Analyzes `claim` against `sources` and returns the consensus verdict.
    ///
    /// Only sources for which `is_usable()` is true are considered. If fewer
    /// than the configured minimum remain, the result is `Inconclusive` with
    /// confidence `0.0` and no evidence.
    ///
    /// Otherwise each usable source that takes a position contributes one piece
    /// of supporting or refuting evidence, and each usable source with a content
    /// snippet contributes one reported value for discrepancy detection. The
    /// verdict is then chosen in this order:
    ///
    /// 1. `Inconclusive` if no source took a position;
    /// 2. `Refuted` if the refuting share exceeds the agreement threshold;
    /// 3. `Supported` if the supporting share reaches the threshold and no
    ///    discrepancies were detected;
    /// 4. `Disputed` if there is evidence on both sides;
    /// 5. `Inconclusive` otherwise.
    ///
    /// The confidence is the winning share (or `0.5` when there is no winner)
    /// multiplied by the mean tier weight of the usable sources.
    pub fn analyze(&self, claim: Claim, sources: &[VerifiedSource]) -> ConsensusResult {
        let usable_sources: Vec<&VerifiedSource> =
            sources.iter().filter(|s| s.is_usable()).collect();

        if usable_sources.len() < self.min_sources {
            return ConsensusResult {
                claim,
                status: ClaimStatus::Inconclusive,
                confidence: 0.0,
                consensus_answer: None,
                supporting_evidence: Vec::new(),
                refuting_evidence: Vec::new(),
                discrepancies: Vec::new(),
                summary: format!(
                    "Insufficient sources: {} found, {} required",
                    usable_sources.len(),
                    self.min_sources
                ),
            };
        }

        let mut supporting: Vec<Evidence> = Vec::new();
        let mut refuting: Vec<Evidence> = Vec::new();
        // Reported values grouped by aspect. Each entry keeps the URL of the
        // source it came from so discrepancies can be traced back to it.
        let mut values_found: HashMap<String, Vec<DiscrepancyValue>> = HashMap::new();

        for source in &usable_sources {
            if let Some(supports) = source.supports_claim {
                let evidence = Evidence {
                    source_url: source.url.clone(),
                    quote: source
                        .content_snippet
                        .clone()
                        .unwrap_or_else(|| "[No snippet]".to_string()),
                    supports,
                    confidence: source.weighted_confidence(),
                    position: None,
                };
                if supports {
                    supporting.push(evidence);
                } else {
                    refuting.push(evidence);
                }
            }
            if let Some(snippet) = &source.content_snippet {
                // All snippets are currently filed under a single aspect.
                values_found
                    .entry("primary_value".to_string())
                    .or_default()
                    .push(DiscrepancyValue {
                        value: snippet.clone(),
                        source_url: source.url.clone(),
                        source_weight: source.quality.tier.weight(),
                    });
            }
        }

        let total_opinionated = supporting.len() + refuting.len();
        let agreement_ratio = if total_opinionated > 0 {
            supporting.len() as f64 / total_opinionated as f64
        } else {
            0.5
        };
        let refutation_ratio = if total_opinionated > 0 {
            refuting.len() as f64 / total_opinionated as f64
        } else {
            0.0
        };

        let discrepancies = self.detect_discrepancies(values_found);

        // NOTE(review): refutation uses a strict `>` while support uses `>=`, so
        // a refuting share exactly at the threshold is not reported as Refuted.
        // Confirm the asymmetry is intended.
        let status = if usable_sources.is_empty() {
            // Defensive: unreachable while `min_sources >= 1`, because the
            // early return above already handles an empty source list.
            ClaimStatus::NoData
        } else if total_opinionated == 0 {
            ClaimStatus::Inconclusive
        } else if refutation_ratio > self.min_agreement_ratio {
            ClaimStatus::Refuted
        } else if agreement_ratio >= self.min_agreement_ratio && discrepancies.is_empty() {
            ClaimStatus::Supported
        } else if !supporting.is_empty() && !refuting.is_empty() {
            ClaimStatus::Disputed
        } else {
            ClaimStatus::Inconclusive
        };

        let base_confidence = match status {
            ClaimStatus::Supported => agreement_ratio,
            ClaimStatus::Refuted => refutation_ratio,
            ClaimStatus::Disputed | ClaimStatus::Inconclusive | ClaimStatus::NoData => 0.5,
        };
        // Mean tier weight of the usable sources.
        let quality_factor: f64 = usable_sources
            .iter()
            .map(|s| s.quality.tier.weight())
            .sum::<f64>()
            / usable_sources.len() as f64;
        let confidence = base_confidence * quality_factor;

        let consensus_answer = match status {
            ClaimStatus::Supported => self.extract_consensus_answer(&supporting),
            ClaimStatus::Refuted => self.extract_consensus_answer(&refuting),
            ClaimStatus::Disputed | ClaimStatus::Inconclusive | ClaimStatus::NoData => None,
        };
        let summary = self.generate_summary(&status, &usable_sources, &discrepancies);

        ConsensusResult {
            claim,
            status,
            confidence,
            consensus_answer,
            supporting_evidence: supporting,
            refuting_evidence: refuting,
            discrepancies,
            summary,
        }
    }

    /// Builds one [`Discrepancy`] for every aspect whose reported values are
    /// not all identical.
    ///
    /// Aspects with fewer than two reported values are skipped. Severity is
    /// `0.9` when every reported value is distinct and `0.5` when only some
    /// differ. The reported values are moved into the result unchanged, so each
    /// one keeps the URL and weight of its source.
    fn detect_discrepancies(
        &self,
        values: HashMap<String, Vec<DiscrepancyValue>>,
    ) -> Vec<Discrepancy> {
        let mut discrepancies = Vec::new();
        for (aspect, reported) in values {
            if reported.len() < 2 {
                continue;
            }
            let distinct_count = reported
                .iter()
                .map(|entry| entry.value.as_str())
                .collect::<std::collections::HashSet<_>>()
                .len();
            if distinct_count <= 1 {
                continue;
            }
            let severity = if distinct_count == reported.len() {
                0.9
            } else {
                0.5
            };
            discrepancies.push(Discrepancy {
                aspect,
                values: reported,
                severity,
                explanations: vec![
                    "Sources report different values".to_string(),
                    "May reflect different measurement methodologies".to_string(),
                ],
            });
        }
        discrepancies
    }

    /// Returns the quote of the evidence with the highest confidence, or `None`
    /// when `evidence` is empty.
    ///
    /// Confidences that cannot be ordered (NaN) are treated as equal.
    fn extract_consensus_answer(&self, evidence: &[Evidence]) -> Option<String> {
        evidence
            .iter()
            .max_by(|a, b| {
                a.confidence
                    .partial_cmp(&b.confidence)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .map(|best| best.quote.clone())
    }

    /// Formats the one-line summary: verdict label, number of sources with the
    /// Tier 1 / Tier 2 breakdown, and a note on detected discrepancies.
    fn generate_summary(
        &self,
        status: &ClaimStatus,
        sources: &[&VerifiedSource],
        discrepancies: &[Discrepancy],
    ) -> String {
        let tier1_count = sources
            .iter()
            .filter(|s| s.quality.tier == super::sources::SourceTier::Tier1)
            .count();
        let tier2_count = sources
            .iter()
            .filter(|s| s.quality.tier == super::sources::SourceTier::Tier2)
            .count();
        let status_text = match status {
            ClaimStatus::Supported => "VERIFIED",
            ClaimStatus::Refuted => "REFUTED",
            ClaimStatus::Disputed => "DISPUTED",
            ClaimStatus::Inconclusive => "INCONCLUSIVE",
            ClaimStatus::NoData => "NO DATA",
        };
        let discrepancy_note = if discrepancies.is_empty() {
            "No discrepancies found.".to_string()
        } else {
            format!(
                "{} discrepancies detected (review recommended).",
                discrepancies.len()
            )
        };
        format!(
            "{}: Based on {} sources ({} Tier 1, {} Tier 2). {}",
            status_text,
            sources.len(),
            tier1_count,
            tier2_count,
            discrepancy_note
        )
    }

    /// Maps a consensus result onto the coarser [`VerificationStatus`].
    ///
    /// A `Supported` claim is `Verified` only when its confidence is at least
    /// `0.7`, and `PartiallyVerified` otherwise. `Inconclusive` and `NoData`
    /// both map to `Unverified`.
    pub fn to_verification_status(&self, consensus: &ConsensusResult) -> VerificationStatus {
        match consensus.status {
            ClaimStatus::Supported if consensus.confidence >= 0.7 => VerificationStatus::Verified,
            ClaimStatus::Supported => VerificationStatus::PartiallyVerified,
            ClaimStatus::Refuted => VerificationStatus::Refuted,
            ClaimStatus::Disputed => VerificationStatus::Conflicting,
            ClaimStatus::Inconclusive | ClaimStatus::NoData => VerificationStatus::Unverified,
        }
    }
}
impl Default for ConsensusAnalyzer {
fn default() -> Self {
Self::new()
}
}
/// Lowercases `text` and collapses every run of whitespace into a single
/// space, dropping leading and trailing whitespace.
pub fn normalize_text(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    for word in lowered.split_whitespace() {
        // Separate words with exactly one space; nothing before the first word.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
/// Extracts keyword candidates from `text`.
///
/// The text is lowercased and split on every non-alphanumeric character.
/// Tokens of two bytes or fewer and tokens in the built-in English stop-word
/// list are dropped. The remaining tokens are returned in order of appearance;
/// duplicates are kept.
pub fn extract_keywords(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
        "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
        "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
        "from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
        "under", "again", "further", "then", "once", "here", "there", "when", "where", "why",
        "how", "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and", "but", "if", "or",
        "because", "until", "while", "this", "that", "these", "those", "it", "its",
    ];

    // The whole text is lowercased once up front, so each token can be compared
    // against the stop-word list directly without allocating again.
    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // `len()` counts bytes, not characters.
        .filter(|word| word.len() > 2 && !STOP_WORDS.contains(word))
        .map(String::from)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::sources::{SourceQuality, SourceTier};

    /// Builds the minimal factual claim shared by the analyzer tests.
    fn sample_claim() -> Claim {
        Claim {
            text: "Test claim".to_string(),
            normalized: "test claim".to_string(),
            entities: vec![],
            keywords: vec!["test".to_string()],
            category: Some(ClaimCategory::Factual),
        }
    }

    /// Builds a Tier 1 source (quality confidence 0.9, HTTP 200) that takes the
    /// given position on the claim.
    fn tier1_source(index: usize, supports: bool) -> VerifiedSource {
        let mut source = VerifiedSource::new(
            format!("https://source{}.com", index),
            SourceQuality {
                tier: SourceTier::Tier1,
                confidence: 0.9,
                ..Default::default()
            },
        );
        source.supports_claim = Some(supports);
        source.http_status = Some(200);
        source
    }

    #[test]
    fn test_claim_status_emoji() {
        for status in [
            ClaimStatus::Supported,
            ClaimStatus::Refuted,
            ClaimStatus::Disputed,
        ] {
            assert!(!status.emoji().is_empty());
        }
    }

    #[test]
    fn test_normalize_text() {
        assert_eq!(normalize_text("  Hello   World  "), "hello world");
        assert_eq!(normalize_text("UPPERCASE"), "uppercase");
    }

    #[test]
    fn test_extract_keywords() {
        let text = "The Rust programming language is designed for safety and performance.";
        let keywords = extract_keywords(text);
        for expected in ["rust", "programming", "safety", "performance"] {
            assert!(keywords.contains(&expected.to_string()));
        }
        for stop_word in ["the", "is", "and"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
    }

    #[test]
    fn test_consensus_analyzer_insufficient_sources() {
        let analyzer = ConsensusAnalyzer::new();
        // A single source is below the default minimum of three.
        let sources = vec![VerifiedSource::new(
            "https://example.com".to_string(),
            SourceQuality {
                tier: SourceTier::Tier1,
                ..Default::default()
            },
        )];
        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Inconclusive);
    }

    #[test]
    fn test_consensus_analyzer_supported() {
        let analyzer = ConsensusAnalyzer::new();
        // Four sources, all supporting.
        let sources: Vec<VerifiedSource> = (0..4)
            .map(|i| {
                let mut source = tier1_source(i, true);
                source.relevance_score = 0.8;
                source
            })
            .collect();
        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Supported);
        assert!(result.confidence > 0.5);
    }

    #[test]
    fn test_consensus_analyzer_disputed() {
        let analyzer = ConsensusAnalyzer::new();
        // Sources 0 and 1 support the claim, sources 2 and 3 refute it.
        let sources: Vec<VerifiedSource> = (0..4).map(|i| tier1_source(i, i < 2)).collect();
        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Disputed);
    }
}