1use super::verification::{Evidence, VerificationStatus, VerifiedSource};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct Claim {
12 pub text: String,
14 pub normalized: String,
16 pub entities: Vec<String>,
18 pub keywords: Vec<String>,
20 pub category: Option<ClaimCategory>,
22}
23
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "snake_case")]
27pub enum ClaimCategory {
28 Factual,
30 Statistical,
32 Temporal,
34 Attribution,
36 Definition,
38 Causal,
40 Opinion,
42}
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
46#[serde(rename_all = "snake_case")]
47pub enum ClaimStatus {
48 Supported,
50 Refuted,
52 Disputed,
54 Inconclusive,
56 NoData,
58}
59
60impl ClaimStatus {
61 pub fn emoji(&self) -> &'static str {
63 match self {
64 Self::Supported => "\u{2705}", Self::Refuted => "\u{274c}", Self::Disputed => "\u{26a0}", Self::Inconclusive => "\u{2753}", Self::NoData => "\u{2796}", }
70 }
71}
72
73#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct Discrepancy {
76 pub aspect: String,
78 pub values: Vec<DiscrepancyValue>,
80 pub severity: f64,
82 pub explanations: Vec<String>,
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
88pub struct DiscrepancyValue {
89 pub value: String,
91 pub source_url: String,
93 pub source_weight: f64,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct ConsensusResult {
100 pub claim: Claim,
102 pub status: ClaimStatus,
104 pub confidence: f64,
106 pub consensus_answer: Option<String>,
108 pub supporting_evidence: Vec<Evidence>,
110 pub refuting_evidence: Vec<Evidence>,
112 pub discrepancies: Vec<Discrepancy>,
114 pub summary: String,
116}
117
/// Decides whether a claim is supported, refuted or disputed by a set of
/// verified sources.
///
/// Construct it with `new` (or `Default`) and adjust the thresholds with the
/// `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct ConsensusAnalyzer {
    /// Share of opinionated sources that must agree before a verdict is reached.
    min_agreement_ratio: f64,
    /// Minimum number of usable sources needed to attempt an analysis.
    min_sources: usize,
}
125
126impl ConsensusAnalyzer {
127 pub fn new() -> Self {
129 Self {
130 min_agreement_ratio: 0.7,
131 min_sources: 3,
132 }
133 }
134
135 pub fn with_min_agreement(mut self, ratio: f64) -> Self {
137 self.min_agreement_ratio = ratio.clamp(0.0, 1.0);
138 self
139 }
140
141 pub fn with_min_sources(mut self, count: usize) -> Self {
143 self.min_sources = count.max(1);
144 self
145 }
146
147 pub fn analyze(&self, claim: Claim, sources: &[VerifiedSource]) -> ConsensusResult {
149 let usable_sources: Vec<&VerifiedSource> =
150 sources.iter().filter(|s| s.is_usable()).collect();
151
152 if usable_sources.len() < self.min_sources {
153 return ConsensusResult {
154 claim,
155 status: ClaimStatus::Inconclusive,
156 confidence: 0.0,
157 consensus_answer: None,
158 supporting_evidence: Vec::new(),
159 refuting_evidence: Vec::new(),
160 discrepancies: Vec::new(),
161 summary: format!(
162 "Insufficient sources: {} found, {} required",
163 usable_sources.len(),
164 self.min_sources
165 ),
166 };
167 }
168
169 let mut supporting: Vec<Evidence> = Vec::new();
171 let mut refuting: Vec<Evidence> = Vec::new();
172 let mut values_found: HashMap<String, Vec<(String, f64)>> = HashMap::new();
173
174 for source in &usable_sources {
175 if let Some(supports) = source.supports_claim {
176 let evidence = Evidence {
177 source_url: source.url.clone(),
178 quote: source
179 .content_snippet
180 .clone()
181 .unwrap_or_else(|| "[No snippet]".to_string()),
182 supports,
183 confidence: source.weighted_confidence(),
184 position: None,
185 };
186
187 if supports {
188 supporting.push(evidence);
189 } else {
190 refuting.push(evidence);
191 }
192 }
193
194 if let Some(snippet) = &source.content_snippet {
196 let value_key = "primary_value".to_string();
198 values_found
199 .entry(value_key)
200 .or_default()
201 .push((snippet.clone(), source.quality.tier.weight()));
202 }
203 }
204
205 let total_opinionated = supporting.len() + refuting.len();
207 let agreement_ratio = if total_opinionated > 0 {
208 supporting.len() as f64 / total_opinionated as f64
209 } else {
210 0.5 };
212
213 let refutation_ratio = if total_opinionated > 0 {
214 refuting.len() as f64 / total_opinionated as f64
215 } else {
216 0.0
217 };
218
219 let discrepancies = self.detect_discrepancies(&values_found);
221
222 let status = if usable_sources.is_empty() {
224 ClaimStatus::NoData
225 } else if total_opinionated == 0 {
226 ClaimStatus::Inconclusive
227 } else if refutation_ratio > self.min_agreement_ratio {
228 ClaimStatus::Refuted
229 } else if agreement_ratio >= self.min_agreement_ratio && discrepancies.is_empty() {
230 ClaimStatus::Supported
231 } else if !supporting.is_empty() && !refuting.is_empty() {
232 ClaimStatus::Disputed
233 } else {
234 ClaimStatus::Inconclusive
235 };
236
237 let base_confidence = if status == ClaimStatus::Supported {
239 agreement_ratio
240 } else if status == ClaimStatus::Refuted {
241 refutation_ratio
242 } else {
243 0.5
244 };
245
246 let quality_factor: f64 = usable_sources
248 .iter()
249 .map(|s| s.quality.tier.weight())
250 .sum::<f64>()
251 / usable_sources.len() as f64;
252
253 let confidence = base_confidence * quality_factor;
254
255 let consensus_answer = if status == ClaimStatus::Supported {
257 self.extract_consensus_answer(&supporting)
258 } else if status == ClaimStatus::Refuted {
259 self.extract_consensus_answer(&refuting)
260 } else {
261 None
262 };
263
264 let summary = self.generate_summary(&status, &usable_sources, &discrepancies);
266
267 ConsensusResult {
268 claim,
269 status,
270 confidence,
271 consensus_answer,
272 supporting_evidence: supporting,
273 refuting_evidence: refuting,
274 discrepancies,
275 summary,
276 }
277 }
278
279 fn detect_discrepancies(
281 &self,
282 values: &HashMap<String, Vec<(String, f64)>>,
283 ) -> Vec<Discrepancy> {
284 let mut discrepancies = Vec::new();
285
286 for (aspect, vals) in values {
287 if vals.len() < 2 {
288 continue;
289 }
290
291 let unique_values: Vec<&str> = vals
293 .iter()
294 .map(|(v, _)| v.as_str())
295 .collect::<std::collections::HashSet<_>>()
296 .into_iter()
297 .collect();
298
299 if unique_values.len() > 1 {
300 let severity = if unique_values.len() == vals.len() {
302 0.9 } else {
304 0.5 };
306
307 let values_list: Vec<DiscrepancyValue> = vals
308 .iter()
309 .map(|(v, w)| DiscrepancyValue {
310 value: v.clone(),
311 source_url: "[source]".to_string(),
312 source_weight: *w,
313 })
314 .collect();
315
316 discrepancies.push(Discrepancy {
317 aspect: aspect.clone(),
318 values: values_list,
319 severity,
320 explanations: vec![
321 "Sources report different values".to_string(),
322 "May reflect different measurement methodologies".to_string(),
323 ],
324 });
325 }
326 }
327
328 discrepancies
329 }
330
331 fn extract_consensus_answer(&self, evidence: &[Evidence]) -> Option<String> {
333 if evidence.is_empty() {
334 return None;
335 }
336
337 let best = evidence.iter().max_by(|a, b| {
339 a.confidence
340 .partial_cmp(&b.confidence)
341 .unwrap_or(std::cmp::Ordering::Equal)
342 })?;
343
344 Some(best.quote.clone())
345 }
346
347 fn generate_summary(
349 &self,
350 status: &ClaimStatus,
351 sources: &[&VerifiedSource],
352 discrepancies: &[Discrepancy],
353 ) -> String {
354 let tier1_count = sources
355 .iter()
356 .filter(|s| s.quality.tier == super::sources::SourceTier::Tier1)
357 .count();
358 let tier2_count = sources
359 .iter()
360 .filter(|s| s.quality.tier == super::sources::SourceTier::Tier2)
361 .count();
362
363 let status_text = match status {
364 ClaimStatus::Supported => "VERIFIED",
365 ClaimStatus::Refuted => "REFUTED",
366 ClaimStatus::Disputed => "DISPUTED",
367 ClaimStatus::Inconclusive => "INCONCLUSIVE",
368 ClaimStatus::NoData => "NO DATA",
369 };
370
371 let discrepancy_note = if discrepancies.is_empty() {
372 "No discrepancies found.".to_string()
373 } else {
374 format!(
375 "{} discrepancies detected (review recommended).",
376 discrepancies.len()
377 )
378 };
379
380 format!(
381 "{}: Based on {} sources ({} Tier 1, {} Tier 2). {}",
382 status_text,
383 sources.len(),
384 tier1_count,
385 tier2_count,
386 discrepancy_note
387 )
388 }
389
390 pub fn to_verification_status(&self, consensus: &ConsensusResult) -> VerificationStatus {
392 match consensus.status {
393 ClaimStatus::Supported if consensus.confidence >= 0.7 => VerificationStatus::Verified,
394 ClaimStatus::Supported => VerificationStatus::PartiallyVerified,
395 ClaimStatus::Refuted => VerificationStatus::Refuted,
396 ClaimStatus::Disputed => VerificationStatus::Conflicting,
397 ClaimStatus::Inconclusive | ClaimStatus::NoData => VerificationStatus::Unverified,
398 }
399 }
400}
401
402impl Default for ConsensusAnalyzer {
403 fn default() -> Self {
404 Self::new()
405 }
406}
407
/// Lowercases `text` and collapses every run of whitespace into a single
/// space, dropping leading and trailing whitespace.
pub fn normalize_text(text: &str) -> String {
    let lowered = text.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    for word in lowered.split_whitespace() {
        // Words are never empty, so the buffer is empty only before the first.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }

    normalized
}
417
/// Extracts lowercase keywords from `text`.
///
/// The text is lowercased and split on every non-alphanumeric character.
/// Words of two characters or fewer and common English stop words are
/// dropped. Order and duplicates are preserved.
pub fn extract_keywords(text: &str) -> Vec<String> {
    const STOP_WORDS: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
        "do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
        "can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
        "from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
        "under", "again", "further", "then", "once", "here", "there", "when", "where", "why",
        "how", "all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and", "but", "if", "or",
        "because", "until", "while", "this", "that", "these", "those", "it", "its",
    ];

    text.to_lowercase()
        .split(|c: char| !c.is_alphanumeric())
        // Count characters, not bytes: `str::len` would let short non-ASCII
        // words such as "né" through. The text is already lowercase, so words
        // are compared with the stop list directly.
        .filter(|word| word.chars().count() > 2 && !STOP_WORDS.contains(word))
        .map(String::from)
        .collect()
}
437
#[cfg(test)]
mod tests {
    use super::*;
    use crate::research::sources::{SourceQuality, SourceTier};

    /// Builds the claim fixture shared by the analyzer tests.
    fn sample_claim() -> Claim {
        Claim {
            text: "Test claim".to_string(),
            normalized: "test claim".to_string(),
            entities: vec![],
            keywords: vec!["test".to_string()],
            category: Some(ClaimCategory::Factual),
        }
    }

    /// Builds a Tier 1 source with quality confidence `0.9`, HTTP status 200
    /// and the given stance towards the claim.
    fn opinionated_source(index: usize, supports: bool) -> VerifiedSource {
        let mut source = VerifiedSource::new(
            format!("https://source{}.com", index),
            SourceQuality {
                tier: SourceTier::Tier1,
                confidence: 0.9,
                ..Default::default()
            },
        );
        source.supports_claim = Some(supports);
        source.http_status = Some(200);
        source
    }

    #[test]
    fn test_claim_status_emoji() {
        for status in [
            ClaimStatus::Supported,
            ClaimStatus::Refuted,
            ClaimStatus::Disputed,
        ] {
            assert!(!status.emoji().is_empty());
        }
    }

    #[test]
    fn test_normalize_text() {
        assert_eq!(normalize_text("  Hello   World  "), "hello world");
        assert_eq!(normalize_text("UPPERCASE"), "uppercase");
    }

    #[test]
    fn test_extract_keywords() {
        let text = "The Rust programming language is designed for safety and performance.";
        let keywords = extract_keywords(text);

        for expected in ["rust", "programming", "safety", "performance"] {
            assert!(keywords.contains(&expected.to_string()));
        }

        for stop_word in ["the", "is", "and"] {
            assert!(!keywords.contains(&stop_word.to_string()));
        }
    }

    #[test]
    fn test_consensus_analyzer_insufficient_sources() {
        let analyzer = ConsensusAnalyzer::new();

        let sources = vec![VerifiedSource::new(
            "https://example.com".to_string(),
            SourceQuality {
                tier: SourceTier::Tier1,
                ..Default::default()
            },
        )];

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Inconclusive);
    }

    #[test]
    fn test_consensus_analyzer_supported() {
        let analyzer = ConsensusAnalyzer::new();

        let sources: Vec<VerifiedSource> = (0..4)
            .map(|i| {
                let mut source = opinionated_source(i, true);
                source.relevance_score = 0.8;
                source
            })
            .collect();

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Supported);
        assert!(result.confidence > 0.5);
    }

    #[test]
    fn test_consensus_analyzer_disputed() {
        let analyzer = ConsensusAnalyzer::new();

        // Sources 0 and 1 support the claim, sources 2 and 3 refute it.
        let sources: Vec<VerifiedSource> = (0..4)
            .map(|i| opinionated_source(i, i < 2))
            .collect();

        let result = analyzer.analyze(sample_claim(), &sources);
        assert_eq!(result.status, ClaimStatus::Disputed);
    }
}