use super::consensus::{Claim, ClaimCategory, ConsensusAnalyzer, ConsensusResult};
use super::sources::{SourceQuality, SourceTier, TierClassifier};
use super::verification::{VerificationMetrics, VerificationStatus, VerifiedSource};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::time::Instant;
use tracing::{debug, info, instrument, warn};
/// Tunable parameters for a triangulated research run.
///
/// Ready-made presets are available through `Default`, `strict()`,
/// `balanced()` and `permissive()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchConfig {
/// Minimum number of accepted sources; a run that ends up with fewer is
/// reported as unverified and gets a warning. Also forwarded to the
/// consensus analyzer.
pub min_sources: usize,
/// Upper bound on how many accepted sources a single run collects.
pub max_sources: usize,
/// Sources whose tier does not meet this minimum are skipped.
pub min_source_tier: SourceTier,
/// Not read by the code in this file; presumably the per-fetch timeout in
/// milliseconds — TODO confirm against the fetching layer.
pub fetch_timeout_ms: u64,
/// Not read by the code in this file; presumably a concurrency cap for
/// fetches — TODO confirm.
pub max_parallel_fetches: usize,
/// Agreement threshold forwarded to the consensus analyzer
/// (`with_min_agreement`).
pub min_agreement_ratio: f64,
/// Not read by the code in this file; presumably toggles result caching —
/// TODO confirm.
pub enable_cache: bool,
/// Not read by the code in this file; presumably the cache entry lifetime
/// in seconds — TODO confirm.
pub cache_ttl_secs: u64,
/// When `true`, sources that are not served over HTTPS are skipped and a
/// warning is recorded for each.
pub require_https: bool,
/// NOTE(review): this flag is not consulted by `research_with_urls`, which
/// always attaches snippets when they are supplied — confirm whether it is
/// honored elsewhere.
pub include_snippets: bool,
/// Snippets longer than this (measured via `String::len`, i.e. bytes) are
/// cut down and suffixed with `...`.
pub max_snippet_length: usize,
}
impl Default for ResearchConfig {
fn default() -> Self {
Self {
min_sources: 3, max_sources: 10,
min_source_tier: SourceTier::Tier2,
fetch_timeout_ms: 30_000,
max_parallel_fetches: 5,
min_agreement_ratio: 0.7,
enable_cache: true,
cache_ttl_secs: 3600, require_https: false, include_snippets: true,
max_snippet_length: 500,
}
}
}
impl ResearchConfig {
    /// Preset for high-assurance checks: 5 to 15 tier-1 sources, an 80%
    /// agreement threshold and HTTPS required. All other fields keep their
    /// default values.
    pub fn strict() -> Self {
        let base = Self::default();
        Self {
            min_sources: 5,
            max_sources: 15,
            min_source_tier: SourceTier::Tier1,
            min_agreement_ratio: 0.8,
            require_https: true,
            ..base
        }
    }

    /// Preset identical to the default configuration.
    pub fn balanced() -> Self {
        Default::default()
    }

    /// Preset for lenient checks: 2 to 5 sources down to tier 3, a 60%
    /// agreement threshold and a 15 second fetch timeout. All other fields
    /// keep their default values.
    pub fn permissive() -> Self {
        let base = Self::default();
        Self {
            min_sources: 2,
            max_sources: 5,
            min_source_tier: SourceTier::Tier3,
            min_agreement_ratio: 0.6,
            fetch_timeout_ms: 15_000,
            ..base
        }
    }
}
/// Outcome of one triangulated research run.
///
/// The field descriptions below reflect how
/// `TriangulationEngine::research_with_urls` populates the struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResearchResult {
/// The query text the run was started with.
pub query: String,
/// Overall verification verdict; unverified when too few sources were
/// accepted, otherwise derived from the consensus analysis.
pub status: VerificationStatus,
/// Consensus confidence scaled by the share of required sources that were
/// actually accepted (capped at 1); `0.0` when no source was accepted.
pub confidence: f64,
/// Sources that passed the tier, HTTPS and duplicate-domain filters.
pub sources: Vec<VerifiedSource>,
/// Raw output of the consensus analyzer for the query.
pub consensus: ConsensusResult,
/// Aggregate counters computed from `sources` and the run duration.
pub metrics: VerificationMetrics,
/// UTC time at which the result was assembled.
pub timestamp: DateTime<Utc>,
/// Elapsed time of the run in milliseconds.
pub duration_ms: u64,
/// Copy of the engine configuration used for the run.
pub config_used: ResearchConfig,
/// Human-readable notes about skipped sources or unmet requirements.
pub warnings: Vec<String>,
}
impl ResearchResult {
    /// Returns `true` when the status reports success.
    pub fn is_verified(&self) -> bool {
        self.status.is_success()
    }

    /// Returns `true` when the status reports a problem or at least one
    /// warning was recorded.
    pub fn has_problems(&self) -> bool {
        if self.status.is_problem() {
            return true;
        }
        !self.warnings.is_empty()
    }

    /// One-line overview: status, number of sources, confidence as a whole
    /// percentage and duration.
    pub fn summary(&self) -> String {
        let emoji = self.status.emoji();
        let description = self.status.description();
        let source_count = self.sources.len();
        let confidence_pct = self.confidence * 100.0;
        format!(
            "{} {} - {} sources, {:.0}% confidence, {}ms",
            emoji, description, source_count, confidence_pct, self.duration_ms
        )
    }

    /// Multi-section plain-text report.
    ///
    /// Sections appear in this order: header, sources, metrics,
    /// discrepancies (only if any), warnings (only if any), consensus
    /// summary.
    pub fn detailed_report(&self) -> String {
        let mut out = String::new();

        // Header.
        out += "=== TRIANGULATED RESEARCH REPORT ===\n\n";
        out += &format!("Query: {}\n", self.query);
        out += &format!(
            "Status: {} {}\n",
            self.status.emoji(),
            self.status.description()
        );
        out += &format!("Confidence: {:.1}%\n", self.confidence * 100.0);
        out += &format!("Duration: {}ms\n\n", self.duration_ms);

        // One numbered line per source: tier tag, support marker, URL.
        out += "--- Sources ---\n";
        for (position, src) in (1usize..).zip(&self.sources) {
            let tier_tag = match src.quality.tier {
                SourceTier::Tier1 => "[T1]",
                SourceTier::Tier2 => "[T2]",
                SourceTier::Tier3 => "[T3]",
                SourceTier::Unknown => "[??]",
            };
            let support_mark = match src.supports_claim {
                Some(true) => "\u{2705}",
                Some(false) => "\u{274c}",
                None => "\u{2796}",
            };
            out += &format!(
                "{}. {} {} {}\n",
                position, tier_tag, support_mark, src.url
            );
        }

        // Aggregate counters.
        let m = &self.metrics;
        out += "\n--- Metrics ---\n";
        out += &format!("Total sources: {}\n", m.total_sources);
        out += &format!("Accessible: {}\n", m.accessible_sources);
        out += &format!("Tier 1: {}\n", m.tier1_count);
        out += &format!("Tier 2: {}\n", m.tier2_count);
        out += &format!("Tier 3: {}\n", m.tier3_count);
        out += &format!("Supporting: {}\n", m.supporting_sources);
        out += &format!("Refuting: {}\n", m.refuting_sources);

        // Optional sections are omitted entirely when empty.
        let discrepancies = &self.consensus.discrepancies;
        if !discrepancies.is_empty() {
            out += "\n--- Discrepancies ---\n";
            for item in discrepancies {
                out += &format!("- {} (severity: {:.1})\n", item.aspect, item.severity);
            }
        }
        if !self.warnings.is_empty() {
            out += "\n--- Warnings ---\n";
            for message in &self.warnings {
                out += &format!("! {}\n", message);
            }
        }

        out += "\n--- Consensus ---\n";
        out.push_str(&self.consensus.summary);
        out.push('\n');
        out
    }
}
/// Engine that filters candidate sources by quality and runs a consensus
/// analysis over the ones it accepts.
pub struct TriangulationEngine {
// Thresholds and limits applied during a research run.
config: ResearchConfig,
// Assigns a quality/tier to each URL; can be extended via `classifier_mut`.
classifier: TierClassifier,
// Configured in `new` from the config's agreement ratio and minimum source count.
consensus_analyzer: ConsensusAnalyzer,
}
impl TriangulationEngine {
    /// Builds an engine around `config`.
    ///
    /// The consensus analyzer is set up with the config's
    /// `min_agreement_ratio` and `min_sources`; the classifier starts from
    /// `TierClassifier::new()`.
    pub fn new(config: ResearchConfig) -> Self {
        let consensus_analyzer = ConsensusAnalyzer::new()
            .with_min_agreement(config.min_agreement_ratio)
            .with_min_sources(config.min_sources);
        Self {
            config,
            classifier: TierClassifier::new(),
            consensus_analyzer,
        }
    }

    /// Engine using `ResearchConfig::default()`.
    pub fn default_engine() -> Self {
        Self::new(ResearchConfig::default())
    }

    /// Engine using `ResearchConfig::strict()`.
    pub fn strict_engine() -> Self {
        Self::new(ResearchConfig::strict())
    }

    /// The configuration this engine was created with.
    pub fn config(&self) -> &ResearchConfig {
        &self.config
    }

    /// Mutable access to the tier classifier, e.g. to register custom
    /// domains after construction.
    pub fn classifier_mut(&mut self) -> &mut TierClassifier {
        &mut self.classifier
    }

    /// Shortens `snippet` to at most `max_len` bytes and appends `...` when
    /// anything was removed.
    ///
    /// The cut point backs off to the nearest earlier UTF-8 character
    /// boundary, so multi-byte text can never trigger a slicing panic.
    fn truncate_snippet(snippet: &str, max_len: usize) -> String {
        if snippet.len() <= max_len {
            return snippet.to_owned();
        }
        // `max_len < snippet.len()` here, and index 0 is always a boundary,
        // so this loop terminates without underflow.
        let mut end = max_len;
        while !snippet.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &snippet[..end])
    }

    /// Runs a triangulated verification of `query` against pre-fetched
    /// sources.
    ///
    /// # Arguments
    /// * `query` - the claim or question being verified.
    /// * `source_urls` - candidate URLs; each is classified and filtered.
    /// * `source_contents` - `(url, snippet, supports_claim)` entries; the
    ///   first entry whose URL equals a candidate URL is attached to it.
    ///
    /// # Filtering
    /// Candidates are processed in classifier order and dropped when they
    /// are below the minimum tier, are not HTTPS while HTTPS is required
    /// (a warning is recorded), or share a domain with an already accepted
    /// source. Collection stops once `max_sources` sources are accepted.
    ///
    /// # Returns
    /// A [`ResearchResult`]. The status is `Unverified` when fewer than
    /// `min_sources` sources were accepted; otherwise it is derived from the
    /// consensus analysis. Confidence is the consensus confidence scaled by
    /// `accepted / min_sources` (capped at 1), or `0.0` with no sources.
    // `source_contents` is skipped as well: it carries the full snippet text
    // and would otherwise be dumped into every span.
    #[instrument(skip(self, source_urls, source_contents))]
    pub fn research_with_urls(
        &self,
        query: &str,
        source_urls: &[String],
        source_contents: &[(String, Option<String>, Option<bool>)],
    ) -> ResearchResult {
        let start = Instant::now();
        let mut warnings = Vec::new();
        info!(query = %query, source_count = %source_urls.len(), "Starting triangulated research");

        let classified = self.classifier.classify_multiple(source_urls);
        let mut verified_sources: Vec<VerifiedSource> = Vec::new();
        let mut seen_domains: HashSet<String> = HashSet::new();

        for (url, quality) in classified {
            // Checked before accepting anything so a cap of 0 admits no
            // source at all.
            if verified_sources.len() >= self.config.max_sources {
                break;
            }
            if !quality.tier.meets_minimum(self.config.min_source_tier) {
                debug!(url = %url, tier = ?quality.tier, "Source below minimum tier, skipping");
                continue;
            }
            if self.config.require_https && !quality.has_https {
                debug!(url = %url, "Source not HTTPS, skipping");
                warnings.push(format!("Skipped non-HTTPS source: {}", url));
                continue;
            }
            // Only one source per domain counts towards triangulation.
            if seen_domains.contains(&quality.domain) {
                debug!(url = %url, domain = %quality.domain, "Duplicate domain, skipping");
                continue;
            }
            seen_domains.insert(quality.domain.clone());

            let content_info = source_contents.iter().find(|(u, _, _)| u == &url);
            let mut source = VerifiedSource::new(url.clone(), quality);
            // Content is supplied by the caller, so every accepted source is
            // recorded as a successful fetch.
            source.http_status = Some(200);
            if let Some((_, snippet, supports)) = content_info {
                source.content_snippet = snippet
                    .as_deref()
                    .map(|s| Self::truncate_snippet(s, self.config.max_snippet_length));
                source.supports_claim = *supports;
                // Sources with an explicit verdict are weighted higher.
                source.relevance_score = if supports.is_some() { 0.8 } else { 0.5 };
            }
            verified_sources.push(source);
        }

        let enough_sources = verified_sources.len() >= self.config.min_sources;
        if !enough_sources {
            warnings.push(format!(
                "Insufficient sources: {} found, {} required (CONS-006 violation)",
                verified_sources.len(),
                self.config.min_sources
            ));
        }

        let claim = Claim {
            text: query.to_string(),
            normalized: super::consensus::normalize_text(query),
            entities: Vec::new(),
            keywords: super::consensus::extract_keywords(query),
            category: Some(ClaimCategory::Factual),
        };
        let consensus = self.consensus_analyzer.analyze(claim, &verified_sources);

        // Truncation is theoretical only: u64 milliseconds cover far more
        // than any realistic run time.
        let duration_ms = start.elapsed().as_millis() as u64;
        let metrics = VerificationMetrics::from_sources(&verified_sources, duration_ms);

        let status = if enough_sources {
            self.consensus_analyzer.to_verification_status(&consensus)
        } else {
            VerificationStatus::Unverified
        };
        let confidence = if verified_sources.is_empty() {
            0.0
        } else {
            // Penalize runs that fell short of the required source count.
            consensus.confidence
                * (verified_sources.len() as f64 / self.config.min_sources as f64).min(1.0)
        };

        info!(
            status = ?status,
            confidence = %confidence,
            sources = %verified_sources.len(),
            duration_ms = %duration_ms,
            "Research complete"
        );

        ResearchResult {
            query: query.to_string(),
            status,
            confidence,
            sources: verified_sources,
            consensus,
            metrics,
            timestamp: Utc::now(),
            duration_ms,
            config_used: self.config.clone(),
            warnings,
        }
    }

    /// Classifies `urls` and asks the classifier whether they satisfy the
    /// triangulation requirement for this engine's `min_sources` and
    /// `min_source_tier`.
    ///
    /// Returns the classifier's `(bool, String)` pair unchanged; presumably
    /// a pass flag plus an explanatory message — confirm against
    /// `TierClassifier::meets_triangulation_requirement`.
    pub fn quick_verify(&self, urls: &[String]) -> (bool, String) {
        let classified = self.classifier.classify_multiple(urls);
        let qualities: Vec<SourceQuality> = classified.into_iter().map(|(_, q)| q).collect();
        self.classifier.meets_triangulation_requirement(
            &qualities,
            self.config.min_sources,
            self.config.min_source_tier,
        )
    }

    /// Classifies a single URL and returns its full quality record.
    pub fn check_source(&self, url: &str) -> SourceQuality {
        self.classifier.classify(url)
    }

    /// Classifies a single URL and returns only its tier.
    pub fn get_tier(&self, url: &str) -> SourceTier {
        self.check_source(url).tier
    }
}
impl Default for TriangulationEngine {
fn default() -> Self {
Self::default_engine()
}
}
/// Fluent builder for a `TriangulationEngine` with a customized
/// configuration and extra classifier domains.
pub struct TriangulationEngineBuilder {
// Configuration under construction; starts from `ResearchConfig::default()`.
config: ResearchConfig,
// Domains registered with the classifier as tier 1 when `build` runs.
custom_tier1_domains: Vec<String>,
// Domains registered with the classifier as tier 2 when `build` runs.
custom_tier2_domains: Vec<String>,
// Domains registered with the classifier as unreliable when `build` runs.
custom_unreliable_domains: Vec<String>,
}
impl TriangulationEngineBuilder {
    /// Starts from `ResearchConfig::default()` with no custom domains.
    pub fn new() -> Self {
        Self {
            config: ResearchConfig::default(),
            custom_tier1_domains: Vec::new(),
            custom_tier2_domains: Vec::new(),
            custom_unreliable_domains: Vec::new(),
        }
    }

    /// Sets the minimum number of sources; values below 1 are raised to 1.
    ///
    /// If this ends up above the configured maximum, `build` raises the
    /// maximum to match.
    pub fn min_sources(mut self, count: usize) -> Self {
        self.config.min_sources = count.max(1);
        self
    }

    /// Sets the maximum number of sources, never below the current minimum.
    pub fn max_sources(mut self, count: usize) -> Self {
        self.config.max_sources = count.max(self.config.min_sources);
        self
    }

    /// Sets the lowest source tier that is still accepted.
    pub fn min_tier(mut self, tier: SourceTier) -> Self {
        self.config.min_source_tier = tier;
        self
    }

    /// Sets the fetch timeout in milliseconds.
    pub fn timeout_ms(mut self, ms: u64) -> Self {
        self.config.fetch_timeout_ms = ms;
        self
    }

    /// Sets the minimum agreement ratio, clamped to `0.0..=1.0`.
    ///
    /// A NaN `ratio` is ignored and the previous value is kept, because
    /// `f64::clamp` would otherwise pass NaN straight through.
    pub fn agreement_ratio(mut self, ratio: f64) -> Self {
        if !ratio.is_nan() {
            self.config.min_agreement_ratio = ratio.clamp(0.0, 1.0);
        }
        self
    }

    /// Chooses whether non-HTTPS sources are rejected.
    pub fn require_https(mut self, require: bool) -> Self {
        self.config.require_https = require;
        self
    }

    /// Queues `domain` for registration as a tier-1 domain.
    pub fn add_tier1_domain(mut self, domain: &str) -> Self {
        self.custom_tier1_domains.push(domain.to_string());
        self
    }

    /// Queues `domain` for registration as a tier-2 domain.
    pub fn add_tier2_domain(mut self, domain: &str) -> Self {
        self.custom_tier2_domains.push(domain.to_string());
        self
    }

    /// Queues `domain` for registration as an unreliable domain.
    pub fn add_unreliable_domain(mut self, domain: &str) -> Self {
        self.custom_unreliable_domains.push(domain.to_string());
        self
    }

    /// Creates the engine and registers all queued custom domains with its
    /// classifier (tier 1 first, then tier 2, then unreliable).
    ///
    /// The source limits are normalized here so that
    /// `max_sources >= min_sources` holds regardless of the order in which
    /// the setters were called; otherwise an engine could be built that can
    /// never collect enough sources to verify anything.
    pub fn build(self) -> TriangulationEngine {
        let Self {
            mut config,
            custom_tier1_domains,
            custom_tier2_domains,
            custom_unreliable_domains,
        } = self;
        config.max_sources = config.max_sources.max(config.min_sources);

        let mut engine = TriangulationEngine::new(config);
        for domain in custom_tier1_domains {
            engine.classifier_mut().add_tier1_domain(&domain);
        }
        for domain in custom_tier2_domains {
            engine.classifier_mut().add_tier2_domain(&domain);
        }
        for domain in custom_unreliable_domains {
            engine.classifier_mut().add_unreliable_domain(&domain);
        }
        engine
    }
}
impl Default for TriangulationEngineBuilder {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned URL list the engine expects.
    fn owned_urls(urls: &[&str]) -> Vec<String> {
        urls.iter().map(|&u| u.to_owned()).collect()
    }

    /// Builds one `(url, snippet, Some(true))` entry per URL, in order.
    fn supporting_contents(
        urls: &[String],
        snippet: &str,
    ) -> Vec<(String, Option<String>, Option<bool>)> {
        urls.iter()
            .map(|u| (u.clone(), Some(snippet.to_owned()), Some(true)))
            .collect()
    }

    /// The three well-known test URLs shared by the reporting tests.
    fn sample_test_urls() -> Vec<String> {
        owned_urls(&[
            "https://docs.rs/test",
            "https://github.com/test",
            "https://stackoverflow.com/test",
        ])
    }

    #[test]
    fn test_config_default() {
        let cfg = ResearchConfig::default();
        assert_eq!(cfg.min_sources, 3);
        assert_eq!(cfg.min_source_tier, SourceTier::Tier2);
    }

    #[test]
    fn test_config_strict() {
        let cfg = ResearchConfig::strict();
        assert_eq!(cfg.min_sources, 5);
        assert_eq!(cfg.min_source_tier, SourceTier::Tier1);
        assert!(cfg.require_https);
    }

    #[test]
    fn test_engine_creation() {
        let engine = TriangulationEngine::default_engine();
        assert_eq!(engine.config().min_sources, 3);
    }

    #[test]
    fn test_quick_verify_pass() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://docs.rs/tokio",
            "https://github.com/rust-lang/rust",
            "https://en.wikipedia.org/wiki/Rust",
        ]);
        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(passes);
    }

    #[test]
    fn test_quick_verify_fail_insufficient() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://random-blog-123.com/post",
            "https://another-unknown.net/article",
        ]);
        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(!passes);
    }

    #[test]
    fn test_check_source() {
        let engine = TriangulationEngine::default_engine();

        let known = engine.check_source("https://docs.rs/tokio");
        assert_eq!(known.tier, SourceTier::Tier1);

        let unknown = engine.check_source("https://randomsite.xyz/page");
        assert_eq!(unknown.tier, SourceTier::Tier3);
    }

    #[test]
    fn test_research_with_urls() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://docs.rs/tokio",
            "https://github.com/tokio-rs/tokio",
            "https://stackoverflow.com/questions/tokio",
            "https://en.wikipedia.org/wiki/Tokio_(software)",
        ]);
        // Every source carries the same supporting snippet.
        let contents = supporting_contents(&urls, "Tokio is an async runtime for Rust");

        let result =
            engine.research_with_urls("Is Tokio an async runtime for Rust?", &urls, &contents);

        assert!(result.sources.len() >= 3);
        assert!(result.confidence > 0.0);
        assert!(
            result.status.is_success(),
            "Expected successful verification status, got {:?}",
            result.status
        );
    }

    #[test]
    fn test_builder() {
        let engine = TriangulationEngineBuilder::new()
            .min_sources(5)
            .max_sources(15)
            .min_tier(SourceTier::Tier1)
            .require_https(true)
            .add_tier1_domain("mycustomdocs.com")
            .build();

        assert_eq!(engine.config().min_sources, 5);
        assert!(engine.config().require_https);

        let quality = engine.check_source("https://mycustomdocs.com/page");
        assert_eq!(quality.tier, SourceTier::Tier1);
    }

    #[test]
    fn test_result_summary() {
        let engine = TriangulationEngine::default_engine();
        let urls = sample_test_urls();
        let contents = supporting_contents(&urls, "Test content");

        let summary = engine
            .research_with_urls("Test query", &urls, &contents)
            .summary();

        assert!(!summary.is_empty());
        assert!(summary.contains("sources"));
    }

    #[test]
    fn test_detailed_report() {
        let engine = TriangulationEngine::default_engine();
        let urls = sample_test_urls();
        let contents = supporting_contents(&urls, "Test");

        let report = engine
            .research_with_urls("Test query", &urls, &contents)
            .detailed_report();

        for section in ["TRIANGULATED RESEARCH REPORT", "Sources", "Metrics"] {
            assert!(report.contains(section));
        }
    }
}