1use super::consensus::{Claim, ClaimCategory, ConsensusAnalyzer, ConsensusResult};
17use super::sources::{SourceQuality, SourceTier, TierClassifier};
18use super::verification::{VerificationMetrics, VerificationStatus, VerifiedSource};
19use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use std::collections::HashSet;
22use std::time::Instant;
23use tracing::{debug, info, instrument, warn};
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ResearchConfig {
28 pub min_sources: usize,
30 pub max_sources: usize,
32 pub min_source_tier: SourceTier,
34 pub fetch_timeout_ms: u64,
36 pub max_parallel_fetches: usize,
38 pub min_agreement_ratio: f64,
40 pub enable_cache: bool,
42 pub cache_ttl_secs: u64,
44 pub require_https: bool,
46 pub include_snippets: bool,
48 pub max_snippet_length: usize,
50}
51
52impl Default for ResearchConfig {
53 fn default() -> Self {
54 Self {
55 min_sources: 3, max_sources: 10,
57 min_source_tier: SourceTier::Tier2,
58 fetch_timeout_ms: 30_000,
59 max_parallel_fetches: 5,
60 min_agreement_ratio: 0.7,
61 enable_cache: true,
62 cache_ttl_secs: 3600, require_https: false, include_snippets: true,
65 max_snippet_length: 500,
66 }
67 }
68}
69
70impl ResearchConfig {
71 pub fn strict() -> Self {
73 Self {
74 min_sources: 5,
75 max_sources: 15,
76 min_source_tier: SourceTier::Tier1,
77 min_agreement_ratio: 0.8,
78 require_https: true,
79 ..Default::default()
80 }
81 }
82
83 pub fn balanced() -> Self {
85 Self::default()
86 }
87
88 pub fn permissive() -> Self {
90 Self {
91 min_sources: 2,
92 max_sources: 5,
93 min_source_tier: SourceTier::Tier3,
94 min_agreement_ratio: 0.6,
95 fetch_timeout_ms: 15_000,
96 ..Default::default()
97 }
98 }
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct ResearchResult {
104 pub query: String,
106 pub status: VerificationStatus,
108 pub confidence: f64,
110 pub sources: Vec<VerifiedSource>,
112 pub consensus: ConsensusResult,
114 pub metrics: VerificationMetrics,
116 pub timestamp: DateTime<Utc>,
118 pub duration_ms: u64,
120 pub config_used: ResearchConfig,
122 pub warnings: Vec<String>,
124}
125
126impl ResearchResult {
127 pub fn is_verified(&self) -> bool {
129 self.status.is_success()
130 }
131
132 pub fn has_problems(&self) -> bool {
134 self.status.is_problem() || !self.warnings.is_empty()
135 }
136
137 pub fn summary(&self) -> String {
139 format!(
140 "{} {} - {} sources, {:.0}% confidence, {}ms",
141 self.status.emoji(),
142 self.status.description(),
143 self.sources.len(),
144 self.confidence * 100.0,
145 self.duration_ms
146 )
147 }
148
149 pub fn detailed_report(&self) -> String {
151 let mut report = String::new();
152
153 report.push_str("=== TRIANGULATED RESEARCH REPORT ===\n\n");
154 report.push_str(&format!("Query: {}\n", self.query));
155 report.push_str(&format!(
156 "Status: {} {}\n",
157 self.status.emoji(),
158 self.status.description()
159 ));
160 report.push_str(&format!("Confidence: {:.1}%\n", self.confidence * 100.0));
161 report.push_str(&format!("Duration: {}ms\n\n", self.duration_ms));
162
163 report.push_str("--- Sources ---\n");
164 for (i, source) in self.sources.iter().enumerate() {
165 let tier_label = match source.quality.tier {
166 SourceTier::Tier1 => "[T1]",
167 SourceTier::Tier2 => "[T2]",
168 SourceTier::Tier3 => "[T3]",
169 SourceTier::Unknown => "[??]",
170 };
171 let support = match source.supports_claim {
172 Some(true) => "\u{2705}",
173 Some(false) => "\u{274c}",
174 None => "\u{2796}",
175 };
176 report.push_str(&format!(
177 "{}. {} {} {}\n",
178 i + 1,
179 tier_label,
180 support,
181 source.url
182 ));
183 }
184
185 report.push_str("\n--- Metrics ---\n");
186 report.push_str(&format!("Total sources: {}\n", self.metrics.total_sources));
187 report.push_str(&format!(
188 "Accessible: {}\n",
189 self.metrics.accessible_sources
190 ));
191 report.push_str(&format!("Tier 1: {}\n", self.metrics.tier1_count));
192 report.push_str(&format!("Tier 2: {}\n", self.metrics.tier2_count));
193 report.push_str(&format!("Tier 3: {}\n", self.metrics.tier3_count));
194 report.push_str(&format!(
195 "Supporting: {}\n",
196 self.metrics.supporting_sources
197 ));
198 report.push_str(&format!("Refuting: {}\n", self.metrics.refuting_sources));
199
200 if !self.consensus.discrepancies.is_empty() {
201 report.push_str("\n--- Discrepancies ---\n");
202 for disc in &self.consensus.discrepancies {
203 report.push_str(&format!(
204 "- {} (severity: {:.1})\n",
205 disc.aspect, disc.severity
206 ));
207 }
208 }
209
210 if !self.warnings.is_empty() {
211 report.push_str("\n--- Warnings ---\n");
212 for warn in &self.warnings {
213 report.push_str(&format!("! {}\n", warn));
214 }
215 }
216
217 report.push_str("\n--- Consensus ---\n");
218 report.push_str(&self.consensus.summary);
219 report.push('\n');
220
221 report
222 }
223}
224
225pub struct TriangulationEngine {
229 config: ResearchConfig,
231 classifier: TierClassifier,
233 consensus_analyzer: ConsensusAnalyzer,
235}
236
237impl TriangulationEngine {
238 pub fn new(config: ResearchConfig) -> Self {
240 let consensus_analyzer = ConsensusAnalyzer::new()
241 .with_min_agreement(config.min_agreement_ratio)
242 .with_min_sources(config.min_sources);
243
244 Self {
245 config,
246 classifier: TierClassifier::new(),
247 consensus_analyzer,
248 }
249 }
250
251 pub fn default_engine() -> Self {
253 Self::new(ResearchConfig::default())
254 }
255
256 pub fn strict_engine() -> Self {
258 Self::new(ResearchConfig::strict())
259 }
260
261 pub fn config(&self) -> &ResearchConfig {
263 &self.config
264 }
265
266 pub fn classifier_mut(&mut self) -> &mut TierClassifier {
268 &mut self.classifier
269 }
270
271 #[instrument(skip(self, source_urls))]
280 pub fn research_with_urls(
281 &self,
282 query: &str,
283 source_urls: &[String],
284 source_contents: &[(String, Option<String>, Option<bool>)], ) -> ResearchResult {
286 let start = Instant::now();
287 let mut warnings = Vec::new();
288
289 info!(query = %query, source_count = %source_urls.len(), "Starting triangulated research");
290
291 let classified = self.classifier.classify_multiple(source_urls);
293
294 let mut verified_sources: Vec<VerifiedSource> = Vec::new();
296 let mut seen_domains: HashSet<String> = HashSet::new();
297
298 for (url, quality) in classified {
299 if !quality.tier.meets_minimum(self.config.min_source_tier) {
301 debug!(url = %url, tier = ?quality.tier, "Source below minimum tier, skipping");
302 continue;
303 }
304
305 if self.config.require_https && !quality.has_https {
307 debug!(url = %url, "Source not HTTPS, skipping");
308 warnings.push(format!("Skipped non-HTTPS source: {}", url));
309 continue;
310 }
311
312 if seen_domains.contains(&quality.domain) {
314 debug!(url = %url, domain = %quality.domain, "Duplicate domain, skipping");
315 continue;
316 }
317 seen_domains.insert(quality.domain.clone());
318
319 let content_info = source_contents.iter().find(|(u, _, _)| u == &url);
321
322 let mut source = VerifiedSource::new(url.clone(), quality);
323 source.http_status = Some(200); if let Some((_, snippet, supports)) = content_info {
326 source.content_snippet = snippet.clone().map(|s| {
327 if s.len() > self.config.max_snippet_length {
328 format!("{}...", &s[..self.config.max_snippet_length])
329 } else {
330 s
331 }
332 });
333 source.supports_claim = *supports;
334 source.relevance_score = if supports.is_some() { 0.8 } else { 0.5 };
335 }
336
337 verified_sources.push(source);
338
339 if verified_sources.len() >= self.config.max_sources {
341 break;
342 }
343 }
344
345 if verified_sources.len() < self.config.min_sources {
347 warnings.push(format!(
348 "Insufficient sources: {} found, {} required (CONS-006 violation)",
349 verified_sources.len(),
350 self.config.min_sources
351 ));
352 }
353
354 let claim = Claim {
356 text: query.to_string(),
357 normalized: super::consensus::normalize_text(query),
358 entities: Vec::new(), keywords: super::consensus::extract_keywords(query),
360 category: Some(ClaimCategory::Factual), };
362
363 let consensus = self.consensus_analyzer.analyze(claim, &verified_sources);
365
366 let duration_ms = start.elapsed().as_millis() as u64;
368 let metrics = VerificationMetrics::from_sources(&verified_sources, duration_ms);
369
370 let status = if verified_sources.len() < self.config.min_sources {
372 VerificationStatus::Unverified
373 } else {
374 self.consensus_analyzer.to_verification_status(&consensus)
375 };
376
377 let confidence = if verified_sources.is_empty() {
379 0.0
380 } else {
381 consensus.confidence
382 * (verified_sources.len() as f64 / self.config.min_sources as f64).min(1.0)
383 };
384
385 info!(
386 status = ?status,
387 confidence = %confidence,
388 sources = %verified_sources.len(),
389 duration_ms = %duration_ms,
390 "Research complete"
391 );
392
393 ResearchResult {
394 query: query.to_string(),
395 status,
396 confidence,
397 sources: verified_sources,
398 consensus,
399 metrics,
400 timestamp: Utc::now(),
401 duration_ms,
402 config_used: self.config.clone(),
403 warnings,
404 }
405 }
406
407 pub fn quick_verify(&self, urls: &[String]) -> (bool, String) {
409 let classified = self.classifier.classify_multiple(urls);
410 let qualities: Vec<SourceQuality> = classified.into_iter().map(|(_, q)| q).collect();
411
412 self.classifier.meets_triangulation_requirement(
413 &qualities,
414 self.config.min_sources,
415 self.config.min_source_tier,
416 )
417 }
418
419 pub fn check_source(&self, url: &str) -> SourceQuality {
421 self.classifier.classify(url)
422 }
423
424 pub fn get_tier(&self, url: &str) -> SourceTier {
426 self.classifier.classify(url).tier
427 }
428}
429
430impl Default for TriangulationEngine {
431 fn default() -> Self {
432 Self::default_engine()
433 }
434}
435
436pub struct TriangulationEngineBuilder {
438 config: ResearchConfig,
439 custom_tier1_domains: Vec<String>,
440 custom_tier2_domains: Vec<String>,
441 custom_unreliable_domains: Vec<String>,
442}
443
444impl TriangulationEngineBuilder {
445 pub fn new() -> Self {
447 Self {
448 config: ResearchConfig::default(),
449 custom_tier1_domains: Vec::new(),
450 custom_tier2_domains: Vec::new(),
451 custom_unreliable_domains: Vec::new(),
452 }
453 }
454
455 pub fn min_sources(mut self, count: usize) -> Self {
457 self.config.min_sources = count.max(1);
458 self
459 }
460
461 pub fn max_sources(mut self, count: usize) -> Self {
463 self.config.max_sources = count.max(self.config.min_sources);
464 self
465 }
466
467 pub fn min_tier(mut self, tier: SourceTier) -> Self {
469 self.config.min_source_tier = tier;
470 self
471 }
472
473 pub fn timeout_ms(mut self, ms: u64) -> Self {
475 self.config.fetch_timeout_ms = ms;
476 self
477 }
478
479 pub fn agreement_ratio(mut self, ratio: f64) -> Self {
481 self.config.min_agreement_ratio = ratio.clamp(0.0, 1.0);
482 self
483 }
484
485 pub fn require_https(mut self, require: bool) -> Self {
487 self.config.require_https = require;
488 self
489 }
490
491 pub fn add_tier1_domain(mut self, domain: &str) -> Self {
493 self.custom_tier1_domains.push(domain.to_string());
494 self
495 }
496
497 pub fn add_tier2_domain(mut self, domain: &str) -> Self {
499 self.custom_tier2_domains.push(domain.to_string());
500 self
501 }
502
503 pub fn add_unreliable_domain(mut self, domain: &str) -> Self {
505 self.custom_unreliable_domains.push(domain.to_string());
506 self
507 }
508
509 pub fn build(self) -> TriangulationEngine {
511 let mut engine = TriangulationEngine::new(self.config);
512
513 for domain in self.custom_tier1_domains {
514 engine.classifier_mut().add_tier1_domain(&domain);
515 }
516 for domain in self.custom_tier2_domains {
517 engine.classifier_mut().add_tier2_domain(&domain);
518 }
519 for domain in self.custom_unreliable_domains {
520 engine.classifier_mut().add_unreliable_domain(&domain);
521 }
522
523 engine
524 }
525}
526
527impl Default for TriangulationEngineBuilder {
528 fn default() -> Self {
529 Self::new()
530 }
531}
532
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts string literals into the owned URL list the engine APIs take.
    fn owned(urls: &[&str]) -> Vec<String> {
        urls.iter().map(|url| url.to_string()).collect()
    }

    /// Builds one `(url, snippet, supports)` entry per URL, in the same order,
    /// each carrying `snippet` and marked as supporting the claim.
    fn all_supporting(
        urls: &[String],
        snippet: &str,
    ) -> Vec<(String, Option<String>, Option<bool>)> {
        urls.iter()
            .map(|url| (url.clone(), Some(snippet.to_string()), Some(true)))
            .collect()
    }

    /// The three-URL fixture shared by the summary and report tests.
    fn generic_test_urls() -> Vec<String> {
        owned(&[
            "https://docs.rs/test",
            "https://github.com/test",
            "https://stackoverflow.com/test",
        ])
    }

    #[test]
    fn test_config_default() {
        let defaults = ResearchConfig::default();
        assert_eq!(defaults.min_sources, 3);
        assert_eq!(defaults.min_source_tier, SourceTier::Tier2);
    }

    #[test]
    fn test_config_strict() {
        let strict = ResearchConfig::strict();
        assert_eq!(strict.min_sources, 5);
        assert_eq!(strict.min_source_tier, SourceTier::Tier1);
        assert!(strict.require_https);
    }

    #[test]
    fn test_engine_creation() {
        let engine = TriangulationEngine::default_engine();
        assert_eq!(engine.config().min_sources, 3);
    }

    #[test]
    fn test_quick_verify_pass() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned(&[
            "https://docs.rs/tokio",
            "https://github.com/rust-lang/rust",
            "https://en.wikipedia.org/wiki/Rust",
        ]);

        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(passes);
    }

    #[test]
    fn test_quick_verify_fail_insufficient() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned(&[
            "https://random-blog-123.com/post",
            "https://another-unknown.net/article",
        ]);

        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(!passes);
    }

    #[test]
    fn test_check_source() {
        let engine = TriangulationEngine::default_engine();

        let known = engine.check_source("https://docs.rs/tokio");
        assert_eq!(known.tier, SourceTier::Tier1);

        let unknown = engine.check_source("https://randomsite.xyz/page");
        assert_eq!(unknown.tier, SourceTier::Tier3);
    }

    #[test]
    fn test_research_with_urls() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned(&[
            "https://docs.rs/tokio",
            "https://github.com/tokio-rs/tokio",
            "https://stackoverflow.com/questions/tokio",
            "https://en.wikipedia.org/wiki/Tokio_(software)",
        ]);
        // Every source carries the same supporting snippet.
        let contents = all_supporting(&urls, "Tokio is an async runtime for Rust");

        let result =
            engine.research_with_urls("Is Tokio an async runtime for Rust?", &urls, &contents);

        assert!(result.sources.len() >= 3);
        assert!(result.confidence > 0.0);
        assert!(
            result.status.is_success(),
            "Expected successful verification status, got {:?}",
            result.status
        );
    }

    #[test]
    fn test_builder() {
        let engine = TriangulationEngineBuilder::new()
            .min_sources(5)
            .max_sources(15)
            .min_tier(SourceTier::Tier1)
            .require_https(true)
            .add_tier1_domain("mycustomdocs.com")
            .build();

        let config = engine.config();
        assert_eq!(config.min_sources, 5);
        assert!(config.require_https);

        // The custom domain must be classified as tier 1.
        let quality = engine.check_source("https://mycustomdocs.com/page");
        assert_eq!(quality.tier, SourceTier::Tier1);
    }

    #[test]
    fn test_result_summary() {
        let engine = TriangulationEngine::default_engine();
        let urls = generic_test_urls();
        let contents = all_supporting(&urls, "Test content");

        let summary = engine
            .research_with_urls("Test query", &urls, &contents)
            .summary();

        assert!(!summary.is_empty());
        assert!(summary.contains("sources"));
    }

    #[test]
    fn test_detailed_report() {
        let engine = TriangulationEngine::default_engine();
        let urls = generic_test_urls();
        let contents = all_supporting(&urls, "Test");

        let report = engine
            .research_with_urls("Test query", &urls, &contents)
            .detailed_report();

        for heading in ["TRIANGULATED RESEARCH REPORT", "Sources", "Metrics"].iter() {
            assert!(report.contains(heading));
        }
    }
}