Skip to main content

reasonkit_web/research/
triangulation.rs

1//! Triangulation Engine
2//!
3//! Core orchestrator for triangulated web research (CONS-006 compliance).
4//!
5//! # Philosophy
6//!
7//! **"Three-Source Rule: No claim without 3+ independent verifications."**
8//!
9//! The triangulation engine orchestrates the full research pipeline:
10//! 1. Query analysis and search generation
11//! 2. Multi-source fetching with tier classification
12//! 3. Content extraction and relevance scoring
13//! 4. Consensus analysis and conflict detection
14//! 5. Final verification result generation
15
16use super::consensus::{Claim, ClaimCategory, ConsensusAnalyzer, ConsensusResult};
17use super::sources::{SourceQuality, SourceTier, TierClassifier};
18use super::verification::{VerificationMetrics, VerificationStatus, VerifiedSource};
19use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use std::collections::HashSet;
22use std::time::Instant;
23use tracing::{debug, info, instrument, warn};
24
25/// Configuration for triangulated research
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ResearchConfig {
28    /// Minimum number of sources required (CONS-006: 3)
29    pub min_sources: usize,
30    /// Maximum number of sources to fetch
31    pub max_sources: usize,
32    /// Minimum source tier for inclusion (default: Tier2)
33    pub min_source_tier: SourceTier,
34    /// Timeout for each source fetch (milliseconds)
35    pub fetch_timeout_ms: u64,
36    /// Maximum parallel fetches
37    pub max_parallel_fetches: usize,
38    /// Minimum agreement ratio for verification
39    pub min_agreement_ratio: f64,
40    /// Enable caching of verification results
41    pub enable_cache: bool,
42    /// Cache TTL in seconds
43    pub cache_ttl_secs: u64,
44    /// Require HTTPS sources
45    pub require_https: bool,
46    /// Include source snippets in results
47    pub include_snippets: bool,
48    /// Maximum snippet length
49    pub max_snippet_length: usize,
50}
51
52impl Default for ResearchConfig {
53    fn default() -> Self {
54        Self {
55            min_sources: 3, // CONS-006 compliance
56            max_sources: 10,
57            min_source_tier: SourceTier::Tier2,
58            fetch_timeout_ms: 30_000,
59            max_parallel_fetches: 5,
60            min_agreement_ratio: 0.7,
61            enable_cache: true,
62            cache_ttl_secs: 3600, // 1 hour
63            require_https: false, // Allow HTTP for broader coverage
64            include_snippets: true,
65            max_snippet_length: 500,
66        }
67    }
68}
69
70impl ResearchConfig {
71    /// Create a strict configuration (highest quality)
72    pub fn strict() -> Self {
73        Self {
74            min_sources: 5,
75            max_sources: 15,
76            min_source_tier: SourceTier::Tier1,
77            min_agreement_ratio: 0.8,
78            require_https: true,
79            ..Default::default()
80        }
81    }
82
83    /// Create a balanced configuration
84    pub fn balanced() -> Self {
85        Self::default()
86    }
87
88    /// Create a permissive configuration (faster, less rigorous)
89    pub fn permissive() -> Self {
90        Self {
91            min_sources: 2,
92            max_sources: 5,
93            min_source_tier: SourceTier::Tier3,
94            min_agreement_ratio: 0.6,
95            fetch_timeout_ms: 15_000,
96            ..Default::default()
97        }
98    }
99}
100
101/// Final research result
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct ResearchResult {
104    /// Original query/claim
105    pub query: String,
106    /// Verification status
107    pub status: VerificationStatus,
108    /// Overall confidence (0.0 - 1.0)
109    pub confidence: f64,
110    /// Verified sources used
111    pub sources: Vec<VerifiedSource>,
112    /// Consensus analysis result
113    pub consensus: ConsensusResult,
114    /// Verification metrics
115    pub metrics: VerificationMetrics,
116    /// When the research was conducted
117    pub timestamp: DateTime<Utc>,
118    /// Research duration in milliseconds
119    pub duration_ms: u64,
120    /// Configuration used
121    pub config_used: ResearchConfig,
122    /// Warnings or notes
123    pub warnings: Vec<String>,
124}
125
126impl ResearchResult {
127    /// Check if the result is considered verified
128    pub fn is_verified(&self) -> bool {
129        self.status.is_success()
130    }
131
132    /// Check if there are problems with the result
133    pub fn has_problems(&self) -> bool {
134        self.status.is_problem() || !self.warnings.is_empty()
135    }
136
137    /// Get a short summary
138    pub fn summary(&self) -> String {
139        format!(
140            "{} {} - {} sources, {:.0}% confidence, {}ms",
141            self.status.emoji(),
142            self.status.description(),
143            self.sources.len(),
144            self.confidence * 100.0,
145            self.duration_ms
146        )
147    }
148
149    /// Get detailed report
150    pub fn detailed_report(&self) -> String {
151        let mut report = String::new();
152
153        report.push_str("=== TRIANGULATED RESEARCH REPORT ===\n\n");
154        report.push_str(&format!("Query: {}\n", self.query));
155        report.push_str(&format!(
156            "Status: {} {}\n",
157            self.status.emoji(),
158            self.status.description()
159        ));
160        report.push_str(&format!("Confidence: {:.1}%\n", self.confidence * 100.0));
161        report.push_str(&format!("Duration: {}ms\n\n", self.duration_ms));
162
163        report.push_str("--- Sources ---\n");
164        for (i, source) in self.sources.iter().enumerate() {
165            let tier_label = match source.quality.tier {
166                SourceTier::Tier1 => "[T1]",
167                SourceTier::Tier2 => "[T2]",
168                SourceTier::Tier3 => "[T3]",
169                SourceTier::Unknown => "[??]",
170            };
171            let support = match source.supports_claim {
172                Some(true) => "\u{2705}",
173                Some(false) => "\u{274c}",
174                None => "\u{2796}",
175            };
176            report.push_str(&format!(
177                "{}. {} {} {}\n",
178                i + 1,
179                tier_label,
180                support,
181                source.url
182            ));
183        }
184
185        report.push_str("\n--- Metrics ---\n");
186        report.push_str(&format!("Total sources: {}\n", self.metrics.total_sources));
187        report.push_str(&format!(
188            "Accessible: {}\n",
189            self.metrics.accessible_sources
190        ));
191        report.push_str(&format!("Tier 1: {}\n", self.metrics.tier1_count));
192        report.push_str(&format!("Tier 2: {}\n", self.metrics.tier2_count));
193        report.push_str(&format!("Tier 3: {}\n", self.metrics.tier3_count));
194        report.push_str(&format!(
195            "Supporting: {}\n",
196            self.metrics.supporting_sources
197        ));
198        report.push_str(&format!("Refuting: {}\n", self.metrics.refuting_sources));
199
200        if !self.consensus.discrepancies.is_empty() {
201            report.push_str("\n--- Discrepancies ---\n");
202            for disc in &self.consensus.discrepancies {
203                report.push_str(&format!(
204                    "- {} (severity: {:.1})\n",
205                    disc.aspect, disc.severity
206                ));
207            }
208        }
209
210        if !self.warnings.is_empty() {
211            report.push_str("\n--- Warnings ---\n");
212            for warn in &self.warnings {
213                report.push_str(&format!("! {}\n", warn));
214            }
215        }
216
217        report.push_str("\n--- Consensus ---\n");
218        report.push_str(&self.consensus.summary);
219        report.push('\n');
220
221        report
222    }
223}
224
225/// The Triangulation Engine
226///
227/// Orchestrates triangulated web research with 3+ source verification.
228pub struct TriangulationEngine {
229    /// Configuration
230    config: ResearchConfig,
231    /// Source tier classifier
232    classifier: TierClassifier,
233    /// Consensus analyzer
234    consensus_analyzer: ConsensusAnalyzer,
235}
236
237impl TriangulationEngine {
238    /// Create a new triangulation engine with default config
239    pub fn new(config: ResearchConfig) -> Self {
240        let consensus_analyzer = ConsensusAnalyzer::new()
241            .with_min_agreement(config.min_agreement_ratio)
242            .with_min_sources(config.min_sources);
243
244        Self {
245            config,
246            classifier: TierClassifier::new(),
247            consensus_analyzer,
248        }
249    }
250
251    /// Create with default configuration
252    pub fn default_engine() -> Self {
253        Self::new(ResearchConfig::default())
254    }
255
256    /// Create with strict configuration
257    pub fn strict_engine() -> Self {
258        Self::new(ResearchConfig::strict())
259    }
260
261    /// Get current configuration
262    pub fn config(&self) -> &ResearchConfig {
263        &self.config
264    }
265
266    /// Get mutable reference to classifier for customization
267    pub fn classifier_mut(&mut self) -> &mut TierClassifier {
268        &mut self.classifier
269    }
270
271    /// Perform triangulated research on a query/claim
272    ///
273    /// This is the main entry point. It:
274    /// 1. Analyzes the query
275    /// 2. Classifies provided source URLs by tier
276    /// 3. Filters sources by minimum tier
277    /// 4. Analyzes consensus
278    /// 5. Returns comprehensive verification result
279    #[instrument(skip(self, source_urls))]
280    pub fn research_with_urls(
281        &self,
282        query: &str,
283        source_urls: &[String],
284        source_contents: &[(String, Option<String>, Option<bool>)], // (url, snippet, supports)
285    ) -> ResearchResult {
286        let start = Instant::now();
287        let mut warnings = Vec::new();
288
289        info!(query = %query, source_count = %source_urls.len(), "Starting triangulated research");
290
291        // Step 1: Classify all sources
292        let classified = self.classifier.classify_multiple(source_urls);
293
294        // Step 2: Filter by minimum tier and create VerifiedSource objects
295        let mut verified_sources: Vec<VerifiedSource> = Vec::new();
296        let mut seen_domains: HashSet<String> = HashSet::new();
297
298        for (url, quality) in classified {
299            // Check minimum tier
300            if !quality.tier.meets_minimum(self.config.min_source_tier) {
301                debug!(url = %url, tier = ?quality.tier, "Source below minimum tier, skipping");
302                continue;
303            }
304
305            // Require HTTPS if configured
306            if self.config.require_https && !quality.has_https {
307                debug!(url = %url, "Source not HTTPS, skipping");
308                warnings.push(format!("Skipped non-HTTPS source: {}", url));
309                continue;
310            }
311
312            // Check for domain uniqueness (we want independent sources)
313            if seen_domains.contains(&quality.domain) {
314                debug!(url = %url, domain = %quality.domain, "Duplicate domain, skipping");
315                continue;
316            }
317            seen_domains.insert(quality.domain.clone());
318
319            // Find content for this source
320            let content_info = source_contents.iter().find(|(u, _, _)| u == &url);
321
322            let mut source = VerifiedSource::new(url.clone(), quality);
323            source.http_status = Some(200); // Assume success if provided
324
325            if let Some((_, snippet, supports)) = content_info {
326                source.content_snippet = snippet.clone().map(|s| {
327                    if s.len() > self.config.max_snippet_length {
328                        format!("{}...", &s[..self.config.max_snippet_length])
329                    } else {
330                        s
331                    }
332                });
333                source.supports_claim = *supports;
334                source.relevance_score = if supports.is_some() { 0.8 } else { 0.5 };
335            }
336
337            verified_sources.push(source);
338
339            // Stop if we have enough sources
340            if verified_sources.len() >= self.config.max_sources {
341                break;
342            }
343        }
344
345        // Step 3: Check if we have enough sources
346        if verified_sources.len() < self.config.min_sources {
347            warnings.push(format!(
348                "Insufficient sources: {} found, {} required (CONS-006 violation)",
349                verified_sources.len(),
350                self.config.min_sources
351            ));
352        }
353
354        // Step 4: Build claim for consensus analysis
355        let claim = Claim {
356            text: query.to_string(),
357            normalized: super::consensus::normalize_text(query),
358            entities: Vec::new(), // Would extract with NLP in production
359            keywords: super::consensus::extract_keywords(query),
360            category: Some(ClaimCategory::Factual), // Default
361        };
362
363        // Step 5: Analyze consensus
364        let consensus = self.consensus_analyzer.analyze(claim, &verified_sources);
365
366        // Step 6: Calculate metrics
367        let duration_ms = start.elapsed().as_millis() as u64;
368        let metrics = VerificationMetrics::from_sources(&verified_sources, duration_ms);
369
370        // Step 7: Determine final status
371        let status = if verified_sources.len() < self.config.min_sources {
372            VerificationStatus::Unverified
373        } else {
374            self.consensus_analyzer.to_verification_status(&consensus)
375        };
376
377        // Step 8: Calculate confidence
378        let confidence = if verified_sources.is_empty() {
379            0.0
380        } else {
381            consensus.confidence
382                * (verified_sources.len() as f64 / self.config.min_sources as f64).min(1.0)
383        };
384
385        info!(
386            status = ?status,
387            confidence = %confidence,
388            sources = %verified_sources.len(),
389            duration_ms = %duration_ms,
390            "Research complete"
391        );
392
393        ResearchResult {
394            query: query.to_string(),
395            status,
396            confidence,
397            sources: verified_sources,
398            consensus,
399            metrics,
400            timestamp: Utc::now(),
401            duration_ms,
402            config_used: self.config.clone(),
403            warnings,
404        }
405    }
406
407    /// Quick verification: just classify URLs and check triangulation requirement
408    pub fn quick_verify(&self, urls: &[String]) -> (bool, String) {
409        let classified = self.classifier.classify_multiple(urls);
410        let qualities: Vec<SourceQuality> = classified.into_iter().map(|(_, q)| q).collect();
411
412        self.classifier.meets_triangulation_requirement(
413            &qualities,
414            self.config.min_sources,
415            self.config.min_source_tier,
416        )
417    }
418
419    /// Check if a single URL meets tier requirements
420    pub fn check_source(&self, url: &str) -> SourceQuality {
421        self.classifier.classify(url)
422    }
423
424    /// Get source tier for a URL
425    pub fn get_tier(&self, url: &str) -> SourceTier {
426        self.classifier.classify(url).tier
427    }
428}
429
430impl Default for TriangulationEngine {
431    fn default() -> Self {
432        Self::default_engine()
433    }
434}
435
436/// Builder for TriangulationEngine
437pub struct TriangulationEngineBuilder {
438    config: ResearchConfig,
439    custom_tier1_domains: Vec<String>,
440    custom_tier2_domains: Vec<String>,
441    custom_unreliable_domains: Vec<String>,
442}
443
444impl TriangulationEngineBuilder {
445    /// Create a new builder
446    pub fn new() -> Self {
447        Self {
448            config: ResearchConfig::default(),
449            custom_tier1_domains: Vec::new(),
450            custom_tier2_domains: Vec::new(),
451            custom_unreliable_domains: Vec::new(),
452        }
453    }
454
455    /// Set minimum sources
456    pub fn min_sources(mut self, count: usize) -> Self {
457        self.config.min_sources = count.max(1);
458        self
459    }
460
461    /// Set maximum sources
462    pub fn max_sources(mut self, count: usize) -> Self {
463        self.config.max_sources = count.max(self.config.min_sources);
464        self
465    }
466
467    /// Set minimum source tier
468    pub fn min_tier(mut self, tier: SourceTier) -> Self {
469        self.config.min_source_tier = tier;
470        self
471    }
472
473    /// Set fetch timeout
474    pub fn timeout_ms(mut self, ms: u64) -> Self {
475        self.config.fetch_timeout_ms = ms;
476        self
477    }
478
479    /// Set agreement ratio
480    pub fn agreement_ratio(mut self, ratio: f64) -> Self {
481        self.config.min_agreement_ratio = ratio.clamp(0.0, 1.0);
482        self
483    }
484
485    /// Require HTTPS
486    pub fn require_https(mut self, require: bool) -> Self {
487        self.config.require_https = require;
488        self
489    }
490
491    /// Add custom Tier 1 domain
492    pub fn add_tier1_domain(mut self, domain: &str) -> Self {
493        self.custom_tier1_domains.push(domain.to_string());
494        self
495    }
496
497    /// Add custom Tier 2 domain
498    pub fn add_tier2_domain(mut self, domain: &str) -> Self {
499        self.custom_tier2_domains.push(domain.to_string());
500        self
501    }
502
503    /// Add custom unreliable domain
504    pub fn add_unreliable_domain(mut self, domain: &str) -> Self {
505        self.custom_unreliable_domains.push(domain.to_string());
506        self
507    }
508
509    /// Build the engine
510    pub fn build(self) -> TriangulationEngine {
511        let mut engine = TriangulationEngine::new(self.config);
512
513        for domain in self.custom_tier1_domains {
514            engine.classifier_mut().add_tier1_domain(&domain);
515        }
516        for domain in self.custom_tier2_domains {
517            engine.classifier_mut().add_tier2_domain(&domain);
518        }
519        for domain in self.custom_unreliable_domains {
520            engine.classifier_mut().add_unreliable_domain(&domain);
521        }
522
523        engine
524    }
525}
526
527impl Default for TriangulationEngineBuilder {
528    fn default() -> Self {
529        Self::new()
530    }
531}
532
#[cfg(test)]
mod tests {
    use super::*;

    /// Shape of the per-source content tuples accepted by `research_with_urls`.
    type SourceContent = (String, Option<String>, Option<bool>);

    /// Turn string literals into the owned URL list the engine expects.
    fn owned_urls(urls: &[&str]) -> Vec<String> {
        urls.iter().map(|u| u.to_string()).collect()
    }

    /// Content entries in which every URL supports the claim with the same snippet.
    fn all_supporting(urls: &[String], snippet: &str) -> Vec<SourceContent> {
        urls.iter()
            .map(|u| (u.clone(), Some(snippet.to_string()), Some(true)))
            .collect()
    }

    /// The three `/test` URLs shared by the reporting tests.
    fn sample_test_urls() -> Vec<String> {
        owned_urls(&[
            "https://docs.rs/test",
            "https://github.com/test",
            "https://stackoverflow.com/test",
        ])
    }

    #[test]
    fn test_config_default() {
        let cfg = ResearchConfig::default();
        assert_eq!(cfg.min_sources, 3); // CONS-006
        assert_eq!(cfg.min_source_tier, SourceTier::Tier2);
    }

    #[test]
    fn test_config_strict() {
        let cfg = ResearchConfig::strict();
        assert_eq!(cfg.min_sources, 5);
        assert_eq!(cfg.min_source_tier, SourceTier::Tier1);
        assert!(cfg.require_https);
    }

    #[test]
    fn test_engine_creation() {
        let engine = TriangulationEngine::default_engine();
        assert_eq!(engine.config().min_sources, 3);
    }

    #[test]
    fn test_quick_verify_pass() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://docs.rs/tokio",
            "https://github.com/rust-lang/rust",
            "https://en.wikipedia.org/wiki/Rust",
        ]);

        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(passes);
    }

    #[test]
    fn test_quick_verify_fail_insufficient() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://random-blog-123.com/post",
            "https://another-unknown.net/article",
        ]);

        let (passes, _msg) = engine.quick_verify(&urls);
        assert!(!passes);
    }

    #[test]
    fn test_check_source() {
        let engine = TriangulationEngine::default_engine();

        let known = engine.check_source("https://docs.rs/tokio");
        assert_eq!(known.tier, SourceTier::Tier1);

        let unknown = engine.check_source("https://randomsite.xyz/page");
        assert_eq!(unknown.tier, SourceTier::Tier3);
    }

    #[test]
    fn test_research_with_urls() {
        let engine = TriangulationEngine::default_engine();
        let urls = owned_urls(&[
            "https://docs.rs/tokio",
            "https://github.com/tokio-rs/tokio",
            "https://stackoverflow.com/questions/tokio",
            "https://en.wikipedia.org/wiki/Tokio_(software)",
        ]);

        // Identical snippets simulate true consensus; differing snippets would
        // trigger "discrepancy" detection, which blocks a Supported status.
        let contents = all_supporting(&urls, "Tokio is an async runtime for Rust");

        let result =
            engine.research_with_urls("Is Tokio an async runtime for Rust?", &urls, &contents);

        // Should have at least 3 sources (CONS-006 requirement)
        assert!(result.sources.len() >= 3);
        // Should have some confidence (not zero)
        assert!(result.confidence > 0.0);
        // All sources support the claim, so verification should succeed
        assert!(
            result.status.is_success(),
            "Expected successful verification status, got {:?}",
            result.status
        );
    }

    #[test]
    fn test_builder() {
        let engine = TriangulationEngineBuilder::new()
            .min_sources(5)
            .max_sources(15)
            .min_tier(SourceTier::Tier1)
            .require_https(true)
            .add_tier1_domain("mycustomdocs.com")
            .build();

        assert_eq!(engine.config().min_sources, 5);
        assert!(engine.config().require_https);

        // Custom domain should be Tier 1
        let quality = engine.check_source("https://mycustomdocs.com/page");
        assert_eq!(quality.tier, SourceTier::Tier1);
    }

    #[test]
    fn test_result_summary() {
        let engine = TriangulationEngine::default_engine();
        let urls = sample_test_urls();
        let contents = all_supporting(&urls, "Test content");

        let summary = engine
            .research_with_urls("Test query", &urls, &contents)
            .summary();

        assert!(!summary.is_empty());
        assert!(summary.contains("sources"));
    }

    #[test]
    fn test_detailed_report() {
        let engine = TriangulationEngine::default_engine();
        let urls = sample_test_urls();
        let contents = all_supporting(&urls, "Test");

        let report = engine
            .research_with_urls("Test query", &urls, &contents)
            .detailed_report();

        assert!(report.contains("TRIANGULATED RESEARCH REPORT"));
        assert!(report.contains("Sources"));
        assert!(report.contains("Metrics"));
    }
}