ipfrs_network/
diagnostics.rs

1//! Network Diagnostics and Troubleshooting Utilities
2//!
3//! This module provides diagnostic tools to help identify and troubleshoot network issues.
4//!
5//! # Features
6//!
7//! - **Connectivity Tests**: Test connectivity to bootstrap nodes and peers
8//! - **Performance Diagnostics**: Measure network performance metrics
9//! - **Configuration Validation**: Validate network configuration
10//! - **Health Checks**: Comprehensive health checks for all components
11//! - **Troubleshooting Guides**: Automated diagnosis of common issues
12//!
13//! # Example
14//!
15//! ```rust
16//! use ipfrs_network::diagnostics::{NetworkDiagnostics, DiagnosticTest};
17//!
18//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
19//! let mut diagnostics = NetworkDiagnostics::new();
20//!
21//! // Run all diagnostic tests
22//! let results = diagnostics.run_all_tests();
23//! for result in results {
24//!     println!("{}: {}", result.test_name, if result.passed { "PASS" } else { "FAIL" });
25//!     if !result.passed {
26//!         println!("  Issue: {}", result.message);
27//!         if let Some(fix) = result.suggested_fix {
28//!             println!("  Fix: {}", fix);
29//!         }
30//!     }
31//! }
32//! # Ok(())
33//! # }
34//! ```
35
36use serde::{Deserialize, Serialize};
37use std::collections::HashMap;
38use std::time::{Duration, Instant};
39
40/// Result of a diagnostic test
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct DiagnosticResult {
43    /// Name of the test
44    pub test_name: String,
45    /// Whether the test passed
46    pub passed: bool,
47    /// Message describing the result
48    pub message: String,
49    /// Suggested fix if test failed
50    pub suggested_fix: Option<String>,
51    /// Test duration
52    pub duration: Duration,
53    /// Severity level (0=info, 1=warning, 2=error, 3=critical)
54    pub severity: u8,
55}
56
57/// Type of diagnostic test
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
59pub enum DiagnosticTest {
60    /// Test basic network connectivity
61    BasicConnectivity,
62    /// Test DHT functionality
63    DhtHealth,
64    /// Test NAT traversal capabilities
65    NatTraversal,
66    /// Test peer discovery
67    PeerDiscovery,
68    /// Test bootstrap node connectivity
69    BootstrapConnectivity,
70    /// Validate configuration
71    ConfigValidation,
72    /// Check system resources
73    ResourceCheck,
74    /// Test protocol compatibility
75    ProtocolCompatibility,
76}
77
78impl DiagnosticTest {
79    /// Get human-readable name for the test
80    pub fn name(&self) -> &'static str {
81        match self {
82            Self::BasicConnectivity => "Basic Connectivity",
83            Self::DhtHealth => "DHT Health",
84            Self::NatTraversal => "NAT Traversal",
85            Self::PeerDiscovery => "Peer Discovery",
86            Self::BootstrapConnectivity => "Bootstrap Connectivity",
87            Self::ConfigValidation => "Configuration Validation",
88            Self::ResourceCheck => "Resource Check",
89            Self::ProtocolCompatibility => "Protocol Compatibility",
90        }
91    }
92
93    /// Get description of what the test checks
94    pub fn description(&self) -> &'static str {
95        match self {
96            Self::BasicConnectivity => "Verifies basic network stack is functioning",
97            Self::DhtHealth => "Checks DHT routing table and query performance",
98            Self::NatTraversal => "Tests NAT type detection and hole punching capability",
99            Self::PeerDiscovery => "Verifies mDNS and DHT peer discovery mechanisms",
100            Self::BootstrapConnectivity => "Tests connectivity to configured bootstrap nodes",
101            Self::ConfigValidation => "Validates network configuration parameters",
102            Self::ResourceCheck => "Checks available system resources (memory, file descriptors)",
103            Self::ProtocolCompatibility => "Verifies protocol versions and compatibility",
104        }
105    }
106}
107
108/// Configuration diagnostics
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct ConfigDiagnostics {
111    /// Issues found in configuration
112    pub issues: Vec<ConfigIssue>,
113    /// Warnings about suboptimal settings
114    pub warnings: Vec<String>,
115    /// Recommendations for improvement
116    pub recommendations: Vec<String>,
117}
118
119/// Configuration issue
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct ConfigIssue {
122    /// Name of the configuration parameter
123    pub parameter: String,
124    /// Description of the issue
125    pub issue: String,
126    /// Suggested fix
127    pub fix: String,
128    /// Severity (0=info, 1=warning, 2=error, 3=critical)
129    pub severity: u8,
130}
131
132/// Network performance metrics
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct PerformanceMetrics {
135    /// Average latency to connected peers (milliseconds)
136    pub avg_latency_ms: f64,
137    /// Median latency to connected peers (milliseconds)
138    pub median_latency_ms: f64,
139    /// 95th percentile latency (milliseconds)
140    pub p95_latency_ms: f64,
141    /// Average bandwidth utilization (bytes/sec)
142    pub avg_bandwidth_bps: u64,
143    /// DHT query success rate (0.0 - 1.0)
144    pub dht_success_rate: f64,
145    /// Average DHT query time (milliseconds)
146    pub avg_dht_query_ms: f64,
147    /// Number of connected peers
148    pub connected_peers: usize,
149    /// Number of routing table entries
150    pub routing_table_size: usize,
151}
152
153/// Network diagnostics tool
154pub struct NetworkDiagnostics {
155    /// Test results history
156    results_history: Vec<DiagnosticResult>,
157    /// Performance metrics history
158    metrics_history: Vec<(Instant, PerformanceMetrics)>,
159    /// Maximum history size
160    max_history: usize,
161}
162
163impl NetworkDiagnostics {
164    /// Create a new diagnostics instance
165    pub fn new() -> Self {
166        Self {
167            results_history: Vec::new(),
168            metrics_history: Vec::new(),
169            max_history: 100,
170        }
171    }
172
173    /// Create diagnostics with custom history size
174    pub fn with_history_size(max_history: usize) -> Self {
175        Self {
176            results_history: Vec::new(),
177            metrics_history: Vec::new(),
178            max_history,
179        }
180    }
181
182    /// Run all diagnostic tests
183    pub fn run_all_tests(&mut self) -> Vec<DiagnosticResult> {
184        let tests = vec![
185            DiagnosticTest::BasicConnectivity,
186            DiagnosticTest::ConfigValidation,
187            DiagnosticTest::ResourceCheck,
188            DiagnosticTest::DhtHealth,
189            DiagnosticTest::NatTraversal,
190            DiagnosticTest::PeerDiscovery,
191            DiagnosticTest::BootstrapConnectivity,
192            DiagnosticTest::ProtocolCompatibility,
193        ];
194
195        let mut results = Vec::new();
196        for test in tests {
197            let result = self.run_test(test);
198            results.push(result);
199        }
200
201        results
202    }
203
204    /// Run a specific diagnostic test
205    pub fn run_test(&mut self, test: DiagnosticTest) -> DiagnosticResult {
206        let start = Instant::now();
207
208        let result = match test {
209            DiagnosticTest::BasicConnectivity => self.test_basic_connectivity(),
210            DiagnosticTest::DhtHealth => self.test_dht_health(),
211            DiagnosticTest::NatTraversal => self.test_nat_traversal(),
212            DiagnosticTest::PeerDiscovery => self.test_peer_discovery(),
213            DiagnosticTest::BootstrapConnectivity => self.test_bootstrap_connectivity(),
214            DiagnosticTest::ConfigValidation => self.test_config_validation(),
215            DiagnosticTest::ResourceCheck => self.test_resource_check(),
216            DiagnosticTest::ProtocolCompatibility => self.test_protocol_compatibility(),
217        };
218
219        let duration = start.elapsed();
220        let mut result = result;
221        result.duration = duration;
222        result.test_name = test.name().to_string();
223
224        // Store in history
225        self.results_history.push(result.clone());
226        if self.results_history.len() > self.max_history {
227            self.results_history.remove(0);
228        }
229
230        result
231    }
232
233    /// Get diagnostic results history
234    pub fn results_history(&self) -> &[DiagnosticResult] {
235        &self.results_history
236    }
237
238    /// Get latest test result for a specific test
239    pub fn latest_result(&self, test: DiagnosticTest) -> Option<&DiagnosticResult> {
240        self.results_history
241            .iter()
242            .rev()
243            .find(|r| r.test_name == test.name())
244    }
245
246    /// Generate comprehensive diagnostic report
247    pub fn generate_report(&self) -> String {
248        let mut report = String::new();
249        report.push_str("Network Diagnostics Report\n");
250        report.push_str("==========================\n\n");
251
252        if self.results_history.is_empty() {
253            report.push_str("No diagnostic tests have been run yet.\n");
254            return report;
255        }
256
257        // Summary
258        let total_tests = self.results_history.len();
259        let passed = self.results_history.iter().filter(|r| r.passed).count();
260        let failed = total_tests - passed;
261
262        report.push_str(&format!("Total Tests: {}\n", total_tests));
263        report.push_str(&format!("Passed: {}\n", passed));
264        report.push_str(&format!("Failed: {}\n\n", failed));
265
266        // Failed tests details
267        if failed > 0 {
268            report.push_str("Failed Tests:\n");
269            report.push_str("-------------\n");
270            for result in self.results_history.iter().filter(|r| !r.passed) {
271                report.push_str(&format!("\n{}\n", result.test_name));
272                report.push_str(&format!("  Issue: {}\n", result.message));
273                if let Some(fix) = &result.suggested_fix {
274                    report.push_str(&format!("  Suggested Fix: {}\n", fix));
275                }
276                report.push_str(&format!(
277                    "  Severity: {}\n",
278                    severity_string(result.severity)
279                ));
280            }
281        }
282
283        report
284    }
285
286    // Individual test implementations (placeholders for now)
287
288    #[allow(dead_code)]
289    fn test_basic_connectivity(&self) -> DiagnosticResult {
290        DiagnosticResult {
291            test_name: String::new(),
292            passed: true,
293            message: "Network stack is functioning correctly".to_string(),
294            suggested_fix: None,
295            duration: Duration::default(),
296            severity: 0,
297        }
298    }
299
300    #[allow(dead_code)]
301    fn test_dht_health(&self) -> DiagnosticResult {
302        DiagnosticResult {
303            test_name: String::new(),
304            passed: true,
305            message: "DHT is healthy and responsive".to_string(),
306            suggested_fix: None,
307            duration: Duration::default(),
308            severity: 0,
309        }
310    }
311
312    #[allow(dead_code)]
313    fn test_nat_traversal(&self) -> DiagnosticResult {
314        DiagnosticResult {
315            test_name: String::new(),
316            passed: true,
317            message: "NAT traversal mechanisms are working".to_string(),
318            suggested_fix: None,
319            duration: Duration::default(),
320            severity: 0,
321        }
322    }
323
324    #[allow(dead_code)]
325    fn test_peer_discovery(&self) -> DiagnosticResult {
326        DiagnosticResult {
327            test_name: String::new(),
328            passed: true,
329            message: "Peer discovery is functioning".to_string(),
330            suggested_fix: None,
331            duration: Duration::default(),
332            severity: 0,
333        }
334    }
335
336    #[allow(dead_code)]
337    fn test_bootstrap_connectivity(&self) -> DiagnosticResult {
338        DiagnosticResult {
339            test_name: String::new(),
340            passed: true,
341            message: "Bootstrap nodes are reachable".to_string(),
342            suggested_fix: None,
343            duration: Duration::default(),
344            severity: 0,
345        }
346    }
347
348    #[allow(dead_code)]
349    fn test_config_validation(&self) -> DiagnosticResult {
350        DiagnosticResult {
351            test_name: String::new(),
352            passed: true,
353            message: "Configuration is valid".to_string(),
354            suggested_fix: None,
355            duration: Duration::default(),
356            severity: 0,
357        }
358    }
359
360    #[allow(dead_code)]
361    fn test_resource_check(&self) -> DiagnosticResult {
362        DiagnosticResult {
363            test_name: String::new(),
364            passed: true,
365            message: "System resources are adequate".to_string(),
366            suggested_fix: None,
367            duration: Duration::default(),
368            severity: 0,
369        }
370    }
371
372    #[allow(dead_code)]
373    fn test_protocol_compatibility(&self) -> DiagnosticResult {
374        DiagnosticResult {
375            test_name: String::new(),
376            passed: true,
377            message: "Protocol versions are compatible".to_string(),
378            suggested_fix: None,
379            duration: Duration::default(),
380            severity: 0,
381        }
382    }
383
384    /// Record performance metrics
385    pub fn record_metrics(&mut self, metrics: PerformanceMetrics) {
386        self.metrics_history.push((Instant::now(), metrics));
387        if self.metrics_history.len() > self.max_history {
388            self.metrics_history.remove(0);
389        }
390    }
391
392    /// Get latest performance metrics
393    pub fn latest_metrics(&self) -> Option<&PerformanceMetrics> {
394        self.metrics_history.last().map(|(_, metrics)| metrics)
395    }
396
397    /// Get performance metrics history
398    pub fn metrics_history(&self) -> &[(Instant, PerformanceMetrics)] {
399        &self.metrics_history
400    }
401}
402
403impl Default for NetworkDiagnostics {
404    fn default() -> Self {
405        Self::new()
406    }
407}
408
/// Map a numeric severity level to its display label.
///
/// Levels 0 through 3 map to "Info", "Warning", "Error" and "Critical"; any
/// other value yields "Unknown".
fn severity_string(severity: u8) -> &'static str {
    // Indexed by severity level.
    const LABELS: [&str; 4] = ["Info", "Warning", "Error", "Critical"];

    LABELS
        .get(usize::from(severity))
        .copied()
        .unwrap_or("Unknown")
}
418
/// Canned troubleshooting advice for common network issues.
#[derive(Debug)]
pub struct TroubleshootingGuide;

impl TroubleshootingGuide {
    /// Topic key paired with its advice text.
    ///
    /// This is the single source for both `get_advice` and `list_topics`, so
    /// the two can never disagree about which topics exist. The order here is
    /// the order `list_topics` reports.
    const GUIDES: [(&'static str, &'static str); 5] = [
        (
            "no_peers",
            "No peers connected:\n\
             1. Check internet connectivity\n\
             2. Verify bootstrap nodes are configured\n\
             3. Check firewall settings\n\
             4. Ensure listen addresses are correct\n\
             5. Try enabling mDNS for local discovery",
        ),
        (
            "slow_dht",
            "DHT queries are slow:\n\
             1. Increase DHT concurrency (alpha parameter)\n\
             2. Add more bootstrap nodes\n\
             3. Check network latency to peers\n\
             4. Enable query caching\n\
             5. Tune timeout parameters",
        ),
        (
            "nat_issues",
            "NAT traversal failing:\n\
             1. Enable AutoNAT to detect NAT type\n\
             2. Configure Circuit Relay for fallback\n\
             3. Try enabling DCUtR for hole punching\n\
             4. Consider using UPnP if available\n\
             5. Use relay nodes as backup",
        ),
        (
            "high_memory",
            "High memory usage:\n\
             1. Use low_memory() configuration preset\n\
             2. Reduce max_connections limit\n\
             3. Enable aggressive cache cleanup\n\
             4. Reduce peer store size\n\
             5. Disable unused features",
        ),
        (
            "connection_churn",
            "Too many connection changes:\n\
             1. Increase connection keep-alive interval\n\
             2. Reduce connection limits\n\
             3. Improve peer selection criteria\n\
             4. Check network stability\n\
             5. Enable connection quality prediction",
        ),
    ];

    /// Look up the advice text for a topic key.
    ///
    /// Returns `None` when `issue` is not one of the keys reported by
    /// `list_topics`. The match is exact and case-sensitive.
    pub fn get_advice(issue: &str) -> Option<String> {
        // The table is tiny, so building the lookup map per call is cheap.
        let guides: HashMap<&str, &str> = Self::GUIDES.iter().copied().collect();
        guides.get(issue).map(|advice| (*advice).to_string())
    }

    /// List every topic key that `get_advice` has advice for.
    pub fn list_topics() -> Vec<&'static str> {
        Self::GUIDES.iter().map(|&(topic, _)| topic).collect()
    }
}
490
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a metrics snapshot in which only `connected_peers` varies.
    fn sample_metrics(connected_peers: usize) -> PerformanceMetrics {
        PerformanceMetrics {
            avg_latency_ms: 50.0,
            median_latency_ms: 45.0,
            p95_latency_ms: 100.0,
            avg_bandwidth_bps: 1_000_000,
            dht_success_rate: 0.95,
            avg_dht_query_ms: 200.0,
            connected_peers,
            routing_table_size: 50,
        }
    }

    #[test]
    fn test_diagnostic_test_names() {
        assert_eq!(
            DiagnosticTest::BasicConnectivity.name(),
            "Basic Connectivity"
        );
        assert_eq!(DiagnosticTest::DhtHealth.name(), "DHT Health");
    }

    #[test]
    fn test_diagnostics_creation() {
        let diag = NetworkDiagnostics::new();
        assert_eq!(diag.results_history().len(), 0);
        assert!(diag.metrics_history().is_empty());
        assert!(diag.latest_metrics().is_none());
    }

    #[test]
    fn test_default_matches_new() {
        let diag = NetworkDiagnostics::default();
        assert_eq!(diag.max_history, NetworkDiagnostics::new().max_history);
        assert!(diag.results_history().is_empty());
    }

    #[test]
    fn test_diagnostics_with_history_size() {
        let diag = NetworkDiagnostics::with_history_size(50);
        assert_eq!(diag.max_history, 50);
    }

    #[test]
    fn test_run_test() {
        let mut diag = NetworkDiagnostics::new();
        let result = diag.run_test(DiagnosticTest::BasicConnectivity);
        assert!(!result.test_name.is_empty());
        assert_eq!(result.test_name, DiagnosticTest::BasicConnectivity.name());
        assert_eq!(diag.results_history().len(), 1);
    }

    #[test]
    fn test_run_all_tests() {
        let mut diag = NetworkDiagnostics::new();
        let results = diag.run_all_tests();
        assert_eq!(results.len(), 8);
        // Every run is also recorded in the history.
        assert_eq!(diag.results_history().len(), 8);
    }

    #[test]
    fn test_latest_result() {
        let mut diag = NetworkDiagnostics::new();
        diag.run_test(DiagnosticTest::BasicConnectivity);
        diag.run_test(DiagnosticTest::DhtHealth);

        let latest = diag.latest_result(DiagnosticTest::DhtHealth);
        assert!(latest.is_some());
        assert_eq!(latest.unwrap().test_name, "DHT Health");

        // A test that was never run has no result.
        assert!(diag.latest_result(DiagnosticTest::NatTraversal).is_none());
    }

    #[test]
    fn test_generate_report() {
        let mut diag = NetworkDiagnostics::new();
        diag.run_all_tests();

        let report = diag.generate_report();
        assert!(report.contains("Network Diagnostics Report"));
        assert!(report.contains("Total Tests:"));
        assert!(report.contains("Total Tests: 8\n"));
        assert!(report.contains("Passed: 8\n"));
        assert!(report.contains("Failed: 0\n"));
        // No failure section when everything passed.
        assert!(!report.contains("Failed Tests:"));
    }

    #[test]
    fn test_generate_report_without_results() {
        let diag = NetworkDiagnostics::new();
        let report = diag.generate_report();
        assert!(report.contains("No diagnostic tests have been run yet."));
        assert!(!report.contains("Total Tests:"));
    }

    #[test]
    fn test_metrics_recording() {
        let mut diag = NetworkDiagnostics::new();

        diag.record_metrics(sample_metrics(10));
        assert_eq!(diag.metrics_history().len(), 1);

        let latest = diag.latest_metrics();
        assert!(latest.is_some());
        assert_eq!(latest.unwrap().connected_peers, 10);
    }

    #[test]
    fn test_metrics_history_size_limit() {
        let mut diag = NetworkDiagnostics::with_history_size(2);

        diag.record_metrics(sample_metrics(1));
        diag.record_metrics(sample_metrics(2));
        diag.record_metrics(sample_metrics(3));

        // The oldest snapshot is the one that gets evicted.
        assert_eq!(diag.metrics_history().len(), 2);
        assert_eq!(diag.metrics_history()[0].1.connected_peers, 2);
        assert_eq!(diag.latest_metrics().unwrap().connected_peers, 3);
    }

    #[test]
    fn test_troubleshooting_guide() {
        let advice = TroubleshootingGuide::get_advice("no_peers");
        assert!(advice.is_some());
        assert!(advice.unwrap().contains("bootstrap"));

        let topics = TroubleshootingGuide::list_topics();
        assert!(topics.contains(&"no_peers"));
        assert!(topics.contains(&"slow_dht"));

        // Every listed topic must resolve, and unknown topics must not.
        for topic in topics {
            assert!(TroubleshootingGuide::get_advice(topic).is_some());
        }
        assert!(TroubleshootingGuide::get_advice("not_a_topic").is_none());
    }

    #[test]
    fn test_history_size_limit() {
        let mut diag = NetworkDiagnostics::with_history_size(3);

        diag.run_test(DiagnosticTest::BasicConnectivity);
        diag.run_test(DiagnosticTest::DhtHealth);
        diag.run_test(DiagnosticTest::NatTraversal);
        diag.run_test(DiagnosticTest::PeerDiscovery);

        assert_eq!(diag.results_history().len(), 3);

        // The oldest result (Basic Connectivity) is the one that gets evicted.
        let names: Vec<&str> = diag
            .results_history()
            .iter()
            .map(|result| result.test_name.as_str())
            .collect();
        assert_eq!(names, ["DHT Health", "NAT Traversal", "Peer Discovery"]);
        assert!(diag
            .latest_result(DiagnosticTest::BasicConnectivity)
            .is_none());
    }

    #[test]
    fn test_severity_levels() {
        assert_eq!(severity_string(0), "Info");
        assert_eq!(severity_string(1), "Warning");
        assert_eq!(severity_string(2), "Error");
        assert_eq!(severity_string(3), "Critical");
        // Anything outside 0..=3 falls back to "Unknown".
        assert_eq!(severity_string(4), "Unknown");
        assert_eq!(severity_string(u8::MAX), "Unknown");
    }
}