1use std::{
7 collections::HashMap,
8 net::SocketAddr,
9 time::Duration,
10};
11
12use serde::{Deserialize, Serialize};
13use tracing::{debug, error, info, warn, Span};
14
15use crate::{
16 monitoring::{ErrorCategory, NatType},
17 nat_traversal_api::PeerId,
18 VarInt,
19};
20
21pub struct NatTraversalLogger {
23 config: LoggingConfig,
25 context: LoggingContext,
27}
28
29#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct LoggingConfig {
32 pub enable_frame_debug: bool,
34 pub enable_performance_logging: bool,
36 pub enable_failure_analysis: bool,
38 pub phase_log_levels: HashMap<String, String>,
40 pub include_sensitive_info: bool,
42 pub max_message_size: usize,
44}
45
46impl Default for LoggingConfig {
47 fn default() -> Self {
48 Self {
49 enable_frame_debug: true,
50 enable_performance_logging: true,
51 enable_failure_analysis: true,
52 phase_log_levels: HashMap::from([
53 ("discovery".to_string(), "info".to_string()),
54 ("coordination".to_string(), "info".to_string()),
55 ("hole_punching".to_string(), "debug".to_string()),
56 ("validation".to_string(), "info".to_string()),
57 ]),
58 include_sensitive_info: false,
59 max_message_size: 4096,
60 }
61 }
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct LoggingContext {
67 pub session_id: String,
69 pub peer_id: Option<PeerId>,
71 pub endpoint_role: String,
73 pub bootstrap_node: Option<SocketAddr>,
75 pub current_phase: NatTraversalPhase,
77 pub context_fields: HashMap<String, String>,
79}
80
81#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
83pub enum NatTraversalPhase {
84 Initialization,
85 CandidateDiscovery,
86 BootstrapCoordination,
87 HolePunching,
88 PathValidation,
89 ConnectionEstablishment,
90 Completed,
91 Failed,
92}
93
94impl std::fmt::Display for NatTraversalPhase {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 match self {
97 NatTraversalPhase::Initialization => write!(f, "initialization"),
98 NatTraversalPhase::CandidateDiscovery => write!(f, "candidate_discovery"),
99 NatTraversalPhase::BootstrapCoordination => write!(f, "bootstrap_coordination"),
100 NatTraversalPhase::HolePunching => write!(f, "hole_punching"),
101 NatTraversalPhase::PathValidation => write!(f, "path_validation"),
102 NatTraversalPhase::ConnectionEstablishment => write!(f, "connection_establishment"),
103 NatTraversalPhase::Completed => write!(f, "completed"),
104 NatTraversalPhase::Failed => write!(f, "failed"),
105 }
106 }
107}
108
109impl NatTraversalLogger {
110 pub fn new(config: LoggingConfig) -> Self {
112 Self {
113 config,
114 context: LoggingContext {
115 session_id: uuid::Uuid::new_v4().to_string(),
116 peer_id: None,
117 endpoint_role: "unknown".to_string(),
118 bootstrap_node: None,
119 current_phase: NatTraversalPhase::Initialization,
120 context_fields: HashMap::new(),
121 },
122 }
123 }
124
125 pub fn set_context(&mut self, context: LoggingContext) {
127 self.context = context;
128 }
129
130 pub fn set_phase(&mut self, phase: NatTraversalPhase) {
132 self.context.current_phase = phase;
133 }
134
135 pub fn add_context(&mut self, key: String, value: String) {
137 self.context.context_fields.insert(key, value);
138 }
139
140 pub fn log_phase_transition(&self, from_phase: NatTraversalPhase, to_phase: NatTraversalPhase, duration: Option<Duration>) {
142 let span = tracing::info_span!(
143 "nat_traversal_phase_transition",
144 session_id = %self.context.session_id,
145 peer_id = ?self.context.peer_id,
146 from_phase = %from_phase,
147 to_phase = %to_phase,
148 duration_ms = duration.map(|d| d.as_millis()),
149 );
150
151 let _enter = span.enter();
152 info!(
153 "NAT traversal phase transition: {} -> {} (duration: {:?})",
154 from_phase, to_phase, duration
155 );
156 }
157
158 pub fn log_candidate_discovery_start(&self, bootstrap_nodes: &[SocketAddr], nat_type: Option<NatType>) {
160 let span = tracing::info_span!(
161 "candidate_discovery_start",
162 session_id = %self.context.session_id,
163 peer_id = ?self.context.peer_id,
164 bootstrap_count = bootstrap_nodes.len(),
165 nat_type = ?nat_type,
166 );
167
168 let _enter = span.enter();
169 info!(
170 "Starting candidate discovery with {} bootstrap nodes (NAT type: {:?})",
171 bootstrap_nodes.len(), nat_type
172 );
173
174 for (i, node) in bootstrap_nodes.iter().enumerate() {
175 debug!("Bootstrap node {}: {}", i + 1, self.sanitize_address(*node));
176 }
177 }
178
179 pub fn log_candidate_discovered(&self, candidate: &CandidateInfo) {
181 let span = tracing::debug_span!(
182 "candidate_discovered",
183 session_id = %self.context.session_id,
184 peer_id = ?self.context.peer_id,
185 candidate_type = %candidate.candidate_type,
186 priority = candidate.priority,
187 source = %candidate.source,
188 );
189
190 let _enter = span.enter();
191 debug!(
192 "Discovered candidate: {} (type: {}, priority: {}, source: {})",
193 self.sanitize_address(candidate.address),
194 candidate.candidate_type,
195 candidate.priority,
196 candidate.source
197 );
198 }
199
200 pub fn log_coordination_request(&self, coordinator: SocketAddr, round_id: VarInt, candidates: &[CandidateInfo]) {
202 let span = tracing::info_span!(
203 "coordination_request",
204 session_id = %self.context.session_id,
205 peer_id = ?self.context.peer_id,
206 coordinator = %self.sanitize_address(coordinator),
207 round_id = %round_id,
208 candidate_count = candidates.len(),
209 );
210
211 let _enter = span.enter();
212 info!(
213 "Sending coordination request to {} (round: {}, candidates: {})",
214 self.sanitize_address(coordinator), round_id, candidates.len()
215 );
216
217 if self.config.enable_frame_debug {
218 for candidate in candidates {
219 debug!(
220 "Candidate in request: {} (priority: {})",
221 self.sanitize_address(candidate.address), candidate.priority
222 );
223 }
224 }
225 }
226
227 pub fn log_coordination_response(&self, coordinator: SocketAddr, success: bool, peer_candidates: Option<&[CandidateInfo]>) {
229 let span = tracing::info_span!(
230 "coordination_response",
231 session_id = %self.context.session_id,
232 peer_id = ?self.context.peer_id,
233 coordinator = %self.sanitize_address(coordinator),
234 success = success,
235 peer_candidate_count = peer_candidates.map(|c| c.len()),
236 );
237
238 let _enter = span.enter();
239 if success {
240 info!(
241 "Received coordination response from {} (peer candidates: {})",
242 self.sanitize_address(coordinator),
243 peer_candidates.map(|c| c.len()).unwrap_or(0)
244 );
245
246 if let Some(candidates) = peer_candidates {
247 if self.config.enable_frame_debug {
248 for candidate in candidates {
249 debug!(
250 "Peer candidate: {} (priority: {})",
251 self.sanitize_address(candidate.address), candidate.priority
252 );
253 }
254 }
255 }
256 } else {
257 warn!(
258 "Coordination request to {} failed",
259 self.sanitize_address(coordinator)
260 );
261 }
262 }
263
264 pub fn log_hole_punching_start(&self, target_addresses: &[SocketAddr], strategy: &str) {
266 let span = tracing::info_span!(
267 "hole_punching_start",
268 session_id = %self.context.session_id,
269 peer_id = ?self.context.peer_id,
270 target_count = target_addresses.len(),
271 strategy = strategy,
272 );
273
274 let _enter = span.enter();
275 info!(
276 "Starting hole punching to {} targets (strategy: {})",
277 target_addresses.len(), strategy
278 );
279
280 for (i, addr) in target_addresses.iter().enumerate() {
281 debug!("Hole punch target {}: {}", i + 1, self.sanitize_address(*addr));
282 }
283 }
284
285 pub fn log_hole_punch_attempt(&self, target: SocketAddr, attempt_number: u32, packet_size: usize) {
287 if self.config.enable_frame_debug {
288 let span = tracing::debug_span!(
289 "hole_punch_attempt",
290 session_id = %self.context.session_id,
291 peer_id = ?self.context.peer_id,
292 target = %self.sanitize_address(target),
293 attempt = attempt_number,
294 packet_size = packet_size,
295 );
296
297 let _enter = span.enter();
298 debug!(
299 "Hole punch attempt {} to {} (packet size: {} bytes)",
300 attempt_number, self.sanitize_address(target), packet_size
301 );
302 }
303 }
304
305 pub fn log_hole_punch_result(&self, target: SocketAddr, success: bool, response_time: Option<Duration>) {
307 let span = tracing::info_span!(
308 "hole_punch_result",
309 session_id = %self.context.session_id,
310 peer_id = ?self.context.peer_id,
311 target = %self.sanitize_address(target),
312 success = success,
313 response_time_ms = response_time.map(|d| d.as_millis()),
314 );
315
316 let _enter = span.enter();
317 if success {
318 info!(
319 "Hole punch to {} succeeded (response time: {:?})",
320 self.sanitize_address(target), response_time
321 );
322 } else {
323 warn!(
324 "Hole punch to {} failed",
325 self.sanitize_address(target)
326 );
327 }
328 }
329
330 pub fn log_path_validation_start(&self, paths: &[SocketAddr]) {
332 let span = tracing::info_span!(
333 "path_validation_start",
334 session_id = %self.context.session_id,
335 peer_id = ?self.context.peer_id,
336 path_count = paths.len(),
337 );
338
339 let _enter = span.enter();
340 info!("Starting path validation for {} paths", paths.len());
341
342 for (i, path) in paths.iter().enumerate() {
343 debug!("Validating path {}: {}", i + 1, self.sanitize_address(*path));
344 }
345 }
346
347 pub fn log_path_validation_result(&self, path: SocketAddr, success: bool, rtt: Option<Duration>, error: Option<&str>) {
349 let span = tracing::info_span!(
350 "path_validation_result",
351 session_id = %self.context.session_id,
352 peer_id = ?self.context.peer_id,
353 path = %self.sanitize_address(path),
354 success = success,
355 rtt_ms = rtt.map(|d| d.as_millis()),
356 error = error,
357 );
358
359 let _enter = span.enter();
360 if success {
361 info!(
362 "Path validation to {} succeeded (RTT: {:?})",
363 self.sanitize_address(path), rtt
364 );
365 } else {
366 warn!(
367 "Path validation to {} failed: {}",
368 self.sanitize_address(path),
369 error.unwrap_or("unknown error")
370 );
371 }
372 }
373
374 pub fn log_connection_established(&self, remote_address: SocketAddr, total_time: Duration, method: &str) {
376 let span = tracing::info_span!(
377 "connection_established",
378 session_id = %self.context.session_id,
379 peer_id = ?self.context.peer_id,
380 remote_address = %self.sanitize_address(remote_address),
381 total_time_ms = total_time.as_millis(),
382 method = method,
383 );
384
385 let _enter = span.enter();
386 info!(
387 "Connection established to {} via {} (total time: {:?})",
388 self.sanitize_address(remote_address), method, total_time
389 );
390 }
391
392 pub fn log_traversal_failure(&self, error_category: ErrorCategory, error_message: &str, total_time: Duration, attempts: u32) {
394 let span = tracing::error_span!(
395 "nat_traversal_failure",
396 session_id = %self.context.session_id,
397 peer_id = ?self.context.peer_id,
398 error_category = ?error_category,
399 total_time_ms = total_time.as_millis(),
400 attempts = attempts,
401 );
402
403 let _enter = span.enter();
404 error!(
405 "NAT traversal failed: {} (category: {:?}, time: {:?}, attempts: {})",
406 error_message, error_category, total_time, attempts
407 );
408 }
409
410 pub fn log_frame_transmission(&self, frame_type: &str, destination: SocketAddr, frame_size: usize, sequence: Option<u64>) {
412 if self.config.enable_frame_debug {
413 let span = tracing::debug_span!(
414 "frame_transmission",
415 session_id = %self.context.session_id,
416 peer_id = ?self.context.peer_id,
417 frame_type = frame_type,
418 destination = %self.sanitize_address(destination),
419 frame_size = frame_size,
420 sequence = sequence,
421 );
422
423 let _enter = span.enter();
424 debug!(
425 "Transmitting {} frame to {} (size: {} bytes, seq: {:?})",
426 frame_type, self.sanitize_address(destination), frame_size, sequence
427 );
428 }
429 }
430
431 pub fn log_frame_reception(&self, frame_type: &str, source: SocketAddr, frame_size: usize, sequence: Option<u64>) {
433 if self.config.enable_frame_debug {
434 let span = tracing::debug_span!(
435 "frame_reception",
436 session_id = %self.context.session_id,
437 peer_id = ?self.context.peer_id,
438 frame_type = frame_type,
439 source = %self.sanitize_address(source),
440 frame_size = frame_size,
441 sequence = sequence,
442 );
443
444 let _enter = span.enter();
445 debug!(
446 "Received {} frame from {} (size: {} bytes, seq: {:?})",
447 frame_type, self.sanitize_address(source), frame_size, sequence
448 );
449 }
450 }
451
452 pub fn log_performance_metrics(&self, metrics: &PerformanceMetrics) {
454 if self.config.enable_performance_logging {
455 let span = tracing::info_span!(
456 "performance_metrics",
457 session_id = %self.context.session_id,
458 peer_id = ?self.context.peer_id,
459 connection_time_ms = metrics.connection_time_ms,
460 first_candidate_time_ms = metrics.first_candidate_time_ms,
461 candidates_tried = metrics.candidates_tried,
462 round_trips = metrics.round_trips,
463 setup_bytes = metrics.setup_bytes,
464 );
465
466 let _enter = span.enter();
467 info!(
468 "Performance metrics - Connection: {}ms, First candidate: {}ms, Candidates tried: {}, Round trips: {}, Setup bytes: {}",
469 metrics.connection_time_ms,
470 metrics.first_candidate_time_ms,
471 metrics.candidates_tried,
472 metrics.round_trips,
473 metrics.setup_bytes
474 );
475 }
476 }
477
478 pub fn log_failure_diagnostics(&self, diagnostics: &FailureDiagnostics) {
480 if self.config.enable_failure_analysis {
481 let span = tracing::warn_span!(
482 "failure_diagnostics",
483 session_id = %self.context.session_id,
484 peer_id = ?self.context.peer_id,
485 failure_stage = %diagnostics.failure_stage,
486 primary_cause = %diagnostics.primary_cause,
487 confidence = diagnostics.confidence,
488 );
489
490 let _enter = span.enter();
491 warn!(
492 "Failure diagnostics - Stage: {}, Cause: {}, Confidence: {:.2}",
493 diagnostics.failure_stage, diagnostics.primary_cause, diagnostics.confidence
494 );
495
496 for (i, factor) in diagnostics.contributing_factors.iter().enumerate() {
497 debug!("Contributing factor {}: {}", i + 1, factor);
498 }
499
500 for (i, suggestion) in diagnostics.recovery_suggestions.iter().enumerate() {
501 info!("Recovery suggestion {}: {}", i + 1, suggestion);
502 }
503 }
504 }
505
506 pub fn create_traversal_span(&self, operation: &str) -> Span {
508 tracing::info_span!(
509 "nat_traversal_operation",
510 session_id = %self.context.session_id,
511 peer_id = ?self.context.peer_id,
512 operation = operation,
513 phase = %self.context.current_phase,
514 endpoint_role = %self.context.endpoint_role,
515 )
516 }
517
518 fn sanitize_address(&self, addr: SocketAddr) -> String {
520 if self.config.include_sensitive_info {
521 addr.to_string()
522 } else {
523 let ip_hash = self.hash_ip(addr.ip());
525 format!("{}:{}", ip_hash, addr.port())
526 }
527 }
528
529 fn hash_ip(&self, ip: std::net::IpAddr) -> String {
531 use std::collections::hash_map::DefaultHasher;
532 use std::hash::{Hash, Hasher};
533
534 let mut hasher = DefaultHasher::new();
535 ip.hash(&mut hasher);
536 format!("ip_{:x}", hasher.finish() & 0xFFFF) }
538}
539
540#[derive(Debug, Clone)]
542pub struct CandidateInfo {
543 pub address: SocketAddr,
544 pub candidate_type: String,
545 pub priority: u32,
546 pub source: String,
547}
548
549#[derive(Debug, Clone)]
551pub struct PerformanceMetrics {
552 pub connection_time_ms: u64,
553 pub first_candidate_time_ms: u64,
554 pub candidates_tried: u32,
555 pub round_trips: u32,
556 pub setup_bytes: u64,
557}
558
559#[derive(Debug, Clone)]
561pub struct FailureDiagnostics {
562 pub failure_stage: String,
563 pub primary_cause: String,
564 pub confidence: f64,
565 pub contributing_factors: Vec<String>,
566 pub recovery_suggestions: Vec<String>,
567}
568
569pub struct TroubleshootingGuide;
571
572impl TroubleshootingGuide {
573 pub fn generate_guide(error_category: ErrorCategory, failure_stage: &str, context: &HashMap<String, String>) -> String {
575 let mut guide = String::new();
576
577 guide.push_str(&format!("# Troubleshooting Guide: {:?} at {}\n\n", error_category, failure_stage));
578
579 match error_category {
580 ErrorCategory::NetworkConnectivity => {
581 guide.push_str("## Network Connectivity Issues\n\n");
582 guide.push_str("### Common Causes:\n");
583 guide.push_str("- Bootstrap nodes are unreachable\n");
584 guide.push_str("- Firewall blocking UDP traffic\n");
585 guide.push_str("- Network interface down\n");
586 guide.push_str("- DNS resolution failures\n\n");
587
588 guide.push_str("### Diagnostic Steps:\n");
589 guide.push_str("1. Check network interface status: `ip addr show`\n");
590 guide.push_str("2. Test bootstrap node connectivity: `nc -u <bootstrap_ip> <port>`\n");
591 guide.push_str("3. Check firewall rules: `iptables -L` or `ufw status`\n");
592 guide.push_str("4. Verify DNS resolution: `nslookup <bootstrap_hostname>`\n\n");
593
594 guide.push_str("### Recovery Actions:\n");
595 guide.push_str("- Restart network interface\n");
596 guide.push_str("- Update firewall rules to allow UDP traffic\n");
597 guide.push_str("- Try alternative bootstrap nodes\n");
598 guide.push_str("- Check network configuration\n");
599 }
600
601 ErrorCategory::NatTraversal => {
602 guide.push_str("## NAT Traversal Issues\n\n");
603 guide.push_str("### Common Causes:\n");
604 guide.push_str("- Symmetric NAT preventing hole punching\n");
605 guide.push_str("- Aggressive NAT timeout settings\n");
606 guide.push_str("- Carrier-grade NAT (CGNAT)\n");
607 guide.push_str("- Coordination timing issues\n\n");
608
609 guide.push_str("### Diagnostic Steps:\n");
610 guide.push_str("1. Detect NAT type using STUN\n");
611 guide.push_str("2. Check NAT timeout behavior\n");
612 guide.push_str("3. Verify bootstrap node coordination\n");
613 guide.push_str("4. Test with different candidate pairs\n\n");
614
615 guide.push_str("### Recovery Actions:\n");
616 guide.push_str("- Enable relay fallback\n");
617 guide.push_str("- Adjust coordination timing\n");
618 guide.push_str("- Try different bootstrap nodes\n");
619 guide.push_str("- Consider TURN relay servers\n");
620 }
621
622 ErrorCategory::Timeout => {
623 guide.push_str("## Timeout Issues\n\n");
624 guide.push_str("### Common Causes:\n");
625 guide.push_str("- Network latency higher than expected\n");
626 guide.push_str("- Aggressive timeout configuration\n");
627 guide.push_str("- Bootstrap node overload\n");
628 guide.push_str("- Packet loss causing retransmissions\n\n");
629
630 guide.push_str("### Diagnostic Steps:\n");
631 guide.push_str("1. Measure network latency: `ping <bootstrap_node>`\n");
632 guide.push_str("2. Check packet loss: `mtr <bootstrap_node>`\n");
633 guide.push_str("3. Monitor bootstrap node response times\n");
634 guide.push_str("4. Review timeout configuration\n\n");
635
636 guide.push_str("### Recovery Actions:\n");
637 guide.push_str("- Increase timeout values\n");
638 guide.push_str("- Implement adaptive timeouts\n");
639 guide.push_str("- Use multiple bootstrap nodes\n");
640 guide.push_str("- Optimize network path\n");
641 }
642
643 _ => {
644 guide.push_str("## General Troubleshooting\n\n");
645 guide.push_str("### Basic Diagnostic Steps:\n");
646 guide.push_str("1. Check system logs for errors\n");
647 guide.push_str("2. Verify configuration settings\n");
648 guide.push_str("3. Test with minimal configuration\n");
649 guide.push_str("4. Enable debug logging\n\n");
650 }
651 }
652
653 if !context.is_empty() {
655 guide.push_str("## Context Information:\n");
656 for (key, value) in context {
657 guide.push_str(&format!("- {}: {}\n", key, value));
658 }
659 guide.push('\n');
660 }
661
662 guide.push_str("## Additional Resources:\n");
663 guide.push_str("- NAT Traversal RFC: https://tools.ietf.org/html/rfc5389\n");
664 guide.push_str("- QUIC NAT Traversal Draft: https://datatracker.ietf.org/doc/draft-seemann-quic-nat-traversal/\n");
665 guide.push_str("- Project Documentation: https://github.com/your-org/ant-quic\n");
666
667 guide
668 }
669}
670
671#[cfg(test)]
672mod tests {
673 use super::*;
674
675 #[test]
676 fn test_phase_display() {
677 assert_eq!(NatTraversalPhase::CandidateDiscovery.to_string(), "candidate_discovery");
678 assert_eq!(NatTraversalPhase::HolePunching.to_string(), "hole_punching");
679 }
680
681 #[test]
682 fn test_troubleshooting_guide_generation() {
683 let context = HashMap::from([
684 ("nat_type".to_string(), "Symmetric".to_string()),
685 ("bootstrap_nodes".to_string(), "2".to_string()),
686 ]);
687
688 let guide = TroubleshootingGuide::generate_guide(
689 ErrorCategory::NatTraversal,
690 "hole_punching",
691 &context
692 );
693
694 assert!(guide.contains("NAT Traversal Issues"));
695 assert!(guide.contains("Symmetric NAT"));
696 assert!(guide.contains("nat_type: Symmetric"));
697 }
698
699 #[test]
700 fn test_address_sanitization() {
701 let config = LoggingConfig {
702 include_sensitive_info: false,
703 ..LoggingConfig::default()
704 };
705
706 let logger = NatTraversalLogger::new(config);
707 let addr: SocketAddr = "192.168.1.100:9000".parse().unwrap();
708 let sanitized = logger.sanitize_address(addr);
709
710 assert!(!sanitized.contains("192.168.1.100"));
712 assert!(sanitized.contains("9000"));
714 assert!(sanitized.starts_with("ip_"));
716 }
717}