ant_quic/relay/
mod.rs

1// Copyright 2024 Saorsa Labs Ltd.
2//
3// This Saorsa Network Software is licensed under the General Public License (GPL), version 3.
4// Please see the file LICENSE-GPL, or visit <http://www.gnu.org/licenses/> for the full text.
5//
6// Full details available at https://saorsalabs.com/licenses
7
8//! TURN-style Relay Protocol Implementation
9//!
10//! This module implements a TURN-style relay protocol for NAT traversal fallback
11//! when direct peer-to-peer connections cannot be established. The relay system
12//! provides a fallback mechanism to ensure connectivity between peers through
13//! trusted relay servers.
14//!
15//! # Protocol Overview
16//!
17//! The relay protocol uses QUIC extension frames for communication:
18//! - `RELAY_REQUEST` (0x44): Request relay connection establishment
19//! - `RELAY_RESPONSE` (0x45): Response to relay request with status
20//! - `RELAY_DATA` (0x46): Bidirectional data forwarding through relay
21//!
22//! # Security
23//!
24//! All relay operations use Ed25519 cryptographic authentication with
25//! anti-replay protection. Rate limiting prevents abuse and ensures
26//! fair resource allocation among clients.
27
28pub mod authenticator;
29pub mod connection;
30pub mod error;
31pub mod rate_limiter;
32pub mod session_manager;
33pub mod statistics;
34
35pub use authenticator::{AuthToken, RelayAuthenticator};
36pub use connection::{RelayAction, RelayConnection, RelayConnectionConfig, RelayEvent};
37pub use error::{RelayError, RelayResult};
38pub use rate_limiter::{RateLimiter, TokenBucket};
39pub use session_manager::{
40    RelaySessionInfo, SessionConfig, SessionId, SessionManager, SessionManagerStats, SessionState,
41};
42
43use std::time::Duration;
44
45// Export the statistics collector
46pub use statistics::RelayStatisticsCollector;
47
48/// Default relay session timeout (5 minutes)
49pub const DEFAULT_SESSION_TIMEOUT: Duration = Duration::from_secs(300);
50
51/// Default bandwidth limit per session (1 MB/s)
52pub const DEFAULT_BANDWIDTH_LIMIT: u32 = 1_048_576;
53
54/// Maximum number of concurrent relay sessions per client
55pub const MAX_CONCURRENT_SESSIONS: usize = 10;
56
57/// Maximum size of relay data frame payload (64 KB)
58pub const MAX_RELAY_DATA_SIZE: usize = 65536;
59
60/// Rate limiting: tokens per second (100 requests/second)
61pub const RATE_LIMIT_TOKENS_PER_SECOND: u32 = 100;
62
63/// Rate limiting: maximum burst size (500 tokens)
64pub const RATE_LIMIT_BURST_SIZE: u32 = 500;
65
66/// Anti-replay window size for authentication tokens
67pub const ANTI_REPLAY_WINDOW_SIZE: u64 = 1000;
68
69/// Session cleanup interval (check every 30 seconds)
70pub const SESSION_CLEANUP_INTERVAL: Duration = Duration::from_secs(30);
71/// Comprehensive relay statistics combining all relay operations
72#[derive(Debug, Clone, Default)]
73pub struct RelayStatistics {
74    /// Session-related statistics
75    pub session_stats: SessionStatistics,
76
77    /// Connection-related statistics  
78    pub connection_stats: ConnectionStatistics,
79
80    /// Authentication and security statistics
81    pub auth_stats: AuthenticationStatistics,
82
83    /// Rate limiting statistics
84    pub rate_limit_stats: RateLimitingStatistics,
85
86    /// Error and failure statistics
87    pub error_stats: ErrorStatistics,
88}
89
90/// Session management statistics
91#[derive(Debug, Clone, Default)]
92pub struct SessionStatistics {
93    /// Total sessions created since startup
94    pub total_sessions_created: u64,
95
96    /// Currently active sessions
97    pub active_sessions: u32,
98
99    /// Sessions currently in pending state
100    pub pending_sessions: u32,
101
102    /// Sessions terminated normally
103    pub sessions_terminated_normally: u64,
104
105    /// Sessions terminated due to timeout
106    pub sessions_timed_out: u64,
107
108    /// Sessions terminated due to errors
109    pub sessions_terminated_with_errors: u64,
110
111    /// Average session duration (in seconds)
112    pub avg_session_duration: f64,
113
114    /// Total data forwarded across all sessions (bytes)
115    pub total_bytes_forwarded: u64,
116}
117
118/// Connection-level statistics
119#[derive(Debug, Clone, Default)]
120pub struct ConnectionStatistics {
121    /// Total relay connections established
122    pub total_connections: u64,
123
124    /// Currently active connections
125    pub active_connections: u32,
126
127    /// Total bytes sent through all connections
128    pub total_bytes_sent: u64,
129
130    /// Total bytes received through all connections
131    pub total_bytes_received: u64,
132
133    /// Average connection bandwidth usage (bytes/sec)
134    pub avg_bandwidth_usage: f64,
135
136    /// Peak concurrent connections
137    pub peak_concurrent_connections: u32,
138
139    /// Connection timeouts
140    pub connection_timeouts: u64,
141
142    /// Keep-alive packets sent
143    pub keep_alive_sent: u64,
144}
145
146/// Authentication and security statistics
147#[derive(Debug, Clone, Default)]
148pub struct AuthenticationStatistics {
149    /// Total authentication attempts
150    pub total_auth_attempts: u64,
151
152    /// Successful authentications
153    pub successful_auths: u64,
154
155    /// Failed authentications
156    pub failed_auths: u64,
157
158    /// Authentication rate (auths per second)
159    pub auth_rate: f64,
160
161    /// Replay attacks detected and blocked
162    pub replay_attacks_blocked: u64,
163
164    /// Invalid signatures detected
165    pub invalid_signatures: u64,
166
167    /// Unknown peer keys encountered
168    pub unknown_peer_keys: u64,
169}
170
171/// Rate limiting statistics
172#[derive(Debug, Clone, Default)]
173pub struct RateLimitingStatistics {
174    /// Total requests received
175    pub total_requests: u64,
176
177    /// Requests allowed through rate limiter
178    pub requests_allowed: u64,
179
180    /// Requests blocked by rate limiter
181    pub requests_blocked: u64,
182
183    /// Current token bucket levels
184    pub current_tokens: u32,
185
186    /// Rate limiting efficiency (% of requests allowed)
187    pub efficiency_percentage: f64,
188
189    /// Peak request rate (requests per second)
190    pub peak_request_rate: f64,
191}
192
193/// Error and failure statistics
194#[derive(Debug, Clone, Default)]
195pub struct ErrorStatistics {
196    /// Protocol errors encountered
197    pub protocol_errors: u64,
198
199    /// Resource exhaustion events
200    pub resource_exhausted: u64,
201
202    /// Session-related errors
203    pub session_errors: u64,
204
205    /// Authentication failures
206    pub auth_failures: u64,
207
208    /// Network-related errors
209    pub network_errors: u64,
210
211    /// Internal errors
212    pub internal_errors: u64,
213
214    /// Error rate (errors per second)
215    pub error_rate: f64,
216
217    /// Most common error types
218    pub error_breakdown: std::collections::HashMap<String, u64>,
219}
220
221impl RelayStatistics {
222    /// Create new empty relay statistics
223    pub fn new() -> Self {
224        Self::default()
225    }
226
227    /// Calculate overall success rate
228    pub fn success_rate(&self) -> f64 {
229        let total_ops = self.session_stats.total_sessions_created
230            + self.connection_stats.total_connections
231            + self.auth_stats.total_auth_attempts;
232
233        if total_ops == 0 {
234            return 1.0;
235        }
236
237        let total_failures = self.session_stats.sessions_terminated_with_errors
238            + self.connection_stats.connection_timeouts
239            + self.auth_stats.failed_auths
240            + self.error_stats.protocol_errors
241            + self.error_stats.resource_exhausted;
242
243        1.0 - (total_failures as f64 / total_ops as f64)
244    }
245
246    /// Calculate total throughput (bytes per second)
247    pub fn total_throughput(&self) -> f64 {
248        if self.session_stats.avg_session_duration == 0.0 {
249            return 0.0;
250        }
251        self.session_stats.total_bytes_forwarded as f64 / self.session_stats.avg_session_duration
252    }
253
254    /// Check if relay is operating within healthy parameters
255    pub fn is_healthy(&self) -> bool {
256        // Calculate total operations across all subsystems
257        let total_ops = self.session_stats.total_sessions_created
258            + self.connection_stats.total_connections
259            + self.auth_stats.total_auth_attempts
260            + self.rate_limit_stats.total_requests;
261
262        // If no operations have been recorded, consider it healthy (idle state)
263        if total_ops == 0 {
264            return true;
265        }
266
267        // Calculate total errors across all error types
268        let total_errors = self.error_stats.protocol_errors
269            + self.error_stats.resource_exhausted
270            + self.error_stats.session_errors
271            + self.error_stats.auth_failures
272            + self.error_stats.network_errors
273            + self.error_stats.internal_errors;
274
275        // For systems with operations, apply health criteria:
276        // 1. High success rate (>95%)
277        // 2. Error rate check (with special handling for short time periods)
278        // 3. Good rate limiting efficiency if applicable
279
280        let error_rate_ok = if total_errors == 0 {
281            true // No errors is always healthy
282        } else if self.error_stats.error_rate < 1.0 {
283            true // Less than 1 error/sec is healthy
284        } else {
285            // For high error rates, check if we have very few absolute errors
286            // This handles cases where tests run quickly and cause artificially high rates
287            total_errors <= 5 && total_ops >= 100 // Allow up to 5 errors if we have 100+ ops (5% error rate)
288        };
289
290        self.success_rate() > 0.95
291            && error_rate_ok
292            && (self.rate_limit_stats.total_requests == 0
293                || self.rate_limit_stats.efficiency_percentage > 80.0)
294    }
295}