saorsa_core/
validation.rs

1// Copyright (c) 2025 Saorsa Labs Limited
2
3// This software is dual-licensed under:
4// - GNU Affero General Public License v3.0 or later (AGPL-3.0-or-later)
5// - Commercial License
6//
7// For AGPL-3.0 license, see LICENSE-AGPL-3.0
8// For commercial licensing, contact: david@saorsalabs.com
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under these licenses is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
14// This program is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17// GNU Affero General Public License for more details.
18
19// You should have received a copy of the GNU Affero General Public License
20// along with this program. If not, see <https://www.gnu.org/licenses/>.
21
22//! Comprehensive input validation framework for P2P Foundation
23//!
24//! This module provides a robust validation system for all external inputs,
25//! including network messages, API parameters, file paths, and cryptographic parameters.
26//!
27//! # Features
28//!
29//! - **Type-safe validation traits**: Extensible validation system
30//! - **Rate limiting**: Per-IP and global rate limiting with adaptive throttling
31//! - **Performance optimized**: < 5% overhead for validation operations
32//! - **Security hardened**: Protection against common attack vectors
33//! - **Comprehensive logging**: All validation failures are logged
34//!
35//! # Usage
36//!
37//! ```rust
//! use p2p_core::error::P2pResult;
//! use p2p_core::validation::{
//!     validate_message_size, validate_peer_id, Validate, ValidationContext,
//! };
//!
//! #[derive(Debug)]
//! struct NetworkMessage {
//!     peer_id: String,
//!     payload: Vec<u8>,
//! }
//!
//! impl Validate for NetworkMessage {
//!     fn validate(&self, ctx: &ValidationContext) -> P2pResult<()> {
48//!         // Validate peer ID format
49//!         validate_peer_id(&self.peer_id)?;
50//!
51//!         // Validate payload size
52//!         validate_message_size(self.payload.len(), ctx.max_message_size)?;
53//!
54//!         Ok(())
55//!     }
56//! }
57//! ```
58
59use crate::error::{P2PError, P2pResult};
60
61use std::collections::HashMap;
62use std::net::{IpAddr, SocketAddr};
63use std::path::Path;
64use std::sync::Arc;
65use std::time::Duration;
66use thiserror::Error;
67
// Constants for validation rules
/// Longest accepted peer ID, compared against `str::len()` (bytes) in `validate_peer_id`.
const MAX_PEER_ID_LENGTH: usize = 64;
/// Shortest accepted peer ID, compared against `str::len()` (bytes) in `validate_peer_id`.
const MIN_PEER_ID_LENGTH: usize = 16;
/// Default for `ValidationContext::max_message_size`.
const MAX_MESSAGE_SIZE: usize = 16 * 1024 * 1024; // 16MB
/// Longest accepted path (bytes of the lossy UTF-8 rendering); also the default for
/// `ValidationContext::max_path_length`.
const MAX_PATH_LENGTH: usize = 4096;
/// Default for `ValidationContext::max_key_size`.
const MAX_KEY_SIZE: usize = 1024 * 1024; // 1MB for DHT keys
/// Default for `ValidationContext::max_value_size`.
const MAX_VALUE_SIZE: usize = 10 * 1024 * 1024; // 10MB for DHT values
/// Longest accepted single path component, checked in `validate_file_path`.
const MAX_FILE_NAME_LENGTH: usize = 255;

// Rate limiting constants
/// Default for `RateLimitConfig::window`.
const DEFAULT_RATE_LIMIT_WINDOW: Duration = Duration::from_secs(60);
/// Default for `RateLimitConfig::max_requests`.
const DEFAULT_MAX_REQUESTS_PER_WINDOW: u32 = 1000;
/// Default for `RateLimitConfig::burst_size`.
const DEFAULT_BURST_SIZE: u32 = 100;
81
82// Validation functions below operate without panicking and avoid global regexes
83
/// Validation errors specific to input validation
///
/// Each variant's `#[error]` string is its `Display` output; that text is what ends up
/// inside `P2PError::Validation` when the error is converted via `From`.
#[derive(Debug, Error)]
pub enum ValidationError {
    /// Peer ID failed the length or character-set check; the payload explains which.
    #[error("Invalid peer ID format: {0}")]
    InvalidPeerId(String),

    /// Socket address rejected (loopback, private range, or port 0).
    #[error("Invalid network address: {0}")]
    InvalidAddress(String),

    /// Message of `size` bytes is larger than the permitted `limit`.
    #[error("Message size exceeds limit: {size} > {limit}")]
    MessageTooLarge { size: usize, limit: usize },

    /// File path rejected for length, NUL bytes, shell metacharacters, or an over-long component.
    #[error("Invalid file path: {0}")]
    InvalidPath(String),

    /// Path contains a (possibly percent-encoded) `..` sequence; the payload is the offending path.
    #[error("Path traversal attempt detected: {0}")]
    PathTraversal(String),

    /// DHT key of `size` bytes is larger than the permitted `max`.
    #[error("Invalid key size: {size} bytes (max: {max})")]
    InvalidKeySize { size: usize, max: usize },

    /// DHT value of `size` bytes is larger than the permitted `max`.
    #[error("Invalid value size: {size} bytes (max: {max})")]
    InvalidValueSize { size: usize, max: usize },

    /// Cryptographic key or nonce does not have the exact expected length.
    #[error("Invalid cryptographic parameter: {0}")]
    InvalidCryptoParam(String),

    /// A rate limit was hit; `identifier` is `"global"` or the textual IP address.
    #[error("Rate limit exceeded for {identifier}")]
    RateLimitExceeded { identifier: String },

    /// Catch-all for malformed input (empty DHT key, bad HTTP method, unparsable config value, ...).
    #[error("Invalid format: {0}")]
    InvalidFormat(String),

    /// Numeric value outside `[min, max]`. Not constructed anywhere in this file.
    #[error("Value out of range: {value} (min: {min}, max: {max})")]
    OutOfRange { value: i64, min: i64, max: i64 },
}
120
121impl From<ValidationError> for P2PError {
122    fn from(err: ValidationError) -> Self {
123        P2PError::Validation(err.to_string().into())
124    }
125}
126
/// Context for validation operations
///
/// Bundles the size limits and network policy that the `validate_*` functions and
/// [`Validate`] implementations consult.
#[derive(Debug, Clone)]
pub struct ValidationContext {
    /// Largest message payload accepted, in bytes (passed to `validate_message_size`).
    pub max_message_size: usize,
    /// Largest DHT key accepted, in bytes (read by `validate_dht_key`).
    pub max_key_size: usize,
    /// Largest DHT value accepted, in bytes (read by `validate_dht_value`).
    pub max_value_size: usize,
    /// Maximum path length. NOTE(review): `validate_file_path` in this file compares against
    /// the `MAX_PATH_LENGTH` constant and does not read this field.
    pub max_path_length: usize,
    /// When `false`, `validate_network_address` rejects loopback addresses.
    pub allow_localhost: bool,
    /// When `false`, `validate_network_address` rejects private-range addresses.
    pub allow_private_ips: bool,
    /// Optional shared rate limiter. It is stored here but not consulted by any validator
    /// in this file; callers are expected to invoke it themselves.
    pub rate_limiter: Option<Arc<RateLimiter>>,
}
138
139impl Default for ValidationContext {
140    fn default() -> Self {
141        Self {
142            max_message_size: MAX_MESSAGE_SIZE,
143            max_key_size: MAX_KEY_SIZE,
144            max_value_size: MAX_VALUE_SIZE,
145            max_path_length: MAX_PATH_LENGTH,
146            allow_localhost: false,
147            allow_private_ips: false,
148            rate_limiter: None,
149        }
150    }
151}
152
153impl ValidationContext {
154    /// Create a new validation context with custom settings
155    pub fn new() -> Self {
156        Self::default()
157    }
158
159    /// Enable rate limiting
160    pub fn with_rate_limiting(mut self, limiter: Arc<RateLimiter>) -> Self {
161        self.rate_limiter = Some(limiter);
162        self
163    }
164
165    /// Allow localhost connections
166    pub fn allow_localhost(mut self) -> Self {
167        self.allow_localhost = true;
168        self
169    }
170
171    /// Allow private IP addresses
172    pub fn allow_private_ips(mut self) -> Self {
173        self.allow_private_ips = true;
174        self
175    }
176}
177
/// Core validation trait
///
/// Implemented by types that can check their own contents against the limits and
/// policies carried in a [`ValidationContext`].
pub trait Validate {
    /// Validate the object with the given context
    ///
    /// Returns `Ok(())` when every check passes, or the first failure as a `P2PError`.
    fn validate(&self, ctx: &ValidationContext) -> P2pResult<()>;
}
183
/// Trait for sanitizing input
///
/// No implementation of this trait appears in this file.
pub trait Sanitize {
    /// Sanitize the input, returning a cleaned version
    ///
    /// Takes `&self` and returns a new value of the same type; the receiver is not modified.
    fn sanitize(&self) -> Self;
}
189
190// ===== Network Address Validation =====
191
192/// Validate a network address
193pub fn validate_network_address(addr: &SocketAddr, ctx: &ValidationContext) -> P2pResult<()> {
194    let ip = addr.ip();
195
196    // Check for localhost
197    if ip.is_loopback() && !ctx.allow_localhost {
198        return Err(
199            ValidationError::InvalidAddress("Localhost addresses not allowed".to_string()).into(),
200        );
201    }
202
203    // Check for private IPs
204    if is_private_ip(&ip) && !ctx.allow_private_ips {
205        return Err(ValidationError::InvalidAddress(
206            "Private IP addresses not allowed".to_string(),
207        )
208        .into());
209    }
210
211    // Validate port
212    if addr.port() == 0 {
213        return Err(ValidationError::InvalidAddress("Port 0 is not allowed".to_string()).into());
214    }
215
216    Ok(())
217}
218
/// Check if an IP is in a private (non-public) range.
///
/// Returns `true` for:
/// - IPv4 private ranges (10/8, 172.16/12, 192.168/16) and IPv4 link-local (169.254/16);
/// - IPv6 unique-local (`fc00::/7`) and unicast link-local (`fe80::/10`);
/// - IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) whose embedded IPv4 address is one of
///   the above.
///
/// Loopback is deliberately not covered here; callers check it separately.
fn is_private_ip(ip: &IpAddr) -> bool {
    match ip {
        // Link-local is included so that IPv4 and IPv6 are judged by the same rule.
        IpAddr::V4(ipv4) => ipv4.is_private() || ipv4.is_link_local(),
        IpAddr::V6(ipv6) => match ipv6.to_ipv4_mapped() {
            // Classify a mapped address by the IPv4 address it really denotes.
            Some(embedded) => is_private_ip(&IpAddr::V4(embedded)),
            None => ipv6.is_unique_local() || ipv6.is_unicast_link_local(),
        },
    }
}
226
227// ===== Peer ID Validation =====
228
229/// Validate a peer ID
230pub fn validate_peer_id(peer_id: &str) -> P2pResult<()> {
231    // Simple length and character set validation; constant-time not required here
232    if peer_id.len() < MIN_PEER_ID_LENGTH || peer_id.len() > MAX_PEER_ID_LENGTH {
233        return Err(ValidationError::InvalidPeerId(format!(
234            "Length must be between {} and {} characters",
235            MIN_PEER_ID_LENGTH, MAX_PEER_ID_LENGTH
236        ))
237        .into());
238    }
239
240    if !peer_id
241        .chars()
242        .all(|ch| ch.is_alphanumeric() || ch == '_' || ch == '-')
243    {
244        return Err(ValidationError::InvalidPeerId(
245            "Must contain only alphanumeric characters, hyphens, and underscores".to_string(),
246        )
247        .into());
248    }
249
250    Ok(())
251}
252
253// ===== Message Size Validation =====
254
255/// Validate message size
256pub fn validate_message_size(size: usize, max_size: usize) -> P2pResult<()> {
257    if size > max_size {
258        return Err(ValidationError::MessageTooLarge {
259            size,
260            limit: max_size,
261        }
262        .into());
263    }
264    Ok(())
265}
266
267// ===== File Path Validation =====
268
269/// Validate a file path for security
270pub fn validate_file_path(path: &Path) -> P2pResult<()> {
271    let path_str = path.to_string_lossy();
272
273    // Check path length
274    if path_str.len() > MAX_PATH_LENGTH {
275        return Err(ValidationError::InvalidPath(format!(
276            "Path too long: {} > {}",
277            path_str.len(),
278            MAX_PATH_LENGTH
279        ))
280        .into());
281    }
282
283    // URL decode to catch encoded traversal attempts
284    let decoded = path_str
285        .replace("%2e", ".")
286        .replace("%2f", "/")
287        .replace("%5c", "\\");
288
289    // Check for path traversal attempts (including encoded versions)
290    let traversal_patterns = ["../", "..\\", "..", "..;", "....//", "%2e%2e", "%252e%252e"];
291    for pattern in &traversal_patterns {
292        if path_str.contains(pattern) || decoded.contains(pattern) {
293            return Err(ValidationError::PathTraversal(path_str.to_string()).into());
294        }
295    }
296
297    // Check for null bytes
298    if path_str.contains('\0') {
299        return Err(ValidationError::InvalidPath("Path contains null bytes".to_string()).into());
300    }
301
302    // Check for command injection characters
303    let dangerous_chars = ['|', '&', ';', '$', '`', '\n'];
304    if path_str.chars().any(|c| dangerous_chars.contains(&c)) {
305        return Err(
306            ValidationError::InvalidPath("Path contains dangerous characters".to_string()).into(),
307        );
308    }
309
310    // Validate each component
311    for component in path.components() {
312        if let Some(name) = component.as_os_str().to_str() {
313            if name.len() > MAX_FILE_NAME_LENGTH {
314                return Err(ValidationError::InvalidPath(format!(
315                    "Component '{}' exceeds maximum length",
316                    name
317                ))
318                .into());
319            }
320
321            // Check for invalid characters
322            if name.contains('\0') {
323                return Err(ValidationError::InvalidPath(format!(
324                    "Component '{}' contains invalid characters",
325                    name
326                ))
327                .into());
328            }
329        }
330    }
331
332    Ok(())
333}
334
335// ===== Cryptographic Parameter Validation =====
336
337/// Validate key size for cryptographic operations
338pub fn validate_key_size(size: usize, expected: usize) -> P2pResult<()> {
339    if size != expected {
340        return Err(ValidationError::InvalidCryptoParam(format!(
341            "Invalid key size: expected {} bytes, got {}",
342            expected, size
343        ))
344        .into());
345    }
346    Ok(())
347}
348
349/// Validate nonce size
350pub fn validate_nonce_size(size: usize, expected: usize) -> P2pResult<()> {
351    if size != expected {
352        return Err(ValidationError::InvalidCryptoParam(format!(
353            "Invalid nonce size: expected {} bytes, got {}",
354            expected, size
355        ))
356        .into());
357    }
358    Ok(())
359}
360
361// ===== DHT Key/Value Validation =====
362
363/// Validate DHT key
364pub fn validate_dht_key(key: &[u8], ctx: &ValidationContext) -> P2pResult<()> {
365    if key.is_empty() {
366        return Err(ValidationError::InvalidFormat("DHT key cannot be empty".to_string()).into());
367    }
368
369    if key.len() > ctx.max_key_size {
370        return Err(ValidationError::InvalidKeySize {
371            size: key.len(),
372            max: ctx.max_key_size,
373        }
374        .into());
375    }
376
377    Ok(())
378}
379
380/// Validate DHT value
381pub fn validate_dht_value(value: &[u8], ctx: &ValidationContext) -> P2pResult<()> {
382    if value.len() > ctx.max_value_size {
383        return Err(ValidationError::InvalidValueSize {
384            size: value.len(),
385            max: ctx.max_value_size,
386        }
387        .into());
388    }
389
390    Ok(())
391}
392
393// ===== Rate Limiting =====
394
/// Rate limiter for preventing abuse (unified engine)
///
/// Thin wrapper around `crate::rate_limit`'s shared engine, keyed by client IP address.
#[derive(Debug)]
pub struct RateLimiter {
    /// Shared token bucket engine for global and per-IP limiting
    engine: crate::rate_limit::SharedEngine<IpAddr>,
    /// Configuration this limiter was built from. It is kept but never read after
    /// construction in this file, hence the `dead_code` allowance.
    #[allow(dead_code)]
    config: RateLimitConfig,
}
404
/// Rate limit configuration
///
/// `RateLimiter::new` forwards only `window`, `max_requests` and `burst_size` to the
/// underlying engine; `adaptive` and `cleanup_interval` are stored but not otherwise
/// used in this file.
#[derive(Debug, Clone)]
pub struct RateLimitConfig {
    /// Time window for rate limiting
    pub window: Duration,
    /// Maximum requests per window
    pub max_requests: u32,
    /// Burst size allowed
    pub burst_size: u32,
    /// Enable adaptive throttling
    /// (NOTE(review): no code in this file reads this flag; confirm it has an effect.)
    pub adaptive: bool,
    /// Cleanup interval for expired entries
    /// (NOTE(review): `RateLimiter::cleanup` is a no-op here; confirm this is still needed.)
    pub cleanup_interval: Duration,
}
419
420impl Default for RateLimitConfig {
421    fn default() -> Self {
422        Self {
423            window: DEFAULT_RATE_LIMIT_WINDOW,
424            max_requests: DEFAULT_MAX_REQUESTS_PER_WINDOW,
425            burst_size: DEFAULT_BURST_SIZE,
426            adaptive: true,
427            cleanup_interval: Duration::from_secs(300), // 5 minutes
428        }
429    }
430}
431
432// Deprecated per-module bucket removed; using crate::rate_limit::Engine instead.
433
434impl RateLimiter {
435    /// Create a new rate limiter
436    pub fn new(config: RateLimitConfig) -> Self {
437        let engine_cfg = crate::rate_limit::EngineConfig {
438            window: config.window,
439            max_requests: config.max_requests,
440            burst_size: config.burst_size,
441        };
442        Self {
443            engine: std::sync::Arc::new(crate::rate_limit::Engine::new(engine_cfg)),
444            config,
445        }
446    }
447
448    /// Check if a request from an IP is allowed
449    pub fn check_ip(&self, ip: &IpAddr) -> P2pResult<()> {
450        // Global limit
451        if !self.engine.try_consume_global() {
452            return Err(ValidationError::RateLimitExceeded {
453                identifier: "global".to_string(),
454            }
455            .into());
456        }
457
458        // Per-IP limit
459        if !self.engine.try_consume_key(ip) {
460            return Err(ValidationError::RateLimitExceeded {
461                identifier: ip.to_string(),
462            }
463            .into());
464        }
465
466        Ok(())
467    }
468
469    /// Clean up expired entries
470    pub fn cleanup(&self) {
471        // Not required with the unified engine (buckets age out via window). No-op.
472    }
473}
474
475// ===== Validation Implementations for Common Types =====
476
/// Network message validation
///
/// Minimal view of an incoming message, carrying just the fields its [`Validate`]
/// implementation inspects.
#[derive(Debug)]
pub struct NetworkMessage {
    /// Sender's peer ID; must satisfy `validate_peer_id`.
    pub peer_id: String,
    /// Raw message body; its length is checked against `ValidationContext::max_message_size`.
    pub payload: Vec<u8>,
    /// Sender-supplied timestamp, compared against seconds since the Unix epoch.
    pub timestamp: u64,
}
484
485impl Validate for NetworkMessage {
486    fn validate(&self, ctx: &ValidationContext) -> P2pResult<()> {
487        // Validate peer ID
488        validate_peer_id(&self.peer_id)?;
489
490        // Validate payload size
491        validate_message_size(self.payload.len(), ctx.max_message_size)?;
492
493        // Validate timestamp (not too far in future)
494        let now = std::time::SystemTime::now()
495            .duration_since(std::time::UNIX_EPOCH)
496            .map_err(|e| P2PError::Internal(format!("System time error: {}", e).into()))?
497            .as_secs();
498
499        if self.timestamp > now + 300 {
500            // 5 minutes tolerance
501            return Err(
502                ValidationError::InvalidFormat("Timestamp too far in future".to_string()).into(),
503            );
504        }
505
506        Ok(())
507    }
508}
509
/// API request validation
///
/// Minimal view of an API request, carrying just the fields its [`Validate`]
/// implementation inspects.
#[derive(Debug)]
pub struct ApiRequest {
    /// HTTP method; only `GET`, `POST`, `PUT` and `DELETE` (exact, upper-case) are accepted.
    pub method: String,
    /// Request path; must start with `/` and must not contain `..`.
    pub path: String,
    /// Request parameters; keys must be non-empty and values are screened for SQL and
    /// shell injection patterns.
    pub params: HashMap<String, String>,
}
517
518impl Validate for ApiRequest {
519    fn validate(&self, _ctx: &ValidationContext) -> P2pResult<()> {
520        // Validate method
521        match self.method.as_str() {
522            "GET" | "POST" | "PUT" | "DELETE" => {}
523            _ => {
524                return Err(ValidationError::InvalidFormat(format!(
525                    "Invalid HTTP method: {}",
526                    self.method
527                ))
528                .into());
529            }
530        }
531
532        // Validate path
533        if !self.path.starts_with('/') {
534            return Err(
535                ValidationError::InvalidFormat("Path must start with /".to_string()).into(),
536            );
537        }
538
539        if self.path.contains("..") {
540            return Err(ValidationError::PathTraversal(self.path.clone()).into());
541        }
542
543        // Validate parameters
544        for (key, value) in &self.params {
545            if key.is_empty() {
546                return Err(
547                    ValidationError::InvalidFormat("Empty parameter key".to_string()).into(),
548                );
549            }
550
551            // Check for SQL injection patterns
552            let lower_value = value.to_lowercase();
553            let sql_patterns = [
554                "select ", "insert ", "update ", "delete ", "drop ", "union ", "exec ", "--", "/*",
555                "*/", "'", "\"", " or ", " and ", "1=1", "1='1",
556            ];
557
558            for pattern in &sql_patterns {
559                if lower_value.contains(pattern) {
560                    return Err(ValidationError::InvalidFormat(
561                        "Suspicious parameter value: potential SQL injection".to_string(),
562                    )
563                    .into());
564                }
565            }
566
567            // Check for command injection patterns
568            let dangerous_chars = ['|', '&', ';', '$', '`', '\n', '\0'];
569            if value.chars().any(|c| dangerous_chars.contains(&c)) {
570                return Err(ValidationError::InvalidFormat(
571                    "Dangerous characters in parameter value".to_string(),
572                )
573                .into());
574            }
575        }
576
577        Ok(())
578    }
579}
580
581/// Configuration value validation
582pub fn validate_config_value<T>(value: &str, min: Option<T>, max: Option<T>) -> P2pResult<T>
583where
584    T: std::str::FromStr + PartialOrd + std::fmt::Display,
585{
586    let parsed = value
587        .parse::<T>()
588        .map_err(|_| ValidationError::InvalidFormat(format!("Failed to parse value: {}", value)))?;
589
590    if let Some(min_val) = min
591        && parsed < min_val
592    {
593        return Err(ValidationError::InvalidFormat(format!(
594            "Value {} is less than minimum {}",
595            parsed, min_val
596        ))
597        .into());
598    }
599
600    if let Some(max_val) = max
601        && parsed > max_val
602    {
603        return Err(ValidationError::InvalidFormat(format!(
604            "Value {} is greater than maximum {}",
605            parsed, max_val
606        ))
607        .into());
608    }
609
610    Ok(parsed)
611}
612
/// Sanitize a string for safe usage.
///
/// The result contains only alphanumeric characters, `_`, `-` and `.` (no spaces), never
/// contains any of the blocked words `script`, `onerror`, `onload`, `onclick`, `alert` or
/// `iframe` in any ASCII letter case, and is at most `max_length` characters long.
///
/// Unsafe characters are dropped first and blocked words are removed as the output is
/// built, so a blocked word cannot be reassembled from the pieces left behind by an
/// earlier removal. Inputs such as `scrscriptipt`, `scr\u{200b}ipt` or `al ert` therefore
/// do not yield `script` or `alert`.
///
/// `javascript:` needs no entry of its own: `:` is not a permitted character and the
/// embedded `script` is removed, leaving `java`.
pub fn sanitize_string(input: &str, max_length: usize) -> String {
    // All entries are ASCII and none is a suffix of another, so at most one can match
    // at the end of the buffer.
    const BLOCKED_WORDS: &[&str] = &["script", "onerror", "onload", "onclick", "alert", "iframe"];

    let mut cleaned = String::with_capacity(input.len());
    for ch in input.chars() {
        // U+FFA0 (halfwidth hangul filler) renders as blank but counts as alphanumeric,
        // so it has to be excluded explicitly. The other invisible characters
        // (U+2060, U+200B..U+200D) and `<`/`>` are not alphanumeric and fall out here.
        let is_safe = ch != '\u{ffa0}' && (ch.is_alphanumeric() || matches!(ch, '_' | '-' | '.'));
        if !is_safe {
            continue;
        }
        cleaned.push(ch);

        // If this character completed a blocked word at the end of the buffer, cut it
        // off again. The buffer was clean before the push, so it is clean afterwards.
        let cut_at = BLOCKED_WORDS.iter().find_map(|word| {
            let start = cleaned.len().checked_sub(word.len())?;
            cleaned.as_bytes()[start..]
                .eq_ignore_ascii_case(word.as_bytes())
                .then_some(start)
        });
        if let Some(start) = cut_at {
            // The matched tail is pure ASCII, so `start` is a character boundary.
            cleaned.truncate(start);
        }
    }

    // Truncate last: removals above may shorten the buffer.
    cleaned.chars().take(max_length).collect()
}
640
#[cfg(test)]
mod tests {
    use super::*;

    /// Peer IDs: length bounds and permitted character set.
    #[test]
    fn test_peer_id_validation() {
        // Valid peer IDs
        assert!(validate_peer_id("valid_peer_id_123").is_ok());
        assert!(validate_peer_id("PEER-ID-WITH-CAPS").is_ok());

        // Invalid peer IDs
        assert!(validate_peer_id("short").is_err()); // Too short
        assert!(validate_peer_id(&"x".repeat(100)).is_err()); // Too long

        // These inputs are within the length bounds, so only the character-set rule can
        // reject them. Shorter strings would fail on length and never reach that check.
        assert!(validate_peer_id("invalid peer id with space").is_err()); // Contains space
        assert!(validate_peer_id("peer@id_long_enough").is_err()); // Invalid character
    }

    /// Network addresses: the default context rejects loopback; the builder re-enables it.
    #[test]
    fn test_network_address_validation() {
        let ctx = ValidationContext::default();

        // Valid addresses
        let addr: SocketAddr = "8.8.8.8:53".parse().unwrap();
        assert!(validate_network_address(&addr, &ctx).is_ok());

        // Invalid addresses
        let localhost: SocketAddr = "127.0.0.1:80".parse().unwrap();
        assert!(validate_network_address(&localhost, &ctx).is_err());

        // Allow localhost when configured
        let ctx_localhost = ValidationContext::default().allow_localhost();
        assert!(validate_network_address(&localhost, &ctx_localhost).is_ok());
    }

    /// File paths: ordinary paths pass; traversal and NUL bytes are rejected.
    #[test]
    fn test_file_path_validation() {
        // Valid paths
        assert!(validate_file_path(Path::new("data/file.txt")).is_ok());
        assert!(validate_file_path(Path::new("/usr/local/bin")).is_ok());

        // Invalid paths
        assert!(validate_file_path(Path::new("../etc/passwd")).is_err());
        assert!(validate_file_path(Path::new("file\0name")).is_err());
    }

    /// Rate limiter: a burst is allowed, the next request is refused, and requests are
    /// allowed again once the window has elapsed.
    #[test]
    fn test_rate_limiter() {
        let config = RateLimitConfig {
            window: Duration::from_millis(500), // Shorter window for testing
            max_requests: 10,
            burst_size: 5,
            ..Default::default()
        };

        let limiter = RateLimiter::new(config);
        let ip: IpAddr = "192.168.1.1".parse().unwrap();

        // Should allow burst
        for _ in 0..5 {
            assert!(limiter.check_ip(&ip).is_ok());
        }

        // Should start rate limiting after burst
        assert!(limiter.check_ip(&ip).is_err()); // Should be rate limited now

        // After waiting longer than the window, should allow again
        std::thread::sleep(Duration::from_millis(600));
        assert!(limiter.check_ip(&ip).is_ok());
    }

    /// `NetworkMessage`: a well-formed message passes; a too-short peer ID fails.
    #[test]
    fn test_message_validation() {
        let ctx = ValidationContext::default();

        let valid_msg = NetworkMessage {
            peer_id: "valid_peer_id_123".to_string(),
            payload: vec![0u8; 1024],
            timestamp: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        };

        assert!(valid_msg.validate(&ctx).is_ok());

        // Test invalid message (rejected because of the peer ID, not the timestamp)
        let invalid_msg = NetworkMessage {
            peer_id: "short".to_string(),
            payload: vec![0u8; 1024],
            timestamp: 0,
        };

        assert!(invalid_msg.validate(&ctx).is_err());
    }

    /// `sanitize_string`: unsafe characters are dropped and the result is truncated.
    #[test]
    fn test_sanitization() {
        assert_eq!(sanitize_string("hello world!", 20), "helloworld");

        assert_eq!(sanitize_string("test@#$%123", 20), "test123");

        assert_eq!(
            sanitize_string("very_long_string_that_exceeds_limit", 10),
            "very_long_"
        );
    }
}