turbomcp_protocol/security/validation.rs
1//! Path validation for security
2//!
3//! This module provides focused path validation utilities to prevent common
4//! security vulnerabilities like path traversal attacks. It follows the principle
5//! of doing one thing well rather than trying to cover every possible security scenario.
6
7use crate::Result;
8use percent_encoding::percent_decode_str;
9use std::path::{Path, PathBuf};
10use tracing::{debug, warn};
11
12/// Decode URL-encoded patterns using the battle-tested percent-encoding crate.
13/// Handles single and double encoding (e.g., %252e -> %2e -> .).
14///
15/// Uses RFC 3986 compliant decoding which properly handles all edge cases
16/// including triple-encoding and unusual character sequences.
17fn decode_url_encoded(s: &str) -> String {
18 // First pass: decode once
19 let first_pass = percent_decode_str(s).decode_utf8_lossy().to_string();
20
21 // Second pass: decode again to catch double-encoded patterns like %252e -> %2e -> .
22 percent_decode_str(&first_pass)
23 .decode_utf8_lossy()
24 .to_string()
25}
26
/// Check for path traversal patterns including Unicode lookalikes
/// v2.3.6: Added for enhanced path traversal detection
///
/// Returns `true` when `s` contains two adjacent "dot-like" characters, where
/// a dot-like character is any of:
/// - `.` (U+002E, ASCII full stop)
/// - U+FF0E (fullwidth full stop)
/// - U+3002 (ideographic full stop)
///
/// Mixed pairs (for example an ASCII dot followed by a fullwidth dot) are
/// flagged as well. Backslash variants such as `..\` need no separate check:
/// they always contain two adjacent dots and are therefore already covered.
///
/// The lookalikes are spelled as `\u{...}` escapes on purpose so that editors
/// or tooling that normalise Unicode cannot silently turn them into ASCII
/// dots and make the check redundant.
fn contains_traversal_pattern(s: &str) -> bool {
    /// Whether `c` is a full stop or one of its visually similar variants.
    fn is_dot_like(c: char) -> bool {
        matches!(c, '.' | '\u{FF0E}' | '\u{3002}')
    }

    // Pair every character with its successor and look for two dots in a row.
    s.chars()
        .zip(s.chars().skip(1))
        .any(|(first, second)| is_dot_like(first) && is_dot_like(second))
}
44
45/// Validates a path for basic security constraints
46///
47/// This function performs essential security checks:
48/// - Canonicalizes the path to resolve symlinks and relative components
49/// - Prevents path traversal attacks by checking for ".." patterns
50/// - Validates that the path is within reasonable bounds
51///
52/// # Examples
53///
54/// ```rust,no_run
55/// use turbomcp_protocol::security::validate_path;
56///
57/// // Safe path
58/// let safe_path = validate_path("/home/user/data.txt")?;
59///
60/// // Path traversal attempt - will fail
61/// let result = validate_path("/home/user/../../../etc/passwd");
62/// assert!(result.is_err());
63/// # Ok::<(), Box<dyn std::error::Error>>(())
64/// ```
65pub fn validate_path<P: AsRef<Path>>(path: P) -> Result<PathBuf> {
66 let path = path.as_ref();
67 debug!("Validating path: {:?}", path);
68
69 // Check for obvious path traversal patterns before filesystem operations
70 // v2.3.6: Enhanced with URL-encoded and Unicode detection
71 let path_str = path.to_string_lossy();
72
73 // Check for null bytes (can be used to truncate paths)
74 if path_str.contains('\0') || path_str.contains("%00") {
75 return Err(crate::Error::security(format!(
76 "Null byte in path detected: {:?}",
77 path
78 )));
79 }
80
81 // Decode URL-encoded patterns for detection
82 let decoded = decode_url_encoded(&path_str);
83
84 // Check both original and decoded for traversal patterns
85 if contains_traversal_pattern(&path_str) || contains_traversal_pattern(&decoded) {
86 return Err(crate::Error::security(format!(
87 "Path traversal pattern detected: {:?}",
88 path
89 )));
90 }
91
92 // Canonicalize the path to resolve symlinks and relative components
93 let canonical_path = match path.canonicalize() {
94 Ok(p) => p,
95 Err(e) => {
96 warn!("Failed to canonicalize path {:?}: {}", path, e);
97 return Err(crate::Error::security(format!(
98 "Invalid path or access denied: {:?}",
99 path
100 )));
101 }
102 };
103
104 // Basic sanity check on path depth to prevent excessive nesting
105 let depth = canonical_path.components().count();
106 if depth > 20 {
107 // Reasonable limit for most use cases
108 return Err(crate::Error::security(format!(
109 "Path depth too deep ({}): {:?}",
110 depth, canonical_path
111 )));
112 }
113
114 debug!("Path validation successful: {:?}", canonical_path);
115 Ok(canonical_path)
116}
117
118/// Validates a path and enforces it's within a base directory
119///
120/// This is useful for ensuring file operations stay within allowed boundaries.
121///
122/// # Examples
123///
124/// ```rust,no_run
125/// use turbomcp_protocol::security::validate_path_within;
126///
127/// let base = "/home/user/workspace";
128/// let file_path = validate_path_within("/home/user/workspace/project/file.txt", base)?;
129/// # Ok::<(), Box<dyn std::error::Error>>(())
130/// ```
131pub fn validate_path_within<P: AsRef<Path>, B: AsRef<Path>>(path: P, base: B) -> Result<PathBuf> {
132 let validated_path = validate_path(path)?;
133 let base_path = base
134 .as_ref()
135 .canonicalize()
136 .map_err(|e| crate::Error::security(format!("Invalid base path: {}", e)))?;
137
138 if !validated_path.starts_with(&base_path) {
139 return Err(crate::Error::security(format!(
140 "Path outside allowed directory: {:?} not within {:?}",
141 validated_path, base_path
142 )));
143 }
144
145 Ok(validated_path)
146}
147
148/// Checks if a file extension is allowed
149///
150/// Simple utility for validating file extensions against an allow list.
151pub fn validate_file_extension<P: AsRef<Path>>(path: P, allowed_extensions: &[&str]) -> Result<()> {
152 let path = path.as_ref();
153
154 match path.extension().and_then(|ext| ext.to_str()) {
155 Some(ext) => {
156 if allowed_extensions.contains(&ext) {
157 Ok(())
158 } else {
159 Err(crate::Error::security(format!(
160 "File extension '{}' not allowed",
161 ext
162 )))
163 }
164 }
165 None => {
166 if allowed_extensions.is_empty() {
167 Ok(()) // No extension required
168 } else {
169 Err(crate::Error::security(
170 "File must have an extension".to_string(),
171 ))
172 }
173 }
174 }
175}