turbomcp_cli/
path_security.rs

1//! Path validation and sanitization to prevent path traversal attacks
2//!
3//! This module provides security-critical functions to validate output paths and
4//! sanitize filenames, preventing malicious servers from writing arbitrary files
5//! via crafted tool names.
6
7use crate::error::{CliError, CliResult};
8use std::path::{Component, Path, PathBuf};
9
/// Maximum allowed filename length, in bytes (to stay within filesystem limits)
const MAX_FILENAME_LENGTH: usize = 255;

/// Reserved filenames that are not allowed (Windows + Unix special cases).
///
/// Entries are lowercase; callers compare against a lowercased candidate.
/// Besides `.` and `..`, the list covers the Windows device names, including
/// `COM0`/`LPT0` and the superscript-digit variants (`COM¹`, `COM²`, `COM³`,
/// `LPT¹`, `LPT²`, `LPT³`) that Windows also treats as devices.
const RESERVED_FILENAMES: &[&str] = &[
    ".", "..", "con", "prn", "aux", "nul", "com0", "com1", "com2", "com3", "com4", "com5", "com6",
    "com7", "com8", "com9", "com¹", "com²", "com³", "lpt0", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5",
    "lpt6", "lpt7", "lpt8", "lpt9", "lpt¹", "lpt²", "lpt³",
];
18
19/// Validates an output path to ensure it's within the base directory.
20///
21/// This function prevents path traversal attacks by:
22/// - Rejecting absolute paths
23/// - Rejecting paths with parent directory components (`..`)
24/// - Canonicalizing paths to resolve symlinks
25/// - Verifying the resolved path is within the base directory
26///
27/// # Security
28///
29/// This function is security-critical. It must ALWAYS be called before writing
30/// files based on external input (e.g., tool names from MCP servers).
31///
32/// # Arguments
33///
34/// * `base_dir` - The base directory that all output files must be within
35/// * `requested_path` - The requested path (relative to base_dir)
36///
37/// # Returns
38///
39/// The canonicalized path if valid, or a SecurityViolation error if invalid.
40///
41/// # Examples
42///
43/// ```no_run
44/// # use std::path::Path;
45/// # use turbomcp_cli::path_security::validate_output_path;
46/// let base = Path::new("/tmp/output");
47/// let safe_path = validate_output_path(base, "tool.json")?;
48/// // safe_path is guaranteed to be within /tmp/output
49/// # Ok::<(), turbomcp_cli::error::CliError>(())
50/// ```
51pub fn validate_output_path(base_dir: &Path, requested_path: &str) -> CliResult<PathBuf> {
52    // First, check for obvious path traversal patterns in the raw string
53    // This catches cases that might not be parsed as ParentDir on all platforms
54    if requested_path.contains("..") {
55        return Err(CliError::SecurityViolation {
56            reason: format!("Path traversal detected: '{}'", requested_path),
57            details: "Paths containing '..' are not allowed for security reasons".to_string(),
58        });
59    }
60
61    let requested = PathBuf::from(requested_path);
62
63    // Reject absolute paths
64    if requested.is_absolute() {
65        return Err(CliError::SecurityViolation {
66            reason: format!("Absolute path not allowed: '{}'", requested_path),
67            details: "All output files must use relative paths within the output directory"
68                .to_string(),
69        });
70    }
71
72    // Check for parent directory components (..)
73    // This is redundant with the string check above, but provides defense in depth
74    for component in requested.components() {
75        if matches!(component, Component::ParentDir) {
76            return Err(CliError::SecurityViolation {
77                reason: format!("Path traversal detected: '{}'", requested_path),
78                details: "Paths containing '..' components are not allowed for security reasons"
79                    .to_string(),
80            });
81        }
82    }
83
84    // Build full path
85    let full_path = base_dir.join(&requested);
86
87    // Canonicalize base directory to resolve symlinks
88    let base_canonical = base_dir.canonicalize().map_err(CliError::Io)?;
89
90    // For the full path, we need to handle the case where it doesn't exist yet
91    // If the file exists, canonicalize it directly
92    if full_path.exists() {
93        let canonical = full_path.canonicalize().map_err(CliError::Io)?;
94
95        // Verify it's within base directory
96        if !canonical.starts_with(&base_canonical) {
97            return Err(CliError::SecurityViolation {
98                reason: format!("Path escapes output directory: '{}'", canonical.display()),
99                details: format!(
100                    "Resolved path '{}' is outside base directory '{}'",
101                    canonical.display(),
102                    base_canonical.display()
103                ),
104            });
105        }
106
107        return Ok(canonical);
108    }
109
110    // File doesn't exist - we need to validate it's safe to create
111    // Since we already checked for ".." and absolute paths, the path is safe
112    // However, we need to return a path that's consistent with base_canonical
113    // Build the path relative to the canonical base
114    let relative_to_base =
115        full_path
116            .strip_prefix(base_dir)
117            .map_err(|_| CliError::SecurityViolation {
118                reason: "Internal error: path not relative to base".to_string(),
119                details: "Path validation failed unexpectedly".to_string(),
120            })?;
121
122    Ok(base_canonical.join(relative_to_base))
123}
124
125/// Sanitizes a filename to prevent security issues.
126///
127/// This function:
128/// - Removes or replaces unsafe characters (only allows alphanumeric, `-`, `_`, `.`)
129/// - Rejects reserved filenames (`.`, `..`, Windows device names)
130/// - Enforces maximum length limits
131///
132/// # Security
133///
134/// This function is security-critical. It must ALWAYS be called before using
135/// external input (e.g., tool names) as filenames.
136///
137/// # Arguments
138///
139/// * `name` - The filename to sanitize
140///
141/// # Returns
142///
143/// A sanitized filename if valid, or a SecurityViolation error if the name
144/// cannot be made safe.
145///
146/// # Examples
147///
148/// ```
149/// # use turbomcp_cli::path_security::sanitize_filename;
150/// assert_eq!(sanitize_filename("my_tool")?, "my_tool");
151/// assert_eq!(sanitize_filename("my/tool/../bad")?, "mytool.bad");
152/// # Ok::<(), turbomcp_cli::error::CliError>(())
153/// ```
154pub fn sanitize_filename(name: &str) -> CliResult<String> {
155    if name.is_empty() {
156        return Err(CliError::SecurityViolation {
157            reason: "Empty filename".to_string(),
158            details: "Filename cannot be empty".to_string(),
159        });
160    }
161
162    // Remove or replace unsafe characters
163    // Only allow: alphanumeric, dash, underscore, period
164    let sanitized: String = name
165        .chars()
166        .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_' || *c == '.')
167        .collect();
168
169    if sanitized.is_empty() {
170        return Err(CliError::SecurityViolation {
171            reason: format!("Invalid filename: '{}'", name),
172            details: "Filename must contain at least one alphanumeric character".to_string(),
173        });
174    }
175
176    // Additional check: reject if the sanitized name still contains ".."
177    // This prevents names like "......etcpasswd" which look suspicious
178    if sanitized.contains("..") {
179        return Err(CliError::SecurityViolation {
180            reason: format!("Invalid filename pattern: '{}'", sanitized),
181            details: "Filenames containing '..' patterns are not allowed".to_string(),
182        });
183    }
184
185    // Check length
186    if sanitized.len() > MAX_FILENAME_LENGTH {
187        return Err(CliError::SecurityViolation {
188            reason: format!("Filename too long: {} characters", sanitized.len()),
189            details: format!(
190                "Filename must be at most {} characters",
191                MAX_FILENAME_LENGTH
192            ),
193        });
194    }
195
196    // Check for reserved names (case-insensitive)
197    let lower = sanitized.to_lowercase();
198    if RESERVED_FILENAMES.contains(&lower.as_str()) {
199        return Err(CliError::SecurityViolation {
200            reason: format!("Reserved filename: '{}'", sanitized),
201            details: "This filename is reserved by the operating system".to_string(),
202        });
203    }
204
205    // Also reject if it starts with a period (hidden files can be problematic)
206    if sanitized.starts_with('.') && sanitized.len() <= 2 {
207        return Err(CliError::SecurityViolation {
208            reason: format!("Invalid filename: '{}'", sanitized),
209            details: "Filenames starting with '.' are not allowed".to_string(),
210        });
211    }
212
213    Ok(sanitized)
214}
215
216/// Validates and sanitizes a filename, then constructs a safe output path.
217///
218/// This is a convenience function that combines `sanitize_filename` and
219/// `validate_output_path` with automatic `.json` extension.
220///
221/// # Security
222///
223/// This function performs all necessary security validations.
224///
225/// # Arguments
226///
227/// * `base_dir` - The base directory for output files
228/// * `name` - The filename to sanitize (e.g., tool name)
229/// * `extension` - The file extension to add (e.g., "json")
230///
231/// # Returns
232///
233/// A validated, safe output path.
234pub fn safe_output_path(base_dir: &Path, name: &str, extension: &str) -> CliResult<PathBuf> {
235    let sanitized = sanitize_filename(name)?;
236    let filename = if extension.is_empty() {
237        sanitized
238    } else {
239        format!("{}.{}", sanitized, extension)
240    };
241    validate_output_path(base_dir, &filename)
242}
243
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[test]
    fn test_sanitize_valid_filenames() {
        assert_eq!(sanitize_filename("my_tool").unwrap(), "my_tool");
        assert_eq!(sanitize_filename("tool-123").unwrap(), "tool-123");
        assert_eq!(sanitize_filename("tool.v1").unwrap(), "tool.v1");
        assert_eq!(sanitize_filename("Tool_Name_123").unwrap(), "Tool_Name_123");
    }

    #[test]
    fn test_sanitize_removes_unsafe_chars() {
        // Slashes and other path separators should be removed
        assert_eq!(sanitize_filename("my/tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("my\\tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("tool:name").unwrap(), "toolname");
        assert_eq!(sanitize_filename("tool*name").unwrap(), "toolname");
    }

    #[test]
    fn test_sanitize_rejects_reserved_names() {
        assert!(sanitize_filename(".").is_err());
        assert!(sanitize_filename("..").is_err());
        assert!(sanitize_filename("con").is_err());
        assert!(sanitize_filename("CON").is_err());
        assert!(sanitize_filename("prn").is_err());
        assert!(sanitize_filename("aux").is_err());
        assert!(sanitize_filename("nul").is_err());
        assert!(sanitize_filename("com1").is_err());
        assert!(sanitize_filename("lpt1").is_err());
    }

    #[test]
    fn test_sanitize_rejects_empty() {
        assert!(sanitize_filename("").is_err());
        assert!(sanitize_filename("///").is_err()); // becomes empty after sanitization
        assert!(sanitize_filename("***").is_err()); // becomes empty after sanitization
    }

    #[test]
    fn test_sanitize_rejects_dot_dot_after_filtering() {
        // Removing the separators leaves "mytool..bad", which must be rejected
        assert!(sanitize_filename("my/tool/../bad").is_err());
        assert!(sanitize_filename("../../../etc/passwd").is_err());
    }

    #[test]
    fn test_validate_accepts_relative_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Simple filename
        let result = validate_output_path(base, "tool.json");
        assert!(result.is_ok());

        // Subdirectory (create it first)
        fs::create_dir_all(base.join("subdir")).unwrap();
        let result = validate_output_path(base, "subdir/tool.json");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_rejects_absolute_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "/etc/passwd").is_err());
        assert!(validate_output_path(base, "/tmp/evil").is_err());

        // Windows-style absolute paths
        #[cfg(windows)]
        {
            assert!(validate_output_path(base, "C:\\Windows\\System32").is_err());
        }
    }

    #[test]
    fn test_validate_rejects_parent_directory() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "..").is_err());
        assert!(validate_output_path(base, "../etc/passwd").is_err());
        assert!(validate_output_path(base, "../../.ssh/authorized_keys").is_err());
        assert!(validate_output_path(base, "subdir/../../../etc/passwd").is_err());
    }

    #[test]
    fn test_validate_handles_existing_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Create a file
        let test_file = base.join("test.json");
        fs::write(&test_file, "{}").unwrap();

        // Should validate successfully
        let result = validate_output_path(base, "test.json");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_handles_nonexistent_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // File doesn't exist yet, but should be valid
        let result = validate_output_path(base, "new_file.json");
        assert!(result.is_ok());

        // Subdirectory doesn't exist, but path should be valid
        let result = validate_output_path(base, "newdir/file.json");
        assert!(result.is_ok());
    }

    #[cfg(unix)]
    #[test]
    fn test_validate_rejects_existing_symlink_escaping_base() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        let outside_dir = TempDir::new().unwrap();

        // An existing file outside the base, reachable through a symlink inside it
        let secret = outside_dir.path().join("secret.txt");
        fs::write(&secret, "secret").unwrap();
        std::os::unix::fs::symlink(&secret, base.join("link.json")).unwrap();

        let result = validate_output_path(base, "link.json");
        assert!(
            result.is_err(),
            "Should reject a symlink that resolves outside the base directory"
        );
    }

    #[test]
    fn test_safe_output_path_integration() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Valid tool name
        let result = safe_output_path(base, "my_tool", "json");
        assert!(result.is_ok());
        assert!(result.unwrap().ends_with("my_tool.json"));

        // Malicious tool name with path traversal - should be rejected during sanitization
        let result = safe_output_path(base, "../../../etc/passwd", "json");
        assert!(result.is_err(), "Should reject path traversal attempts");
    }

    #[test]
    fn test_comprehensive_attack_scenarios() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        // Canonicalize base to match what validate_output_path returns
        let base_canonical = base.canonicalize().unwrap();

        // Collection of real-world path traversal attack patterns
        let malicious_inputs = vec![
            "../../../etc/passwd",
            "../../.ssh/authorized_keys",
            "../../../.bash_history",
            "/etc/shadow",
            "../../../../../../../../etc/passwd",
            "..\\..\\..\\windows\\system32",
            "subdir/../../etc/passwd",
        ];

        for input in malicious_inputs {
            // Direct validation should fail
            let result = validate_output_path(base, input);
            assert!(
                result.is_err(),
                "Should reject malicious path directly: {}",
                input
            );

            // Sanitization should either:
            // 1. Fail (reject the malicious input)
            // 2. Succeed and produce a safe filename within base_dir
            match sanitize_filename(input) {
                Ok(sanitized) => {
                    // If sanitization succeeds, validation must also succeed
                    // and the result must be within base_dir. `expect` makes a
                    // validation failure fail the test instead of silently
                    // skipping the containment assertion.
                    let path = validate_output_path(base, &sanitized)
                        .expect("a sanitized filename must pass path validation");
                    assert!(
                        path.starts_with(&base_canonical),
                        "Sanitized path must be within base dir: {} -> {} (base: {})",
                        input,
                        path.display(),
                        base_canonical.display()
                    );
                }
                Err(_) => {
                    // It's OK (and often preferable) for sanitization to fail
                    // on obviously malicious inputs
                }
            }
        }
    }
}
423}