turbomcp_cli/
path_security.rs

1//! Path validation and sanitization to prevent path traversal attacks
2//!
3//! This module provides security-critical functions to validate output paths and
4//! sanitize filenames, preventing malicious servers from writing arbitrary files
5//! via crafted tool names.
6
7use crate::error::{CliError, CliResult};
8use std::path::{Component, Path, PathBuf};
9
/// Upper bound on the length of a sanitized filename.
///
/// The limit is compared against `String::len`, i.e. the UTF-8 byte length of
/// the name, and is chosen to stay within filesystem limits.
const MAX_FILENAME_LENGTH: usize = 255;
12
/// Filenames that must never be used for output files, stored in lowercase.
///
/// The list holds the directory self/parent entries (`.` and `..`) and the
/// Windows device names: `CON`, `PRN`, `AUX`, `NUL`, `COM0`-`COM9` and
/// `LPT0`-`LPT9`, plus the superscript-digit forms (`COM¹`-`COM³`,
/// `LPT¹`-`LPT³`) that Windows also reserves. The superscript digits count as
/// alphanumeric for `char::is_alphanumeric`, so they survive sanitization and
/// have to be listed explicitly.
///
/// Entries are lowercase; lowercase a candidate name before looking it up.
const RESERVED_FILENAMES: &[&str] = &[
    ".", "..", "con", "prn", "aux", "nul",
    "com0", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
    "com\u{b9}", "com\u{b2}", "com\u{b3}",
    "lpt0", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
    "lpt\u{b9}", "lpt\u{b2}", "lpt\u{b3}",
];
18
19/// Validates an output path to ensure it's within the base directory.
20///
21/// This function prevents path traversal attacks by:
22/// - Rejecting absolute paths
23/// - Rejecting paths with parent directory components (`..`)
24/// - Canonicalizing paths to resolve symlinks
25/// - Verifying the resolved path is within the base directory
26///
27/// # Security
28///
29/// This function is security-critical. It must ALWAYS be called before writing
30/// files based on external input (e.g., tool names from MCP servers).
31///
32/// # Arguments
33///
34/// * `base_dir` - The base directory that all output files must be within
35/// * `requested_path` - The requested path (relative to base_dir)
36///
37/// # Returns
38///
39/// The canonicalized path if valid, or a SecurityViolation error if invalid.
40///
41/// # Examples
42///
43/// ```no_run
44/// # use std::path::Path;
45/// # use turbomcp_cli::path_security::validate_output_path;
46/// let base = Path::new("/tmp/output");
47/// let safe_path = validate_output_path(base, "tool.json")?;
48/// // safe_path is guaranteed to be within /tmp/output
49/// # Ok::<(), turbomcp_cli::error::CliError>(())
50/// ```
51pub fn validate_output_path(base_dir: &Path, requested_path: &str) -> CliResult<PathBuf> {
52    // First, check for obvious path traversal patterns in the raw string
53    // This catches cases that might not be parsed as ParentDir on all platforms
54    if requested_path.contains("..") {
55        return Err(CliError::SecurityViolation {
56            reason: format!("Path traversal detected: '{}'", requested_path),
57            details: "Paths containing '..' are not allowed for security reasons".to_string(),
58        });
59    }
60
61    let requested = PathBuf::from(requested_path);
62
63    // Reject absolute paths
64    if requested.is_absolute() {
65        return Err(CliError::SecurityViolation {
66            reason: format!("Absolute path not allowed: '{}'", requested_path),
67            details: "All output files must use relative paths within the output directory"
68                .to_string(),
69        });
70    }
71
72    // Check for parent directory components (..)
73    // This is redundant with the string check above, but provides defense in depth
74    for component in requested.components() {
75        if matches!(component, Component::ParentDir) {
76            return Err(CliError::SecurityViolation {
77                reason: format!("Path traversal detected: '{}'", requested_path),
78                details: "Paths containing '..' components are not allowed for security reasons"
79                    .to_string(),
80            });
81        }
82    }
83
84    // Build full path
85    let full_path = base_dir.join(&requested);
86
87    // Canonicalize base directory to resolve symlinks
88    let base_canonical = base_dir.canonicalize().map_err(CliError::Io)?;
89
90    // For the full path, we need to handle the case where it doesn't exist yet
91    // If the file exists, canonicalize it directly
92    if full_path.exists() {
93        let canonical = full_path.canonicalize().map_err(CliError::Io)?;
94
95        // Verify it's within base directory
96        if !canonical.starts_with(&base_canonical) {
97            return Err(CliError::SecurityViolation {
98                reason: format!("Path escapes output directory: '{}'", canonical.display()),
99                details: format!(
100                    "Resolved path '{}' is outside base directory '{}'",
101                    canonical.display(),
102                    base_canonical.display()
103                ),
104            });
105        }
106
107        return Ok(canonical);
108    }
109
110    // File doesn't exist - we need to validate it's safe to create
111    // Since we already checked for ".." and absolute paths, the path is safe
112    // However, we need to return a path that's consistent with base_canonical
113    // Build the path relative to the canonical base
114    let relative_to_base =
115        full_path
116            .strip_prefix(base_dir)
117            .map_err(|_| CliError::SecurityViolation {
118                reason: "Internal error: path not relative to base".to_string(),
119                details: "Path validation failed unexpectedly".to_string(),
120            })?;
121
122    Ok(base_canonical.join(relative_to_base))
123}
124
125/// Sanitizes a filename to prevent security issues.
126///
127/// This function:
128/// - Removes or replaces unsafe characters (only allows alphanumeric, `-`, `_`, `.`)
129/// - Rejects reserved filenames (`.`, `..`, Windows device names)
130/// - Enforces maximum length limits
131///
132/// # Security
133///
134/// This function is security-critical. It must ALWAYS be called before using
135/// external input (e.g., tool names) as filenames.
136///
137/// # Arguments
138///
139/// * `name` - The filename to sanitize
140///
141/// # Returns
142///
143/// A sanitized filename if valid, or a SecurityViolation error if the name
144/// cannot be made safe.
145///
146/// # Examples
147///
148/// ```
149/// # use turbomcp_cli::path_security::sanitize_filename;
150/// assert_eq!(sanitize_filename("my_tool")?, "my_tool");
151/// assert_eq!(sanitize_filename("my-file.txt")?, "my-file.txt");
152/// // Paths with ".." are rejected for security
153/// assert!(sanitize_filename("my/tool/../bad").is_err());
154/// # Ok::<(), turbomcp_cli::error::CliError>(())
155/// ```
156pub fn sanitize_filename(name: &str) -> CliResult<String> {
157    if name.is_empty() {
158        return Err(CliError::SecurityViolation {
159            reason: "Empty filename".to_string(),
160            details: "Filename cannot be empty".to_string(),
161        });
162    }
163
164    // Remove or replace unsafe characters
165    // Only allow: alphanumeric, dash, underscore, period
166    let sanitized: String = name
167        .chars()
168        .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_' || *c == '.')
169        .collect();
170
171    if sanitized.is_empty() {
172        return Err(CliError::SecurityViolation {
173            reason: format!("Invalid filename: '{}'", name),
174            details: "Filename must contain at least one alphanumeric character".to_string(),
175        });
176    }
177
178    // Additional check: reject if the sanitized name still contains ".."
179    // This prevents names like "......etcpasswd" which look suspicious
180    if sanitized.contains("..") {
181        return Err(CliError::SecurityViolation {
182            reason: format!("Invalid filename pattern: '{}'", sanitized),
183            details: "Filenames containing '..' patterns are not allowed".to_string(),
184        });
185    }
186
187    // Check length
188    if sanitized.len() > MAX_FILENAME_LENGTH {
189        return Err(CliError::SecurityViolation {
190            reason: format!("Filename too long: {} characters", sanitized.len()),
191            details: format!(
192                "Filename must be at most {} characters",
193                MAX_FILENAME_LENGTH
194            ),
195        });
196    }
197
198    // Check for reserved names (case-insensitive)
199    let lower = sanitized.to_lowercase();
200    if RESERVED_FILENAMES.contains(&lower.as_str()) {
201        return Err(CliError::SecurityViolation {
202            reason: format!("Reserved filename: '{}'", sanitized),
203            details: "This filename is reserved by the operating system".to_string(),
204        });
205    }
206
207    // Also reject if it starts with a period (hidden files can be problematic)
208    if sanitized.starts_with('.') && sanitized.len() <= 2 {
209        return Err(CliError::SecurityViolation {
210            reason: format!("Invalid filename: '{}'", sanitized),
211            details: "Filenames starting with '.' are not allowed".to_string(),
212        });
213    }
214
215    Ok(sanitized)
216}
217
218/// Validates and sanitizes a filename, then constructs a safe output path.
219///
220/// This is a convenience function that combines `sanitize_filename` and
221/// `validate_output_path` with automatic `.json` extension.
222///
223/// # Security
224///
225/// This function performs all necessary security validations.
226///
227/// # Arguments
228///
229/// * `base_dir` - The base directory for output files
230/// * `name` - The filename to sanitize (e.g., tool name)
231/// * `extension` - The file extension to add (e.g., "json")
232///
233/// # Returns
234///
235/// A validated, safe output path.
236pub fn safe_output_path(base_dir: &Path, name: &str, extension: &str) -> CliResult<PathBuf> {
237    let sanitized = sanitize_filename(name)?;
238    let filename = if extension.is_empty() {
239        sanitized
240    } else {
241        format!("{}.{}", sanitized, extension)
242    };
243    validate_output_path(base_dir, &filename)
244}
245
/// Unit tests for filename sanitization and output path validation.
///
/// Filesystem-backed tests run inside a fresh temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Names made only of allowed characters pass through unchanged.
    #[test]
    fn test_sanitize_valid_filenames() {
        assert_eq!(sanitize_filename("my_tool").unwrap(), "my_tool");
        assert_eq!(sanitize_filename("tool-123").unwrap(), "tool-123");
        assert_eq!(sanitize_filename("tool.v1").unwrap(), "tool.v1");
        assert_eq!(sanitize_filename("Tool_Name_123").unwrap(), "Tool_Name_123");
    }

    /// Characters outside the allowed set are dropped, not replaced.
    #[test]
    fn test_sanitize_removes_unsafe_chars() {
        // Slashes and other path separators should be removed
        assert_eq!(sanitize_filename("my/tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("my\\tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("tool:name").unwrap(), "toolname");
        assert_eq!(sanitize_filename("tool*name").unwrap(), "toolname");
    }

    /// Reserved names are rejected regardless of case.
    #[test]
    fn test_sanitize_rejects_reserved_names() {
        assert!(sanitize_filename(".").is_err());
        assert!(sanitize_filename("..").is_err());
        assert!(sanitize_filename("con").is_err());
        assert!(sanitize_filename("CON").is_err());
        assert!(sanitize_filename("prn").is_err());
        assert!(sanitize_filename("aux").is_err());
        assert!(sanitize_filename("nul").is_err());
        assert!(sanitize_filename("com1").is_err());
        assert!(sanitize_filename("lpt1").is_err());
    }

    /// Empty input, and input that is empty after filtering, is rejected.
    #[test]
    fn test_sanitize_rejects_empty() {
        assert!(sanitize_filename("").is_err());
        assert!(sanitize_filename("///").is_err()); // becomes empty after sanitization
        assert!(sanitize_filename("***").is_err()); // becomes empty after sanitization
    }

    /// Plain relative paths, with or without a subdirectory, are accepted.
    #[test]
    fn test_validate_accepts_relative_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Simple filename
        let result = validate_output_path(base, "tool.json");
        assert!(result.is_ok());

        // Subdirectory (create it first)
        fs::create_dir_all(base.join("subdir")).unwrap();
        let result = validate_output_path(base, "subdir/tool.json");
        assert!(result.is_ok());
    }

    /// Absolute paths are never accepted.
    #[test]
    fn test_validate_rejects_absolute_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "/etc/passwd").is_err());
        assert!(validate_output_path(base, "/tmp/evil").is_err());

        // Windows-style absolute paths
        #[cfg(windows)]
        {
            assert!(validate_output_path(base, "C:\\Windows\\System32").is_err());
        }
    }

    /// Any use of `..` is rejected, wherever it appears in the path.
    #[test]
    fn test_validate_rejects_parent_directory() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "..").is_err());
        assert!(validate_output_path(base, "../etc/passwd").is_err());
        assert!(validate_output_path(base, "../../.ssh/authorized_keys").is_err());
        assert!(validate_output_path(base, "subdir/../../../etc/passwd").is_err());
    }

    /// An existing file inside the base directory validates to a path inside
    /// the canonical base directory.
    #[test]
    fn test_validate_handles_existing_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Create a file
        let test_file = base.join("test.json");
        fs::write(&test_file, "{}").unwrap();

        // Should validate successfully and stay inside the base directory
        let path = validate_output_path(base, "test.json").unwrap();
        assert!(path.starts_with(base.canonicalize().unwrap()));
        assert!(path.ends_with("test.json"));
    }

    /// Targets that do not exist yet are still valid output locations.
    #[test]
    fn test_validate_handles_nonexistent_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        let base_canonical = base.canonicalize().unwrap();

        // File doesn't exist yet, but should be valid
        let path = validate_output_path(base, "new_file.json").unwrap();
        assert!(path.starts_with(&base_canonical));
        assert!(path.ends_with("new_file.json"));

        // Subdirectory doesn't exist, but path should be valid
        let path = validate_output_path(base, "newdir/file.json").unwrap();
        assert!(path.starts_with(&base_canonical));
        assert!(path.ends_with("newdir/file.json"));
    }

    /// `safe_output_path` sanitizes, appends the extension and validates.
    #[test]
    fn test_safe_output_path_integration() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Valid tool name
        let result = safe_output_path(base, "my_tool", "json");
        assert!(result.is_ok());
        assert!(result.unwrap().ends_with("my_tool.json"));

        // Malicious tool name with path traversal - should be rejected during sanitization
        let result = safe_output_path(base, "../../../etc/passwd", "json");
        assert!(result.is_err(), "Should reject path traversal attempts");
    }

    /// Runs a collection of traversal payloads through both entry points.
    #[test]
    fn test_comprehensive_attack_scenarios() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        // Canonicalize base to match what validate_output_path returns
        let base_canonical = base.canonicalize().unwrap();

        // Collection of real-world path traversal attack patterns
        let malicious_inputs = [
            "../../../etc/passwd",
            "../../.ssh/authorized_keys",
            "../../../.bash_history",
            "/etc/shadow",
            "../../../../../../../../etc/passwd",
            "..\\..\\..\\windows\\system32",
            "subdir/../../etc/passwd",
        ];

        for input in malicious_inputs {
            // Direct validation should fail
            assert!(
                validate_output_path(base, input).is_err(),
                "Should reject malicious path directly: {}",
                input
            );

            // Sanitization should either:
            // 1. Fail (reject the malicious input), which is fine and often
            //    preferable for obviously malicious inputs, or
            // 2. Succeed and produce a safe filename within base_dir
            if let Ok(sanitized) = sanitize_filename(input) {
                // If sanitization succeeds, validation must also succeed.
                // Asserting this keeps a regression in validation from being
                // silently skipped.
                let path = validate_output_path(base, &sanitized).unwrap_or_else(|err| {
                    panic!(
                        "Sanitized name must validate: {} -> {} ({:?})",
                        input, sanitized, err
                    )
                });
                // ...and the result must be within base_dir
                assert!(
                    path.starts_with(&base_canonical),
                    "Sanitized path must be within base dir: {} -> {} (base: {})",
                    input,
                    path.display(),
                    base_canonical.display()
                );
            }
        }
    }
}