Skip to main content

turbomcp_cli/
path_security.rs

1//! Path validation and sanitization to prevent path traversal attacks
2//!
3//! This module provides security-critical functions to validate output paths and
4//! sanitize filenames, preventing malicious servers from writing arbitrary files
5//! via crafted tool names.
6
7use crate::error::{CliError, CliResult};
8use std::path::{Component, Path, PathBuf};
9
/// Upper bound on the length of a sanitized filename.
///
/// Compared against `str::len()`, i.e. measured in bytes.
const MAX_FILENAME_LENGTH: usize = 255;

/// Names that must never be used as an output filename.
///
/// Entries are lowercase; callers lowercase the candidate before comparing.
const RESERVED_FILENAMES: &[&str] = &[
    // Directory self/parent references
    ".", "..",
    // Console, printer, auxiliary and null devices
    "con", "prn", "aux", "nul",
    // Serial ports
    "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
    // Parallel ports
    "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
];
18
19/// Validates an output path to ensure it's within the base directory.
20///
21/// This function prevents path traversal attacks by:
22/// - Rejecting absolute paths
23/// - Rejecting paths with parent directory components (`..`)
24/// - Canonicalizing paths to resolve symlinks
25/// - Verifying the resolved path is within the base directory
26///
27/// # Security
28///
29/// This function is security-critical. It must ALWAYS be called before writing
30/// files based on external input (e.g., tool names from MCP servers).
31///
32/// # Arguments
33///
34/// * `base_dir` - The base directory that all output files must be within
35/// * `requested_path` - The requested path (relative to base_dir)
36///
37/// # Returns
38///
39/// The canonicalized path if valid, or a SecurityViolation error if invalid.
40///
41/// # Examples
42///
43/// ```no_run
44/// # use std::path::Path;
45/// # use turbomcp_cli::path_security::validate_output_path;
46/// let base = Path::new("/tmp/output");
47/// let safe_path = validate_output_path(base, "tool.json")?;
48/// // safe_path is guaranteed to be within /tmp/output
49/// # Ok::<(), turbomcp_cli::error::CliError>(())
50/// ```
51pub fn validate_output_path(base_dir: &Path, requested_path: &str) -> CliResult<PathBuf> {
52    // First, check for obvious path traversal patterns in the raw string
53    // This catches cases that might not be parsed as ParentDir on all platforms
54    if requested_path.contains("..") {
55        return Err(CliError::SecurityViolation {
56            reason: format!("Path traversal detected: '{}'", requested_path),
57            details: "Paths containing '..' are not allowed for security reasons".to_string(),
58        });
59    }
60
61    let requested = PathBuf::from(requested_path);
62
63    // Reject absolute paths
64    if requested.is_absolute() {
65        return Err(CliError::SecurityViolation {
66            reason: format!("Absolute path not allowed: '{}'", requested_path),
67            details: "All output files must use relative paths within the output directory"
68                .to_string(),
69        });
70    }
71
72    // Check for parent directory components (..)
73    // This is redundant with the string check above, but provides defense in depth
74    for component in requested.components() {
75        if matches!(component, Component::ParentDir) {
76            return Err(CliError::SecurityViolation {
77                reason: format!("Path traversal detected: '{}'", requested_path),
78                details: "Paths containing '..' components are not allowed for security reasons"
79                    .to_string(),
80            });
81        }
82    }
83
84    // Build full path
85    let full_path = base_dir.join(&requested);
86
87    // Canonicalize base directory to resolve symlinks
88    let base_canonical = base_dir.canonicalize().map_err(CliError::Io)?;
89
90    // For the full path, we need to handle the case where it doesn't exist yet
91    // If the file exists, canonicalize it directly
92    if full_path.exists() {
93        let canonical = full_path.canonicalize().map_err(CliError::Io)?;
94
95        // Verify it's within base directory
96        if !canonical.starts_with(&base_canonical) {
97            return Err(CliError::SecurityViolation {
98                reason: format!("Path escapes output directory: '{}'", canonical.display()),
99                details: format!(
100                    "Resolved path '{}' is outside base directory '{}'",
101                    canonical.display(),
102                    base_canonical.display()
103                ),
104            });
105        }
106
107        return Ok(canonical);
108    }
109
110    // File doesn't exist - we need to validate it's safe to create
111    // We must resolve symlinks in any existing ancestor directories to detect
112    // symlink-based escapes (e.g., "symlink_to_outside/file.json")
113    let relative_to_base =
114        full_path
115            .strip_prefix(base_dir)
116            .map_err(|_| CliError::SecurityViolation {
117                reason: "Internal error: path not relative to base".to_string(),
118                details: "Path validation failed unexpectedly".to_string(),
119            })?;
120
121    // Walk up the path to find the deepest existing ancestor and canonicalize it
122    // This catches symlinks that point outside the base directory
123    let mut existing_ancestor = full_path.to_path_buf();
124    let mut remaining_components: Vec<std::ffi::OsString> = Vec::new();
125    loop {
126        if existing_ancestor.exists() {
127            let canonical_ancestor = existing_ancestor.canonicalize().map_err(CliError::Io)?;
128            if !canonical_ancestor.starts_with(&base_canonical) {
129                return Err(CliError::SecurityViolation {
130                    reason: format!(
131                        "Path escapes output directory via symlink: '{}'",
132                        requested_path
133                    ),
134                    details: format!(
135                        "Resolved ancestor '{}' is outside base directory '{}'",
136                        canonical_ancestor.display(),
137                        base_canonical.display()
138                    ),
139                });
140            }
141            // Reconstruct the final path using canonical ancestor + remaining
142            let mut result = canonical_ancestor;
143            for component in remaining_components.into_iter().rev() {
144                result = result.join(component);
145            }
146            return Ok(result);
147        }
148        if let Some(file_name) = existing_ancestor.file_name() {
149            remaining_components.push(file_name.to_os_string());
150        }
151        if !existing_ancestor.pop() {
152            break;
153        }
154    }
155
156    // Fallback: no ancestor exists (shouldn't happen since base_dir exists)
157    Ok(base_canonical.join(relative_to_base))
158}
159
160/// Sanitizes a filename to prevent security issues.
161///
162/// This function:
163/// - Removes or replaces unsafe characters (only allows alphanumeric, `-`, `_`, `.`)
164/// - Rejects reserved filenames (`.`, `..`, Windows device names)
165/// - Enforces maximum length limits
166///
167/// # Security
168///
169/// This function is security-critical. It must ALWAYS be called before using
170/// external input (e.g., tool names) as filenames.
171///
172/// # Arguments
173///
174/// * `name` - The filename to sanitize
175///
176/// # Returns
177///
178/// A sanitized filename if valid, or a SecurityViolation error if the name
179/// cannot be made safe.
180///
181/// # Examples
182///
183/// ```
184/// # use turbomcp_cli::path_security::sanitize_filename;
185/// assert_eq!(sanitize_filename("my_tool")?, "my_tool");
186/// assert_eq!(sanitize_filename("my-file.txt")?, "my-file.txt");
187/// // Paths with ".." are rejected for security
188/// assert!(sanitize_filename("my/tool/../bad").is_err());
189/// # Ok::<(), turbomcp_cli::error::CliError>(())
190/// ```
191pub fn sanitize_filename(name: &str) -> CliResult<String> {
192    if name.is_empty() {
193        return Err(CliError::SecurityViolation {
194            reason: "Empty filename".to_string(),
195            details: "Filename cannot be empty".to_string(),
196        });
197    }
198
199    // Remove or replace unsafe characters
200    // Only allow: alphanumeric, dash, underscore, period
201    let sanitized: String = name
202        .chars()
203        .filter(|c| c.is_alphanumeric() || *c == '-' || *c == '_' || *c == '.')
204        .collect();
205
206    if sanitized.is_empty() {
207        return Err(CliError::SecurityViolation {
208            reason: format!("Invalid filename: '{}'", name),
209            details: "Filename must contain at least one alphanumeric character".to_string(),
210        });
211    }
212
213    // Additional check: reject if the sanitized name still contains ".."
214    // This prevents names like "......etcpasswd" which look suspicious
215    if sanitized.contains("..") {
216        return Err(CliError::SecurityViolation {
217            reason: format!("Invalid filename pattern: '{}'", sanitized),
218            details: "Filenames containing '..' patterns are not allowed".to_string(),
219        });
220    }
221
222    // Check length
223    if sanitized.len() > MAX_FILENAME_LENGTH {
224        return Err(CliError::SecurityViolation {
225            reason: format!("Filename too long: {} characters", sanitized.len()),
226            details: format!(
227                "Filename must be at most {} characters",
228                MAX_FILENAME_LENGTH
229            ),
230        });
231    }
232
233    // Check for reserved names (case-insensitive)
234    let lower = sanitized.to_lowercase();
235    if RESERVED_FILENAMES.contains(&lower.as_str()) {
236        return Err(CliError::SecurityViolation {
237            reason: format!("Reserved filename: '{}'", sanitized),
238            details: "This filename is reserved by the operating system".to_string(),
239        });
240    }
241
242    // Also reject if it starts with a period (hidden files can be problematic)
243    if sanitized.starts_with('.') && sanitized.len() <= 2 {
244        return Err(CliError::SecurityViolation {
245            reason: format!("Invalid filename: '{}'", sanitized),
246            details: "Filenames starting with '.' are not allowed".to_string(),
247        });
248    }
249
250    Ok(sanitized)
251}
252
253/// Validates and sanitizes a filename, then constructs a safe output path.
254///
255/// This is a convenience function that combines `sanitize_filename` and
256/// `validate_output_path` with automatic `.json` extension.
257///
258/// # Security
259///
260/// This function performs all necessary security validations.
261///
262/// # Arguments
263///
264/// * `base_dir` - The base directory for output files
265/// * `name` - The filename to sanitize (e.g., tool name)
266/// * `extension` - The file extension to add (e.g., "json")
267///
268/// # Returns
269///
270/// A validated, safe output path.
271pub fn safe_output_path(base_dir: &Path, name: &str, extension: &str) -> CliResult<PathBuf> {
272    let sanitized = sanitize_filename(name)?;
273    let filename = if extension.is_empty() {
274        sanitized
275    } else {
276        format!("{}.{}", sanitized, extension)
277    };
278    validate_output_path(base_dir, &filename)
279}
280
#[cfg(test)]
mod tests {
    //! Unit tests for filename sanitization and output-path validation.
    //!
    //! Filesystem-backed tests run inside a fresh `TempDir` per test.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Names made only of allowed characters come back unchanged.
    #[test]
    fn test_sanitize_valid_filenames() {
        assert_eq!(sanitize_filename("my_tool").unwrap(), "my_tool");
        assert_eq!(sanitize_filename("tool-123").unwrap(), "tool-123");
        assert_eq!(sanitize_filename("tool.v1").unwrap(), "tool.v1");
        assert_eq!(sanitize_filename("Tool_Name_123").unwrap(), "Tool_Name_123");
    }

    /// Disallowed characters are dropped rather than replaced.
    #[test]
    fn test_sanitize_removes_unsafe_chars() {
        // Slashes and other path separators should be removed
        assert_eq!(sanitize_filename("my/tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("my\\tool").unwrap(), "mytool");
        assert_eq!(sanitize_filename("tool:name").unwrap(), "toolname");
        assert_eq!(sanitize_filename("tool*name").unwrap(), "toolname");
    }

    /// Reserved names are rejected regardless of case.
    #[test]
    fn test_sanitize_rejects_reserved_names() {
        assert!(sanitize_filename(".").is_err());
        assert!(sanitize_filename("..").is_err());
        assert!(sanitize_filename("con").is_err());
        assert!(sanitize_filename("CON").is_err());
        assert!(sanitize_filename("prn").is_err());
        assert!(sanitize_filename("aux").is_err());
        assert!(sanitize_filename("nul").is_err());
        assert!(sanitize_filename("com1").is_err());
        assert!(sanitize_filename("lpt1").is_err());
    }

    /// Empty input, and input that becomes empty once filtered, is rejected.
    #[test]
    fn test_sanitize_rejects_empty() {
        assert!(sanitize_filename("").is_err());
        assert!(sanitize_filename("///").is_err()); // becomes empty after sanitization
        assert!(sanitize_filename("***").is_err()); // becomes empty after sanitization
    }

    /// Plain relative paths, including ones in an existing subdirectory, pass.
    #[test]
    fn test_validate_accepts_relative_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Simple filename
        let result = validate_output_path(base, "tool.json");
        assert!(result.is_ok());

        // Subdirectory (create it first)
        fs::create_dir_all(base.join("subdir")).unwrap();
        let result = validate_output_path(base, "subdir/tool.json");
        assert!(result.is_ok());
    }

    /// Absolute paths never validate.
    #[test]
    fn test_validate_rejects_absolute_paths() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "/etc/passwd").is_err());
        assert!(validate_output_path(base, "/tmp/evil").is_err());

        // Windows-style absolute paths
        #[cfg(windows)]
        {
            assert!(validate_output_path(base, "C:\\Windows\\System32").is_err());
        }
    }

    /// Any path containing `..` is rejected.
    #[test]
    fn test_validate_rejects_parent_directory() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        assert!(validate_output_path(base, "..").is_err());
        assert!(validate_output_path(base, "../etc/passwd").is_err());
        assert!(validate_output_path(base, "../../.ssh/authorized_keys").is_err());
        assert!(validate_output_path(base, "subdir/../../../etc/passwd").is_err());
    }

    /// A file that already exists inside the base directory validates.
    #[test]
    fn test_validate_handles_existing_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Create a file
        let test_file = base.join("test.json");
        fs::write(&test_file, "{}").unwrap();

        // Should validate successfully
        let result = validate_output_path(base, "test.json");
        assert!(result.is_ok());
    }

    /// Targets that do not exist yet validate, even in a missing subdirectory.
    #[test]
    fn test_validate_handles_nonexistent_files() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // File doesn't exist yet, but should be valid
        let result = validate_output_path(base, "new_file.json");
        assert!(result.is_ok());

        // Subdirectory doesn't exist, but path should be valid
        let result = validate_output_path(base, "newdir/file.json");
        assert!(result.is_ok());
    }

    /// `safe_output_path` sanitizes, appends the extension, and validates.
    #[test]
    fn test_safe_output_path_integration() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Valid tool name
        let result = safe_output_path(base, "my_tool", "json");
        assert!(result.is_ok());
        assert!(result.unwrap().ends_with("my_tool.json"));

        // Malicious tool name with path traversal - should be rejected during sanitization
        let result = safe_output_path(base, "../../../etc/passwd", "json");
        assert!(result.is_err(), "Should reject path traversal attempts");
    }

    /// Runs a set of traversal payloads through both entry points.
    #[test]
    fn test_comprehensive_attack_scenarios() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        // Canonicalize base to match what validate_output_path returns
        let base_canonical = base.canonicalize().unwrap();

        // Collection of real-world path traversal attack patterns
        let malicious_inputs = vec![
            "../../../etc/passwd",
            "../../.ssh/authorized_keys",
            "../../../.bash_history",
            "/etc/shadow",
            "../../../../../../../../etc/passwd",
            "..\\..\\..\\windows\\system32",
            "subdir/../../etc/passwd",
        ];

        for input in malicious_inputs {
            // Direct validation should fail
            let result = validate_output_path(base, input);
            assert!(
                result.is_err(),
                "Should reject malicious path directly: {}",
                input
            );

            // Sanitization should either:
            // 1. Fail (reject the malicious input) - fine, nothing more to check
            // 2. Succeed and produce a safe filename within base_dir
            if let Ok(sanitized) = sanitize_filename(input) {
                // If sanitization succeeds, validation must also succeed; a
                // failure here fails the test instead of being skipped.
                let path = validate_output_path(base, &sanitized).unwrap_or_else(|err| {
                    panic!(
                        "Sanitized name '{}' (from '{}') failed validation: {:?}",
                        sanitized, input, err
                    )
                });
                assert!(
                    path.starts_with(&base_canonical),
                    "Sanitized path must be within base dir: {} -> {} (base: {})",
                    input,
                    path.display(),
                    base_canonical.display()
                );
            }
        }
    }
}