backup_suite/core/validation.rs
//! Input validation module
//!
//! Provides comprehensive input validation and sanitization functions
//! to prevent security vulnerabilities:
//! - Command injection
//! - Path traversal attacks
//! - Null byte injection
//! - Excessively long inputs (DoS prevention)
9use anyhow::{bail, Context, Result};
10use regex::Regex;
11use std::path::{Path, PathBuf};
12
/// Upper bound on the byte length of a general input string.
///
/// Anything longer is rejected outright so that oversized payloads cannot be
/// used for denial-of-service.
const MAX_INPUT_LENGTH: usize = 1_000;

/// Upper bound on the byte length of a single path component or file name.
const MAX_PATH_COMPONENT_LENGTH: usize = 255;
18
19/// Validate and sanitize general user input
20///
21/// Applies whitelist-based validation allowing only safe characters:
22/// - Alphanumeric characters (a-zA-Z0-9)
23/// - Whitespace
24/// - Common safe punctuation: . _ / -
25///
26/// # Security Considerations
27/// - Prevents command injection by rejecting shell metacharacters
28/// - Blocks null bytes
29/// - Limits input length to prevent DoS
30///
31/// # Errors
32/// Returns error if input contains disallowed characters or exceeds length limit
33///
34/// # Examples
35/// ```
36/// use backup_suite::core::validation::validate_input;
37///
38/// // Valid inputs
39/// assert!(validate_input("backup_2024-01-15").is_ok());
40/// assert!(validate_input("user/documents").is_ok());
41///
42/// // Invalid inputs (shell metacharacters)
43/// assert!(validate_input("test; rm -rf /").is_err());
44/// assert!(validate_input("test | cat /etc/passwd").is_err());
45/// assert!(validate_input("test && malicious").is_err());
46/// ```
47pub fn validate_input(input: &str) -> Result<()> {
48 // Check for null bytes first (critical security check)
49 if input.contains('\0') {
50 bail!("Invalid input: null byte detected");
51 }
52
53 // Length limit to prevent DoS
54 if input.len() > MAX_INPUT_LENGTH {
55 bail!(
56 "Input too long: {} characters (max: {} characters)",
57 input.len(),
58 MAX_INPUT_LENGTH
59 );
60 }
61
62 // Whitelist of allowed characters (alphanumeric, whitespace, and safe punctuation)
63 let allowed_chars = Regex::new(r"^[a-zA-Z0-9\s._/-]+$")?;
64
65 if !allowed_chars.is_match(input) {
66 bail!("Invalid input: contains disallowed characters (only alphanumeric, spaces, '.', '_', '/', '-' are allowed)");
67 }
68
69 Ok(())
70}
71
72/// Sanitize string by removing control characters
73///
74/// Removes all control characters except whitespace (space, tab, newline).
75/// This provides defense-in-depth even when used after `validate_input`.
76///
77/// # Security Considerations
78/// - Removes null bytes
79/// - Removes other control characters that could interfere with terminal output
80/// - Preserves legitimate whitespace
81///
82/// # Errors
83/// Returns error if null byte is detected (fail-fast for security)
84///
85/// # Examples
86/// ```
87/// use backup_suite::core::validation::sanitize_string;
88///
89/// // Normal text passes through
90/// assert_eq!(sanitize_string("Hello World").unwrap(), "Hello World");
91///
92/// // Null bytes are rejected
93/// assert!(sanitize_string("test\0malicious").is_err());
94///
95/// // Control characters are removed
96/// assert_eq!(sanitize_string("test\x01\x02data").unwrap(), "testdata");
97/// ```
98pub fn sanitize_string(input: &str) -> Result<String> {
99 // Null byte check (critical security validation)
100 if input.contains('\0') {
101 bail!("Null byte detected in input");
102 }
103
104 // Remove control characters except whitespace
105 let sanitized: String = input
106 .chars()
107 .filter(|c| !c.is_control() || c.is_whitespace())
108 .collect();
109
110 Ok(sanitized)
111}
112
113/// Validate file path and prevent path traversal attacks
114///
115/// Performs comprehensive path validation:
116/// 1. Canonicalizes path (resolves symlinks, relative components)
117/// 2. Validates against allowed base directory
118/// 3. Checks component lengths
119/// 4. Validates file name characters
120///
121/// # Security Considerations
122/// - Prevents path traversal (../ attacks)
123/// - Protects against symlink attacks
124/// - Validates path components
125/// - Ensures path stays within allowed directory
126///
127/// # Errors
128/// Returns error if:
129/// - Path does not exist (canonicalize fails)
130/// - Path escapes allowed base directory
131/// - Path components exceed length limits
132/// - Path contains invalid characters
133///
134/// # Examples
135/// ```no_run
136/// use backup_suite::core::validation::validate_path;
137/// use std::path::PathBuf;
138/// use std::env;
139///
140/// let current_dir = env::current_dir().unwrap();
141/// let safe_path = current_dir.join("backup/data.txt");
142///
143/// // Valid path within current directory
144/// assert!(validate_path(&safe_path, ¤t_dir).is_ok());
145///
146/// // Invalid: path traversal attempt
147/// let malicious = PathBuf::from("/tmp/../../etc/passwd");
148/// assert!(validate_path(&malicious, ¤t_dir).is_err());
149/// ```
150pub fn validate_path(path: &Path, allowed_base: &Path) -> Result<PathBuf> {
151 // Canonicalize both paths (resolves symlinks and relative components)
152 let canonical_path = path
153 .canonicalize()
154 .with_context(|| format!("Invalid path: {}", path.display()))?;
155
156 let canonical_base = allowed_base
157 .canonicalize()
158 .with_context(|| format!("Invalid base directory: {}", allowed_base.display()))?;
159
160 // Ensure path is within allowed base directory
161 if !canonical_path.starts_with(&canonical_base) {
162 bail!(
163 "Path traversal detected: {} is outside allowed directory {}",
164 canonical_path.display(),
165 canonical_base.display()
166 );
167 }
168
169 // Validate each path component
170 for component in canonical_path.components() {
171 if let Some(component_str) = component.as_os_str().to_str() {
172 // Check component length
173 if component_str.len() > MAX_PATH_COMPONENT_LENGTH {
174 bail!(
175 "Path component too long: {} (max: {})",
176 component_str.len(),
177 MAX_PATH_COMPONENT_LENGTH
178 );
179 }
180
181 // Check for null bytes in path component
182 if component_str.contains('\0') {
183 bail!("Null byte detected in path component");
184 }
185 }
186 }
187
188 Ok(canonical_path)
189}
190
191/// Validate file name (without directory components)
192///
193/// Ensures file name contains only safe characters and is not empty.
194///
195/// # Security Considerations
196/// - Prevents shell metacharacters in file names
197/// - Blocks null bytes
198/// - Rejects overly long names
199///
200/// # Errors
201/// Returns error if:
202/// - Name is empty
203/// - Name is too long (>255 characters)
204/// - Name contains disallowed characters
205///
206/// # Examples
207/// ```
208/// use backup_suite::core::validation::validate_filename;
209///
210/// // Valid file names
211/// assert!(validate_filename("backup_2024-01-15.tar.gz").is_ok());
212/// assert!(validate_filename("user_data.txt").is_ok());
213///
214/// // Invalid file names
215/// assert!(validate_filename("").is_err());
216/// assert!(validate_filename("test;rm -rf").is_err());
217/// assert!(validate_filename("file\0malicious").is_err());
218/// ```
219pub fn validate_filename(name: &str) -> Result<()> {
220 // Check for empty name
221 if name.is_empty() {
222 bail!("File name cannot be empty");
223 }
224
225 // Length check
226 if name.len() > MAX_PATH_COMPONENT_LENGTH {
227 bail!(
228 "File name too long: {} characters (max: {})",
229 name.len(),
230 MAX_PATH_COMPONENT_LENGTH
231 );
232 }
233
234 // Null byte check
235 if name.contains('\0') {
236 bail!("Null byte detected in file name");
237 }
238
239 // Whitelist: alphanumeric, dots, underscores, hyphens
240 let allowed_chars = Regex::new(r"^[a-zA-Z0-9._-]+$")?;
241
242 if !allowed_chars.is_match(name) {
243 bail!("Invalid file name: contains disallowed characters (only alphanumeric, '.', '_', '-' are allowed)");
244 }
245
246 Ok(())
247}
248
/// Unit tests for the validation helpers.
///
/// The path tests build their fixtures inside a fresh temporary directory so
/// that they do not depend on the layout of the host file system.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // ═══════════════════════════════════════════════════════════════
    // validate_input tests
    // ═══════════════════════════════════════════════════════════════

    #[test]
    fn test_validate_input_valid() {
        assert!(validate_input("backup_2024-01-15").is_ok());
        assert!(validate_input("user documents").is_ok());
        assert!(validate_input("path/to/file").is_ok());
        assert!(validate_input("123-456_789.txt").is_ok());
    }

    #[test]
    fn test_validate_input_shell_metacharacters() {
        assert!(validate_input("test; rm -rf /").is_err());
        assert!(validate_input("test | cat /etc/passwd").is_err());
        assert!(validate_input("test && malicious").is_err());
        assert!(validate_input("test $(whoami)").is_err());
        assert!(validate_input("test `whoami`").is_err());
        assert!(validate_input("test > output").is_err());
        assert!(validate_input("test < input").is_err());
    }

    #[test]
    fn test_validate_input_null_byte() {
        assert!(validate_input("test\0malicious").is_err());
        assert!(validate_input("\0").is_err());
    }

    #[test]
    fn test_validate_input_too_long() {
        let long_input = "a".repeat(MAX_INPUT_LENGTH + 1);
        assert!(validate_input(&long_input).is_err());
    }

    #[test]
    fn test_validate_input_max_length_ok() {
        let max_input = "a".repeat(MAX_INPUT_LENGTH);
        assert!(validate_input(&max_input).is_ok());
    }

    // ═══════════════════════════════════════════════════════════════
    // sanitize_string tests
    // ═══════════════════════════════════════════════════════════════

    #[test]
    fn test_sanitize_string_normal() {
        assert_eq!(sanitize_string("Hello World").unwrap(), "Hello World");
        assert_eq!(sanitize_string("test 123").unwrap(), "test 123");
    }

    #[test]
    fn test_sanitize_string_null_byte() {
        assert!(sanitize_string("test\0malicious").is_err());
        assert!(sanitize_string("\0").is_err());
    }

    #[test]
    fn test_sanitize_string_control_characters() {
        // Control characters (except whitespace) should be removed
        assert_eq!(sanitize_string("test\x01\x02data").unwrap(), "testdata");
        assert_eq!(
            sanitize_string("hello\x1b[31mworld").unwrap(),
            "hello[31mworld"
        );
    }

    #[test]
    fn test_sanitize_string_preserve_whitespace() {
        assert_eq!(sanitize_string("hello\nworld").unwrap(), "hello\nworld");
        assert_eq!(sanitize_string("tab\there").unwrap(), "tab\there");
    }

    // ═══════════════════════════════════════════════════════════════
    // validate_path tests
    // ═══════════════════════════════════════════════════════════════

    #[test]
    fn test_validate_path_within_base() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Create test file
        let test_file = base.join("test.txt");
        std::fs::write(&test_file, "test").unwrap();

        // Should succeed - file within base directory
        assert!(validate_path(&test_file, base).is_ok());
    }

    #[test]
    fn test_validate_path_nonexistent() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();
        let nonexistent = base.join("nonexistent");

        // Should fail - file doesn't exist (canonicalize fails)
        assert!(validate_path(&nonexistent, base).is_err());
    }

    #[test]
    fn test_validate_path_escape_attempt() {
        // Layout: <tmp>/allowed/ is the base, <tmp>/secret.txt lies outside it.
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path().join("allowed");
        std::fs::create_dir(&base).unwrap();

        let outside = temp_dir.path().join("secret.txt");
        std::fs::write(&outside, "secret").unwrap();

        // The target exists, so canonicalization succeeds and the rejection
        // must come from the containment check.
        let escape_path = base.join("../secret.txt");
        assert!(validate_path(&escape_path, &base).is_err());
    }

    #[test]
    fn test_validate_path_sibling_with_common_prefix() {
        // "<tmp>/allowed_evil" shares a string prefix with "<tmp>/allowed"
        // but is a different directory and must not be accepted.
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path().join("allowed");
        let sibling = temp_dir.path().join("allowed_evil");
        std::fs::create_dir(&base).unwrap();
        std::fs::create_dir(&sibling).unwrap();

        let sibling_file = sibling.join("data.txt");
        std::fs::write(&sibling_file, "data").unwrap();

        assert!(validate_path(&sibling_file, &base).is_err());
    }

    #[test]
    fn test_validate_path_long_component() {
        let temp_dir = TempDir::new().unwrap();
        let base = temp_dir.path();

        // Create file with very long name
        let long_name = "a".repeat(MAX_PATH_COMPONENT_LENGTH + 1);
        let long_path = base.join(&long_name);

        // Try to create file (might fail on some filesystems)
        if std::fs::write(&long_path, "test").is_ok() {
            assert!(validate_path(&long_path, base).is_err());
        }
    }

    // ═══════════════════════════════════════════════════════════════
    // validate_filename tests
    // ═══════════════════════════════════════════════════════════════

    #[test]
    fn test_validate_filename_valid() {
        assert!(validate_filename("backup.tar.gz").is_ok());
        assert!(validate_filename("data_2024-01-15.txt").is_ok());
        assert!(validate_filename("file-123.zip").is_ok());
    }

    #[test]
    fn test_validate_filename_empty() {
        assert!(validate_filename("").is_err());
    }

    #[test]
    fn test_validate_filename_too_long() {
        let long_name = "a".repeat(MAX_PATH_COMPONENT_LENGTH + 1);
        assert!(validate_filename(&long_name).is_err());
    }

    #[test]
    fn test_validate_filename_null_byte() {
        assert!(validate_filename("test\0.txt").is_err());
    }

    #[test]
    fn test_validate_filename_shell_metacharacters() {
        assert!(validate_filename("test;rm").is_err());
        assert!(validate_filename("file|pipe").is_err());
        assert!(validate_filename("data&&cmd").is_err());
        assert!(validate_filename("test>out").is_err());
    }

    #[test]
    fn test_validate_filename_path_separators() {
        // File names should not contain path separators
        assert!(validate_filename("dir/file.txt").is_err());
        assert!(validate_filename("../escape").is_err());
    }

    #[test]
    fn test_validate_filename_spaces() {
        // Spaces not allowed in our strict file name validation
        assert!(validate_filename("file name.txt").is_err());
    }
}