magellan 3.1.7

Deterministic codebase mapping tool for local development
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
//! Path validation and canonicalization utilities.
//!
//! Provides security-critical path validation to prevent directory traversal attacks.
//! All file access operations MUST validate paths before accessing filesystem resources.

use anyhow::Result;
use camino::Utf8Path;
use std::path::{Path, PathBuf};

/// Errors reported by the path-validation helpers in this module.
///
/// Every variant carries the offending path(s) as strings produced with
/// `to_string_lossy`, so they are meant for display and logging only.
#[derive(Debug, thiserror::Error)]
pub enum PathValidationError {
    /// The path could not be canonicalized (e.g. it doesn't exist or permission
    /// was denied). `is_safe_symlink` also returns this when the link itself
    /// cannot be read.
    ///
    /// Field: the path as supplied by the caller.
    #[error("cannot canonicalize path: {0}")]
    CannotCanonicalize(String),

    /// The canonicalized path does not start with the canonicalized project root.
    ///
    /// Fields: the canonical path, then the canonical root.
    #[error("path escapes project root: {0} (root: {1})")]
    OutsideRoot(String, String),

    /// The raw path string was rejected by `has_suspicious_traversal` before any
    /// filesystem access took place.
    ///
    /// Field: the path as supplied by the caller.
    #[error("path contains suspicious traversal patterns: {0}")]
    SuspiciousTraversal(String),

    /// A symlink resolves to a location outside the project root.
    ///
    /// Fields: the symlink's own path, then the target stored in the link.
    #[error("symlink escapes project root: {0} -> {1}")]
    SymlinkEscape(String, String),
}

/// Resolve `path` to its canonical absolute form via `std::fs::canonicalize`.
///
/// Canonicalization resolves symlinks as well as `.` and `..` components, and
/// therefore requires the path to exist and be accessible.
///
/// # Arguments
/// * `path` - Path to canonicalize
///
/// # Errors
/// Any failure of the underlying call is reported as
/// `PathValidationError::CannotCanonicalize` carrying the input path; the
/// original I/O error is discarded.
pub fn canonicalize_path(path: &Path) -> Result<PathBuf, PathValidationError> {
    match std::fs::canonicalize(path) {
        Ok(resolved) => Ok(resolved),
        Err(_) => Err(PathValidationError::CannotCanonicalize(
            path.to_string_lossy().into_owned(),
        )),
    }
}

/// Produce a consistent string form of `path`, whether or not it exists.
///
/// Unlike strict canonicalization this never fails on a missing file, which
/// matters for watcher delete events where the file is already gone.
///
/// # Behavior
/// - Path can be canonicalized: the canonical path is returned (symlinks, `..`
///   and `.` resolved).
/// - Otherwise: the path is returned as given, minus one leading `./` if present.
/// - Non-UTF-8 content is converted lossily so the result is always a `String`.
///
/// # Arguments
/// * `path` - Path to normalize
///
/// # Returns
/// Normalized path string. The current implementation always returns `Ok`.
///
/// # Examples
/// ```
/// # use magellan::validation::normalize_path;
/// # use std::path::Path;
/// // Existing file: returns canonical path
/// // Non-existing: strips ./ prefix
/// let normalized = normalize_path(Path::new("./src/lib.rs")).unwrap();
/// assert!(normalized.contains("src/lib.rs"));
/// assert!(!normalized.starts_with("./"));
/// ```
pub fn normalize_path(path: &Path) -> Result<String> {
    match std::fs::canonicalize(path) {
        // Existing path: the canonical form is the normalized form.
        Ok(resolved) => Ok(resolved.to_string_lossy().into_owned()),
        // Missing or unreadable path: fall back to light textual cleanup.
        Err(_) => {
            let lossy = path.to_string_lossy();
            let text: &str = &lossy;
            Ok(text.strip_prefix("./").unwrap_or(text).to_string())
        }
    }
}

/// Ensure `path` resolves to a location inside `root`.
///
/// Steps, in order:
/// 1. Reject the raw path string if `has_suspicious_traversal` flags it. This
///    runs before touching the filesystem, so inputs like `../../../etc/passwd`
///    are refused even when canonicalization would fail on a missing ancestor.
/// 2. Canonicalize `path`, then `root` (resolves symlinks, `.`, `..`).
/// 3. Require the canonical path to start with the canonical root.
///
/// # Arguments
/// * `path` - Path to validate
/// * `root` - Project root directory
///
/// # Returns
/// The canonical form of `path` when it lies within `root`.
///
/// # Errors
/// * `SuspiciousTraversal` - the raw path string was flagged in step 1
/// * `CannotCanonicalize` - `path` or `root` could not be canonicalized
/// * `OutsideRoot` - the canonical path is not under the canonical root
///
/// # Security
/// This is the PRIMARY defense against directory traversal attacks.
/// All file access MUST go through this validation.
pub fn validate_path_within_root(path: &Path, root: &Path) -> Result<PathBuf, PathValidationError> {
    let raw = path.to_string_lossy();
    if has_suspicious_traversal(&raw) {
        return Err(PathValidationError::SuspiciousTraversal(raw.into_owned()));
    }

    // `canonicalize_path` already reports the failing path in its error,
    // so both results can be propagated unchanged.
    let resolved = canonicalize_path(path)?;
    let resolved_root = canonicalize_path(root)?;

    if resolved.starts_with(&resolved_root) {
        Ok(resolved)
    } else {
        Err(PathValidationError::OutsideRoot(
            resolved.to_string_lossy().into_owned(),
            resolved_root.to_string_lossy().into_owned(),
        ))
    }
}

/// Heuristically flag path strings that look like directory-traversal attempts.
///
/// This is a purely textual pre-check that never touches the filesystem, so it
/// still works when canonicalization would fail (e.g. an intermediate directory
/// doesn't exist). Backslashes are treated as separators, so Unix-style and
/// Windows-style spellings of the same path get the same verdict.
///
/// A path is flagged when any of the following holds:
/// 1. It contains three or more `..` components. Whole components are counted,
///    so a trailing `..` without a separator counts, while a file name that
///    merely ends in `..` (such as `a../b`) does not.
/// 2. It starts with exactly one parent hop (`../` but not `../../`) and has at
///    most two separators in total, e.g. `../config` or `../dir/file`.
/// 3. A `.` component is followed, anywhere later, by a `..` component, e.g.
///    `./subdir/../../etc`; mixing the two obscures where the path ends up.
///
/// # Arguments
/// * `path` - Raw path string to inspect
///
/// # Returns
/// `true` if the path matches one of the patterns above, `false` otherwise.
pub fn has_suspicious_traversal(path: &str) -> bool {
    // Fold both separator styles into one so each rule is checked only once.
    let normalized = path.replace('\\', "/");

    // Rule 1: three or more parent components.
    let parent_components = normalized.split('/').filter(|part| *part == "..").count();
    if parent_components >= 3 {
        return true;
    }

    // Rule 2: a single leading parent hop followed by a shallow path.
    if normalized.starts_with("../") && !normalized.starts_with("../../") {
        let separators = normalized.matches('/').count();
        if separators <= 2 {
            return true;
        }
    }

    // Rule 3: skip ahead to the first `.` component, then look for any `..`
    // among the components that follow it.
    normalized
        .split('/')
        .skip_while(|part| *part != ".")
        .any(|part| part == "..")
}

/// Verify that the symlink at `symlink_path` points to a location inside `root`.
///
/// The link target is read with `std::fs::read_link`. An absolute target is
/// validated as-is; a relative target is first joined onto the symlink's parent
/// directory. Validation itself is delegated to `validate_path_within_root`.
///
/// # Arguments
/// * `symlink_path` - Path to the symlink itself
/// * `root` - Project root directory
///
/// # Returns
/// `Ok(true)` when the target lies within `root`. This function never returns
/// `Ok(false)`; every unsafe or unverifiable link is reported as an error.
///
/// # Errors
/// * `CannotCanonicalize` - the link could not be read, or its target could not
///   be canonicalized
/// * `SymlinkEscape` - the target resolves outside `root`
/// * Any other error from `validate_path_within_root` is passed through unchanged
pub fn is_safe_symlink(symlink_path: &Path, root: &Path) -> Result<bool, PathValidationError> {
    let target = match std::fs::read_link(symlink_path) {
        Ok(link_target) => link_target,
        Err(_) => {
            return Err(PathValidationError::CannotCanonicalize(
                symlink_path.to_string_lossy().to_string(),
            ))
        }
    };

    // Absolute targets stand on their own; relative ones are interpreted
    // relative to the directory that contains the link.
    let candidate = if target.is_absolute() {
        target.clone()
    } else {
        let base = symlink_path.parent().unwrap_or(symlink_path);
        base.join(&target)
    };

    validate_path_within_root(&candidate, root)
        .map(|_| true)
        .map_err(|err| match err {
            // Re-label an out-of-root result so the caller sees the link and
            // its stored target rather than the resolved path.
            PathValidationError::OutsideRoot(_, _) => PathValidationError::SymlinkEscape(
                symlink_path.to_string_lossy().to_string(),
                target.to_string_lossy().to_string(),
            ),
            other => other,
        })
}

/// Validate a camino `Utf8Path` against `root`.
///
/// Thin adapter: the UTF-8 path is viewed as a standard `Path` and handed to
/// `validate_path_within_root`, so the checks, return value and errors are
/// exactly those of that function.
pub fn validate_utf8_path(
    utf8_path: &Utf8Path,
    root: &Path,
) -> Result<PathBuf, PathValidationError> {
    validate_path_within_root(utf8_path.as_std_path(), root)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// `normalize_path` handles `./` segments for both missing and existing files.
    #[test]
    fn test_normalize_path_relative_prefix() {
        let temp_dir = TempDir::new().unwrap();

        // Test with ./ prefix on non-existing path (in temp dir so it doesn't exist)
        let nonexist = temp_dir.path().join("./src/lib.rs");
        let result = normalize_path(&nonexist).unwrap();
        assert!(result.contains("src/lib.rs"));
        assert!(!result.starts_with("./"));

        // Test without ./ prefix
        let nonexist2 = temp_dir.path().join("src/main.rs");
        let result = normalize_path(&nonexist2).unwrap();
        assert!(result.contains("src/main.rs"));

        // Test absolute path returns canonical (if exists)
        let test_file = temp_dir.path().join("test.rs");
        std::fs::write(&test_file, b"fn test() {}").unwrap();
        let result = normalize_path(&test_file).unwrap();
        assert!(result.contains("test.rs"));

        // Test ./ prefix on existing file still canonicalizes
        let relative_to_temp = temp_dir.path().join("./test.rs");
        let result = normalize_path(&relative_to_temp).unwrap();
        assert!(result.contains("test.rs"));
        assert!(!result.starts_with("./"));
    }

    /// An existing absolute path is canonicalized.
    #[test]
    fn test_normalize_path_absolute() {
        let temp_dir = TempDir::new().unwrap();

        // Create a test file
        let test_file = temp_dir.path().join("absolute.rs");
        std::fs::write(&test_file, b"fn abs() {}").unwrap();

        // Absolute path should canonicalize
        let result = normalize_path(&test_file).unwrap();
        assert!(result.contains("absolute.rs"));
        // On most systems, canonicalized paths are absolute
        assert!(!result.starts_with("./"));
    }

    /// Missing paths fall back to textual cleanup instead of failing.
    #[test]
    fn test_normalize_path_non_existing() {
        // Non-existing path should strip ./ but not fail
        let result = normalize_path(Path::new("./does/not/exist.rs")).unwrap();
        assert_eq!(result, "does/not/exist.rs");

        // Non-existing without ./ should be unchanged
        let result = normalize_path(Path::new("nonexistent/path.rs")).unwrap();
        assert_eq!(result, "nonexistent/path.rs");
    }

    /// Canonical output contains no `..` components.
    #[test]
    fn test_normalize_path_redundant_dots() {
        let temp_dir = TempDir::new().unwrap();

        // Create nested structure
        let subdir = temp_dir.path().join("a/b");
        std::fs::create_dir_all(&subdir).unwrap();
        let test_file = subdir.join("test.rs");
        std::fs::write(&test_file, b"fn test() {}").unwrap();

        // Canonicalize should resolve the path completely
        let result = normalize_path(&test_file).unwrap();
        assert!(result.contains("test.rs"));
        // Should not contain .. or . components
        assert!(!result.contains(".."));
    }

    /// Deep or bare parent references are flagged in both separator styles.
    #[test]
    fn test_has_suspicious_traversal_parent_patterns() {
        assert!(has_suspicious_traversal("../../../etc/passwd"));
        // One escaped backslash per separator: the string is ..\..\..\windows\system32
        assert!(has_suspicious_traversal("..\\..\\..\\windows\\system32"));
        assert!(has_suspicious_traversal("../config"));
        assert!(has_suspicious_traversal("..\\config"));
    }

    /// A `.` component followed by `..` components is flagged.
    #[test]
    fn test_has_suspicious_traversal_mixed_patterns() {
        assert!(has_suspicious_traversal("./subdir/../../etc"));
        assert!(has_suspicious_traversal(".\\subdir\\..\\..\\etc"));
    }

    /// Ordinary project-relative paths are not flagged.
    #[test]
    fn test_has_suspicious_traversal_normal_paths() {
        assert!(!has_suspicious_traversal("src/main.rs"));
        assert!(!has_suspicious_traversal("./src/lib.rs"));
        assert!(!has_suspicious_traversal("../parent/src/lib.rs")); // Only 1 parent
        assert!(!has_suspicious_traversal("../../normal")); // Only 2 parents
    }

    /// A file inside the root validates and comes back in canonical form.
    #[test]
    fn test_validate_path_within_root_valid() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create a file inside root
        let file_path = root.join("test.rs");
        fs::write(&file_path, b"fn test() {}").unwrap();

        let result = validate_path_within_root(&file_path, root);
        assert!(result.is_ok());

        // The returned path is canonical, so it must be compared against the
        // canonical root: the temp dir may sit behind a symlink (macOS maps
        // /var to /private/var) or gain a verbatim prefix (Windows).
        let canonical_root = fs::canonicalize(root).unwrap();
        assert!(result.unwrap().starts_with(&canonical_root));
    }

    /// Deep parent traversal is rejected by the textual pre-check.
    #[test]
    fn test_validate_path_within_root_traversal_rejected() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Try to access file outside root using parent traversal
        let outside = root.join("../../../etc/passwd");

        let result = validate_path_within_root(&outside, root);
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            PathValidationError::SuspiciousTraversal(_)
        ));
    }

    /// An existing absolute path outside the root is reported as `OutsideRoot`.
    #[test]
    fn test_validate_path_within_root_absolute_outside() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Use a file in a second temp dir rather than a system file such as
        // /etc/passwd, so the outside path is guaranteed to exist everywhere.
        let outside_dir = TempDir::new().unwrap();
        let outside = outside_dir.path().join("outside.rs");
        fs::write(&outside, b"fn outside() {}").unwrap();

        let result = validate_path_within_root(&outside, root);
        assert!(matches!(
            result,
            Err(PathValidationError::OutsideRoot(_, _))
        ));
    }

    /// A symlink whose target lives inside the root is accepted.
    #[test]
    fn test_is_safe_symlink_inside_root() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create a target file
        let target = root.join("target.rs");
        fs::write(&target, b"fn target() {}").unwrap();

        // Create symlink pointing to target
        let symlink = root.join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&target, &symlink).unwrap();

        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&target, &symlink).unwrap();

        // On supported platforms, verify symlink is safe
        #[cfg(any(unix, windows))]
        {
            let result = is_safe_symlink(&symlink, root);
            assert!(result.is_ok());
            assert!(result.unwrap());
        }
    }

    /// A symlink whose target lives outside the root is rejected.
    #[test]
    fn test_is_safe_symlink_outside_root() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create a target file outside root
        let outside_dir = TempDir::new().unwrap();
        let target = outside_dir.path().join("outside.rs");
        fs::write(&target, b"fn outside() {}").unwrap();

        // Create symlink inside root pointing outside
        let symlink = root.join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&target, &symlink).unwrap();

        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&target, &symlink).unwrap();

        // On supported platforms, verify symlink is detected as unsafe
        #[cfg(any(unix, windows))]
        {
            let result = is_safe_symlink(&symlink, root);
            assert!(result.is_err());
            // Absolute symlinks pointing outside root should produce SymlinkEscape
            match result.unwrap_err() {
                PathValidationError::SymlinkEscape(_, _) => {}
                PathValidationError::CannotCanonicalize(_) => {
                    // Broken symlinks are also unsafe
                }
                other => panic!(
                    "Expected SymlinkEscape or CannotCanonicalize, got: {:?}",
                    other
                ),
            }
        }
    }

    /// Validation accepts the platform's separator styles for an in-root file.
    #[test]
    fn test_cross_platform_path_handling() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create a file with subdirectory
        let subdir = root.join("src");
        fs::create_dir(&subdir).unwrap();
        let file_path = subdir.join("main.rs");
        fs::write(&file_path, b"fn main() {}").unwrap();

        // Test with forward slash path (Unix-style)
        let path_str = file_path.to_string_lossy().replace('\\', "/");
        let result = validate_path_within_root(Path::new(&path_str), root);
        assert!(result.is_ok());

        // Test with backslash path (Windows-style) - this may not work on Unix
        // but the canonicalization should handle it if the OS supports it
        if cfg!(windows) {
            let path_str_win = file_path.to_string_lossy().replace('/', "\\");
            let result_win = validate_path_within_root(Path::new(&path_str_win), root);
            assert!(result_win.is_ok());
        }
    }
}