Skip to main content

vtcode_commons/
paths.rs

1use anyhow::{Context, Result, anyhow, bail};
2use std::path::{Component, Path, PathBuf};
3use tracing::warn;
4
/// Normalize a path by resolving `.` and `..` components lexically.
///
/// No filesystem access is performed, so symbolic links are *not* resolved.
///
/// * `.` components are dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly under a root (or prefix + root) is discarded, because the
///   parent of the root is the root itself.
/// * `..` that cannot be resolved in a relative path (nothing left to remove)
///   is kept, so `a/../../b` becomes `../b` rather than silently turning into
///   `b`, which would name a different location.
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::ParentDir => {
                if normalized.ends_with(Component::ParentDir) {
                    // Already climbing above the starting point: keep stacking.
                    normalized.push(Component::ParentDir);
                } else if !normalized.pop() && !normalized.has_root() {
                    // Nothing to remove and no root to clamp to: the `..` is
                    // significant and must be preserved.
                    normalized.push(Component::ParentDir);
                }
            }
            Component::CurDir => {}
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
            Component::Normal(part) => normalized.push(part),
        }
    }
    normalized
}
21
22/// Canonicalize a path with fallback to the original path if canonicalization fails.
23pub fn canonicalize_workspace(workspace_root: &Path) -> PathBuf {
24    std::fs::canonicalize(workspace_root).unwrap_or_else(|error| {
25        warn!(
26            path = %workspace_root.display(),
27            %error,
28            "Failed to canonicalize workspace root; falling back to provided path"
29        );
30        workspace_root.to_path_buf()
31    })
32}
33
34/// Resolve a path relative to a workspace root and ensure it stays within it.
35pub fn resolve_workspace_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
36    let candidate = if user_path.is_absolute() {
37        user_path.to_path_buf()
38    } else {
39        workspace_root.join(user_path)
40    };
41
42    let canonical = std::fs::canonicalize(&candidate)
43        .with_context(|| format!("Failed to canonicalize path {}", candidate.display()))?;
44
45    let workspace_canonical = std::fs::canonicalize(workspace_root).with_context(|| {
46        format!(
47            "Failed to canonicalize workspace root {}",
48            workspace_root.display()
49        )
50    })?;
51
52    if !canonical.starts_with(&workspace_canonical) {
53        return Err(anyhow!(
54            "Path {} escapes workspace root {}",
55            canonical.display(),
56            workspace_canonical.display()
57        ));
58    }
59
60    Ok(canonical)
61}
62
63/// Return a canonicalised absolute path that is guaranteed to reside inside the
64/// provided `workspace_root`.  If the path is outside the workspace an error is
65/// returned.
66pub fn secure_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
67    // Resolve relative paths against the workspace root.
68    resolve_workspace_path(workspace_root, user_path)
69}
70
71/// Ensure a candidate path is inside the workspace root after lexical
72/// normalization.
73///
74/// Returns the normalized candidate path on success.
75pub fn ensure_path_within_workspace(candidate: &Path, workspace_root: &Path) -> Result<PathBuf> {
76    let normalized_candidate = normalize_path(candidate);
77    let normalized_workspace = normalize_path(workspace_root);
78
79    if !normalized_candidate.starts_with(&normalized_workspace) {
80        bail!(
81            "Path '{}' escapes workspace '{}'",
82            candidate.display(),
83            workspace_root.display()
84        );
85    }
86
87    Ok(normalized_candidate)
88}
89
/// Reduce an identifier to its lowercase ASCII alphanumeric characters.
///
/// Every character that is not an ASCII letter or digit is discarded; the
/// remaining letters are lowercased.
pub fn normalize_ascii_identifier(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
100
/// Check if a path string is a safe relative path (no traversal, no absolute).
///
/// The check is purely textual and deliberately conservative. After trimming
/// surrounding whitespace, the path is rejected when it:
///
/// * is empty,
/// * contains `..` anywhere (this also rejects names such as `a..b`),
/// * starts with `/` or `\` (rooted Unix paths, Windows root-relative paths
///   such as `\Windows`, and UNC paths such as `\\server\share`),
/// * contains `:` (Windows drive letters such as `C:`).
pub fn is_safe_relative_path(path: &str) -> bool {
    let path = path.trim();
    if path.is_empty() {
        return false;
    }

    // Check for path traversal attempts
    if path.contains("..") {
        return false;
    }

    // Block rooted paths. A leading backslash is rejected as well because on
    // Windows it replaces the base directory when joined.
    if path.starts_with('/') || path.starts_with('\\') {
        return false;
    }

    // Block drive-qualified paths such as `C:\...`.
    if path.contains(':') {
        return false;
    }

    true
}
120
121/// Validates that a path is safe to use.
122/// Preventing traversal, absolute system paths, and dangerous characters.
123///
124/// Optimization: Uses early returns and byte-level checks for common patterns
125pub fn validate_path_safety(path: &str) -> Result<()> {
126    // Optimization: Fast path for empty or very short paths
127    if path.is_empty() {
128        return Ok(());
129    }
130
131    // Reject path traversal attempts
132    // Optimization: Use contains on bytes for simple patterns
133    if path.contains("..") {
134        bail!("Path traversal attempt detected ('..')");
135    }
136
137    // Additional traversal patterns
138    if path.contains("~/../") || path.contains("/.../") {
139        bail!("Advanced path traversal detected");
140    }
141
142    // Optimization: Only check Unix critical paths if path starts with '/'
143    if path.starts_with('/') {
144        // Reject absolute paths outside workspace
145        // Note: We can't strictly block all absolute paths as the agent might need to access
146        // explicitly allowed directories, but we can block obvious system critical paths.
147        static UNIX_CRITICAL: &[&str] = &[
148            "/etc", "/usr", "/bin", "/sbin", "/var", "/boot", "/root", "/dev",
149        ];
150        for prefix in UNIX_CRITICAL {
151            let is_var_temp_exception = *prefix == "/var"
152                && (path.starts_with("/var/folders/")
153                    || path == "/var/folders"
154                    || path.starts_with("/var/tmp/")
155                    || path == "/var/tmp");
156
157            if !is_var_temp_exception && matches_critical_prefix(path, prefix) {
158                bail!("Access to system directory denied: {}", prefix);
159            }
160        }
161    }
162
163    // Windows critical paths
164    #[cfg(windows)]
165    {
166        let path_lower = path.to_lowercase();
167        static WIN_CRITICAL: &[&str] = &["c:\\windows", "c:\\program files", "c:\\system32"];
168        for prefix in WIN_CRITICAL {
169            if path_lower.starts_with(prefix) {
170                bail!("Access to Windows system directory denied");
171            }
172        }
173    }
174
175    // Reject dangerous shell characters in paths (including null byte)
176    // Optimization: Check bytes directly for faster character detection
177    static DANGEROUS_CHARS: &[u8] = b"$`|;&\n\r><\0";
178    for &c in path.as_bytes() {
179        if DANGEROUS_CHARS.contains(&c) {
180            bail!("Path contains dangerous shell characters");
181        }
182    }
183
184    Ok(())
185}
186
/// Report whether `path` is `prefix` itself or lies beneath it.
///
/// The comparison is made per path component rather than per character, so
/// `/etcetera` does not match `/etc`, while spellings that resolve to the
/// same location — `//etc/passwd`, `/./etc/passwd`, `/etc//passwd` — do.
fn matches_critical_prefix(path: &str, prefix: &str) -> bool {
    Path::new(path).starts_with(prefix)
}
193
/// Return the final component of `path` as an owned string.
///
/// When the path has no file name (for example `..`, `/`, or an empty
/// string), the input string is returned unchanged.
pub fn file_name_from_path(path: &str) -> String {
    let leaf = Path::new(path).file_name().and_then(|os| os.to_str());
    leaf.unwrap_or(path).to_owned()
}
202
203/// Canonicalize a path, walking up to find the nearest existing ancestor for new files.
204///
205/// This function handles paths to files that may not yet exist by finding the
206/// nearest existing parent directory, canonicalizing that, and then appending
207/// the remaining path components.
208///
209/// # Safety
210/// This function is critical for security. It prevents symlink escapes by:
211/// 1. Finding the nearest existing ancestor directory
212/// 2. Canonicalizing that directory (resolves symlinks)
213/// 3. Appending the remaining path components
214///
215/// # Arguments
216/// * `normalized` - A normalized path (output from `normalize_path`)
217///
218/// # Returns
219/// The canonical path, or the normalized path if no parent exists
220pub async fn canonicalize_allow_missing(normalized: &Path) -> Result<PathBuf> {
221    // If the path exists, canonicalize it directly
222    if tokio::fs::try_exists(normalized).await.unwrap_or(false) {
223        return tokio::fs::canonicalize(normalized).await.map_err(|e| {
224            anyhow!(
225                "Failed to resolve canonical path for '{}': {}",
226                normalized.display(),
227                e
228            )
229        });
230    }
231
232    // Walk up the directory tree to find the nearest existing ancestor
233    let mut current = normalized.to_path_buf();
234    while let Some(parent) = current.parent() {
235        if tokio::fs::try_exists(parent).await.unwrap_or(false) {
236            // Canonicalize the existing parent
237            let canonical_parent = tokio::fs::canonicalize(parent).await.map_err(|e| {
238                anyhow!(
239                    "Failed to resolve canonical path for '{}': {}",
240                    parent.display(),
241                    e
242                )
243            })?;
244
245            // Get the remaining path components
246            let remainder = normalized
247                .strip_prefix(parent)
248                .unwrap_or_else(|_| Path::new(""));
249
250            // Return the canonical parent + remaining components
251            return if remainder.as_os_str().is_empty() {
252                Ok(canonical_parent)
253            } else {
254                Ok(canonical_parent.join(remainder))
255            };
256        }
257        current = parent.to_path_buf();
258    }
259
260    // No existing parent found, return normalized path as-is
261    Ok(normalized.to_path_buf())
262}
263
264/// Provides the root directories an application uses to store data.
265pub trait WorkspacePaths: Send + Sync {
266    /// Absolute path to the application's workspace root.
267    fn workspace_root(&self) -> &Path;
268
269    /// Returns the directory where configuration files should be stored.
270    fn config_dir(&self) -> PathBuf;
271
272    /// Returns an optional cache directory for transient data.
273    fn cache_dir(&self) -> Option<PathBuf> {
274        None
275    }
276
277    /// Returns an optional directory for telemetry or log artifacts.
278    fn telemetry_dir(&self) -> Option<PathBuf> {
279        None
280    }
281
282    /// Determine the [`PathScope`] for a given path based on workspace directories.
283    ///
284    /// Returns the most specific scope matching the path:
285    /// - `Workspace` if under `workspace_root()`
286    /// - `Config` if under `config_dir()`
287    /// - `Cache` if under `cache_dir()`
288    /// - `Telemetry` if under `telemetry_dir()`
289    /// - Falls back to `Cache` if no match
290    fn scope_for_path(&self, path: &Path) -> PathScope {
291        if path.starts_with(self.workspace_root()) {
292            return PathScope::Workspace;
293        }
294
295        let config_dir = self.config_dir();
296        if path.starts_with(&config_dir) {
297            return PathScope::Config;
298        }
299
300        if let Some(cache_dir) = self.cache_dir()
301            && path.starts_with(&cache_dir)
302        {
303            return PathScope::Cache;
304        }
305
306        if let Some(telemetry_dir) = self.telemetry_dir()
307            && path.starts_with(&telemetry_dir)
308        {
309            return PathScope::Telemetry;
310        }
311
312        PathScope::Cache
313    }
314}
315
316/// Helper trait that adds path resolution helpers on top of [`WorkspacePaths`].
317pub trait PathResolver: WorkspacePaths {
318    /// Resolve a path relative to the workspace root.
319    fn resolve<P>(&self, relative: P) -> PathBuf
320    where
321        P: AsRef<Path>,
322    {
323        self.workspace_root().join(relative)
324    }
325
326    /// Resolve a path within the configuration directory.
327    fn resolve_config<P>(&self, relative: P) -> PathBuf
328    where
329        P: AsRef<Path>,
330    {
331        self.config_dir().join(relative)
332    }
333}
334
335impl<T> PathResolver for T where T: WorkspacePaths + ?Sized {}
336
/// Conceptual category a file path belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathScope {
    /// Path under the workspace root.
    Workspace,
    /// Path under the configuration directory.
    Config,
    /// Path under the cache directory.
    Cache,
    /// Path under the telemetry directory.
    Telemetry,
}

impl PathScope {
    /// Human-readable label for this scope, used in error messages.
    pub fn description(self) -> &'static str {
        let label = match self {
            PathScope::Workspace => "workspace",
            PathScope::Config => "configuration",
            PathScope::Cache => "cache",
            PathScope::Telemetry => "telemetry",
        };
        label
    }
}
357
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Minimal [`WorkspacePaths`] implementation backed by fixed directories.
    struct StaticPaths {
        root: PathBuf,
        config: PathBuf,
    }

    impl WorkspacePaths for StaticPaths {
        fn workspace_root(&self) -> &Path {
            &self.root
        }

        fn config_dir(&self) -> PathBuf {
            self.config.clone()
        }

        fn cache_dir(&self) -> Option<PathBuf> {
            Some(self.root.join("cache"))
        }
    }

    /// Name of a per-process fixture, so concurrent test runs that share the
    /// system temp directory do not interfere with each other.
    fn scratch_name(name: &str) -> String {
        format!("vtcode_test_{}_{}", std::process::id(), name)
    }

    /// Location of a per-process fixture inside the system temp directory.
    fn scratch_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(scratch_name(name))
    }

    #[test]
    fn resolves_relative_paths() {
        let paths = StaticPaths {
            root: PathBuf::from("/tmp/project"),
            config: PathBuf::from("/tmp/project/config"),
        };

        assert_eq!(
            PathResolver::resolve(&paths, "subdir/file.txt"),
            PathBuf::from("/tmp/project/subdir/file.txt")
        );
        assert_eq!(
            PathResolver::resolve_config(&paths, "settings.toml"),
            PathBuf::from("/tmp/project/config/settings.toml")
        );
        assert_eq!(paths.cache_dir(), Some(PathBuf::from("/tmp/project/cache")));
    }

    #[test]
    fn ensures_path_within_workspace_accepts_nested_path() {
        let workspace = Path::new("/tmp/project");
        let candidate = Path::new("/tmp/project/src/../src/lib.rs");
        let normalized = ensure_path_within_workspace(candidate, workspace).unwrap();
        assert_eq!(normalized, PathBuf::from("/tmp/project/src/lib.rs"));
    }

    #[test]
    fn ensures_path_within_workspace_rejects_escape() {
        let workspace = Path::new("/tmp/project");
        let candidate = Path::new("/tmp/project/../../etc/passwd");
        assert!(ensure_path_within_workspace(candidate, workspace).is_err());
    }

    #[tokio::test]
    async fn test_canonicalize_existing_file() {
        // Create a temporary file
        let test_file = scratch_path("existing.txt");
        tokio::fs::write(&test_file, b"test").await.unwrap();

        let result = canonicalize_allow_missing(&test_file).await;
        let existed = result.as_ref().is_ok_and(|path| path.exists());

        // Clean up before asserting so a failure does not leak the fixture.
        tokio::fs::remove_file(&test_file).await.ok();

        // Should get the canonical path
        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert!(existed);
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file() {
        // Use a path that doesn't exist but has an existing ancestor
        let missing_file = scratch_path("missing_dir").join("missing_file.txt");

        let canonical = canonicalize_allow_missing(&missing_file).await.unwrap();

        // Should get canonical ancestor + missing components
        assert!(canonical.is_absolute());
        assert!(canonical.to_string_lossy().contains("missing_file.txt"));
    }

    #[tokio::test]
    async fn test_canonicalize_deeply_missing_path() {
        // Use a path with multiple missing parent directories
        let deep_missing = scratch_path("a").join("b/c/d/file.txt");

        let canonical = canonicalize_allow_missing(&deep_missing).await.unwrap();

        // Should get canonical temp_dir + missing components
        assert!(canonical.is_absolute());
        assert!(canonical.to_string_lossy().contains(&scratch_name("a")));
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file_with_existing_parent() {
        // Create a parent directory
        let test_dir = scratch_path("parent");
        tokio::fs::create_dir_all(&test_dir).await.unwrap();

        let missing_file = test_dir.join("missing.txt");
        let result = canonicalize_allow_missing(&missing_file).await;

        // Clean up before asserting so a failure does not leak the fixture.
        tokio::fs::remove_dir(&test_dir).await.ok();

        // Should get canonical parent + missing filename
        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert!(canonical.to_string_lossy().ends_with("missing.txt"));
    }
}
475}