Skip to main content

vtcode_commons/
paths.rs

1use anyhow::{Context, Result, anyhow, bail};
2use std::path::{Component, Path, PathBuf};
3use tracing::warn;
4
/// Normalize a path lexically by resolving `.` and `..` components.
///
/// No filesystem access is performed: symlinks are not resolved and the path
/// does not need to exist.
///
/// Rules applied to each component, left to right:
/// * `.` is dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly after a root or prefix is dropped, because the parent of
///   the root is the root itself.
/// * `..` that would climb above the start of a *relative* path is kept, so
///   `../foo` stays `../foo` instead of silently turning into `foo`. Dropping
///   it would make a path that leaves its base directory look like one that
///   stays inside it.
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                // An ordinary directory name precedes us: step back out of it.
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // Already at the root (or a bare prefix): there is nothing above.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Relative path with nothing left to pop: remember the climb.
                Some(Component::ParentDir | Component::CurDir) | None => {
                    normalized.push(Component::ParentDir.as_os_str());
                }
            },
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
            Component::Normal(part) => normalized.push(part),
        }
    }
    normalized
}
21
22/// Expand a leading `~` or `~/` to the user's home directory. The function is
23/// intentionally forgiving: paths that don't start with `~` are returned as-is,
24/// and when the home directory cannot be determined the original path is
25/// preserved so callers can surface a downstream error rather than panicking.
26///
27/// This is the canonical implementation used by the tool registry and the
28/// sandbox runtime; both call sites previously carried near-identical copies.
29pub fn expand_tilde(path: &str) -> PathBuf {
30    if path == "~" {
31        return dirs::home_dir().unwrap_or_else(|| PathBuf::from(path));
32    }
33    if let Some(rest) = path.strip_prefix("~/")
34        && let Some(home) = dirs::home_dir()
35    {
36        return home.join(rest);
37    }
38    PathBuf::from(path)
39}
40
41/// Canonicalize a path with fallback to the original path if canonicalization fails.
42pub fn canonicalize_workspace(workspace_root: &Path) -> PathBuf {
43    std::fs::canonicalize(workspace_root).unwrap_or_else(|error| {
44        warn!(
45            path = %workspace_root.display(),
46            %error,
47            "Failed to canonicalize workspace root; falling back to provided path"
48        );
49        workspace_root.to_path_buf()
50    })
51}
52
53/// Resolve a path relative to a workspace root and ensure it stays within it.
54pub fn resolve_workspace_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
55    let candidate = if user_path.is_absolute() {
56        user_path.to_path_buf()
57    } else {
58        workspace_root.join(user_path)
59    };
60
61    let canonical = std::fs::canonicalize(&candidate)
62        .with_context(|| format!("Failed to canonicalize path {}", candidate.display()))?;
63
64    let workspace_canonical = std::fs::canonicalize(workspace_root).with_context(|| {
65        format!(
66            "Failed to canonicalize workspace root {}",
67            workspace_root.display()
68        )
69    })?;
70
71    if !canonical.starts_with(&workspace_canonical) {
72        return Err(anyhow!(
73            "Path {} escapes workspace root {}",
74            canonical.display(),
75            workspace_canonical.display()
76        ));
77    }
78
79    Ok(canonical)
80}
81
82/// Return a canonicalised absolute path that is guaranteed to reside inside the
83/// provided `workspace_root`.  If the path is outside the workspace an error is
84/// returned.
85pub fn secure_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
86    // Resolve relative paths against the workspace root.
87    resolve_workspace_path(workspace_root, user_path)
88}
89
90/// Ensure a candidate path is inside the workspace root after lexical
91/// normalization.
92///
93/// Returns the normalized candidate path on success.
94pub fn ensure_path_within_workspace(candidate: &Path, workspace_root: &Path) -> Result<PathBuf> {
95    let normalized_candidate = normalize_path(candidate);
96    let normalized_workspace = normalize_path(workspace_root);
97
98    if !normalized_candidate.starts_with(&normalized_workspace) {
99        bail!(
100            "Path '{}' escapes workspace '{}'",
101            candidate.display(),
102            workspace_root.display()
103        );
104    }
105
106    Ok(normalized_candidate)
107}
108
/// Reduce an identifier to its lowercase ASCII letters and digits.
///
/// Every character that is not an ASCII letter or digit (punctuation,
/// whitespace, non-ASCII text) is discarded; the remaining characters are
/// lowercased and returned in their original order.
pub fn normalize_ascii_identifier(value: &str) -> String {
    value
        .chars()
        .filter(|ch| ch.is_ascii_alphanumeric())
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
119
/// Decide whether a path string is an acceptable relative path.
///
/// The input is trimmed first. It is rejected when it is then empty, contains
/// `..` anywhere, starts with `/`, or contains a `:` (which rules out
/// drive-letter prefixes such as `C:`). Everything else is accepted.
pub fn is_safe_relative_path(path: &str) -> bool {
    let trimmed = path.trim();

    let has_traversal = trimmed.contains("..");
    let looks_absolute = trimmed.starts_with('/') || trimmed.contains(':');

    !(trimmed.is_empty() || has_traversal || looks_absolute)
}
139
140/// Validates that a path is safe to use.
141/// Preventing traversal, absolute system paths, and dangerous characters.
142///
143/// Optimization: Uses early returns and byte-level checks for common patterns
144pub fn validate_path_safety(path: &str) -> Result<()> {
145    // Optimization: Fast path for empty or very short paths
146    if path.is_empty() {
147        return Ok(());
148    }
149
150    // Reject path traversal attempts
151    // Optimization: Use contains on bytes for simple patterns
152    if path.contains("..") {
153        bail!("Path traversal attempt detected ('..')");
154    }
155
156    // Additional traversal patterns
157    if path.contains("~/../") || path.contains("/.../") {
158        bail!("Advanced path traversal detected");
159    }
160
161    // Optimization: Only check Unix critical paths if path starts with '/'
162    if path.starts_with('/') {
163        // Reject absolute paths outside workspace
164        // Note: We can't strictly block all absolute paths as the agent might need to access
165        // explicitly allowed directories, but we can block obvious system critical paths.
166        static UNIX_CRITICAL: &[&str] = &[
167            "/etc", "/usr", "/bin", "/sbin", "/var", "/boot", "/root", "/dev",
168        ];
169        for prefix in UNIX_CRITICAL {
170            let is_var_temp_exception = *prefix == "/var"
171                && (path.starts_with("/var/folders/")
172                    || path == "/var/folders"
173                    || path.starts_with("/var/tmp/")
174                    || path == "/var/tmp");
175
176            if !is_var_temp_exception && matches_critical_prefix(path, prefix) {
177                bail!("Access to system directory denied: {}", prefix);
178            }
179        }
180    }
181
182    // Windows critical paths
183    #[cfg(windows)]
184    {
185        let path_lower = path.to_lowercase();
186        static WIN_CRITICAL: &[&str] = &["c:\\windows", "c:\\program files", "c:\\system32"];
187        for prefix in WIN_CRITICAL {
188            if path_lower.starts_with(prefix) {
189                bail!("Access to Windows system directory denied");
190            }
191        }
192    }
193
194    // Reject dangerous shell characters in paths (including null byte)
195    // Optimization: Check bytes directly for faster character detection
196    static DANGEROUS_CHARS: &[u8] = b"$`|;&\n\r><\0";
197    for &c in path.as_bytes() {
198        if DANGEROUS_CHARS.contains(&c) {
199            bail!("Path contains dangerous shell characters");
200        }
201    }
202
203    Ok(())
204}
205
/// Report whether `path` is `prefix` itself or lies underneath it.
///
/// The comparison is made on whole path components rather than on raw
/// characters. `/etcetera` therefore does not match `/etc`, while spellings
/// that name the same location — `//etc/passwd`, `/etc//passwd`,
/// `/./etc/passwd` — do match, because `Path` ignores repeated separators and
/// interior `.` components when splitting a path.
fn matches_critical_prefix(path: &str, prefix: &str) -> bool {
    Path::new(path).starts_with(Path::new(prefix))
}
212
/// Return the final component of `path` as an owned string.
///
/// When the path has no file name (for example `/` or `..`), or the file name
/// is not valid UTF-8, the whole input string is returned instead.
pub fn file_name_from_path(path: &str) -> String {
    let final_component = Path::new(path).file_name().and_then(|name| name.to_str());
    match final_component {
        Some(name) => name.to_owned(),
        None => path.to_owned(),
    }
}
221
222/// Canonicalize a path, walking up to find the nearest existing ancestor for new files.
223///
224/// This function handles paths to files that may not yet exist by finding the
225/// nearest existing parent directory, canonicalizing that, and then appending
226/// the remaining path components.
227///
228/// # Security
229/// This function is critical for security. It prevents symlink escapes by:
230/// 1. Finding the nearest existing ancestor directory
231/// 2. Canonicalizing that directory (resolves symlinks)
232/// 3. Appending the remaining path components
233///
234/// # Arguments
235/// * `normalized` - A normalized path (output from `normalize_path`)
236///
237/// # Returns
238/// The canonical path, or the normalized path if no parent exists
239pub async fn canonicalize_allow_missing(normalized: &Path) -> Result<PathBuf> {
240    // If the path exists, canonicalize it directly
241    if tokio::fs::try_exists(normalized).await.unwrap_or(false) {
242        return tokio::fs::canonicalize(normalized).await.map_err(|e| {
243            anyhow!(
244                "Failed to resolve canonical path for '{}': {}",
245                normalized.display(),
246                e
247            )
248        });
249    }
250
251    // Walk up the directory tree to find the nearest existing ancestor
252    let mut current = normalized.to_path_buf();
253    while let Some(parent) = current.parent() {
254        if tokio::fs::try_exists(parent).await.unwrap_or(false) {
255            // Canonicalize the existing parent
256            let canonical_parent = tokio::fs::canonicalize(parent).await.map_err(|e| {
257                anyhow!(
258                    "Failed to resolve canonical path for '{}': {}",
259                    parent.display(),
260                    e
261                )
262            })?;
263
264            // Get the remaining path components
265            let remainder = normalized
266                .strip_prefix(parent)
267                .unwrap_or_else(|_| Path::new(""));
268
269            // Return the canonical parent + remaining components
270            return if remainder.as_os_str().is_empty() {
271                Ok(canonical_parent)
272            } else {
273                Ok(canonical_parent.join(remainder))
274            };
275        }
276        current = parent.to_path_buf();
277    }
278
279    // No existing parent found, return normalized path as-is
280    Ok(normalized.to_path_buf())
281}
282
283/// Provides the root directories an application uses to store data.
284pub trait WorkspacePaths: Send + Sync {
285    /// Absolute path to the application's workspace root.
286    fn workspace_root(&self) -> &Path;
287
288    /// Returns the directory where configuration files should be stored.
289    fn config_dir(&self) -> PathBuf;
290
291    /// Returns an optional cache directory for transient data.
292    fn cache_dir(&self) -> Option<PathBuf> {
293        None
294    }
295
296    /// Returns an optional directory for telemetry or log artifacts.
297    fn telemetry_dir(&self) -> Option<PathBuf> {
298        None
299    }
300
301    /// Determine the [`PathScope`] for a given path based on workspace directories.
302    ///
303    /// Returns the most specific scope matching the path:
304    /// - `Workspace` if under `workspace_root()`
305    /// - `Config` if under `config_dir()`
306    /// - `Cache` if under `cache_dir()`
307    /// - `Telemetry` if under `telemetry_dir()`
308    /// - Falls back to `Cache` if no match
309    fn scope_for_path(&self, path: &Path) -> PathScope {
310        if path.starts_with(self.workspace_root()) {
311            return PathScope::Workspace;
312        }
313
314        let config_dir = self.config_dir();
315        if path.starts_with(&config_dir) {
316            return PathScope::Config;
317        }
318
319        if let Some(cache_dir) = self.cache_dir()
320            && path.starts_with(&cache_dir)
321        {
322            return PathScope::Cache;
323        }
324
325        if let Some(telemetry_dir) = self.telemetry_dir()
326            && path.starts_with(&telemetry_dir)
327        {
328            return PathScope::Telemetry;
329        }
330
331        PathScope::Cache
332    }
333}
334
335/// Helper trait that adds path resolution helpers on top of [`WorkspacePaths`].
336pub trait PathResolver: WorkspacePaths {
337    /// Resolve a path relative to the workspace root.
338    fn resolve<P>(&self, relative: P) -> PathBuf
339    where
340        P: AsRef<Path>,
341    {
342        self.workspace_root().join(relative)
343    }
344
345    /// Resolve a path within the configuration directory.
346    fn resolve_config<P>(&self, relative: P) -> PathBuf
347    where
348        P: AsRef<Path>,
349    {
350        self.config_dir().join(relative)
351    }
352}
353
354impl<T> PathResolver for T where T: WorkspacePaths + ?Sized {}
355
/// The conceptual area of the application a file path belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathScope {
    /// The workspace.
    Workspace,
    /// Configuration files.
    Config,
    /// Cache data.
    Cache,
    /// Telemetry artifacts.
    Telemetry,
}

impl PathScope {
    /// Human-readable name of the scope, intended for error messages.
    pub fn description(self) -> &'static str {
        match self {
            PathScope::Workspace => "workspace",
            PathScope::Config => "configuration",
            PathScope::Cache => "cache",
            PathScope::Telemetry => "telemetry",
        }
    }
}
376
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Minimal `WorkspacePaths` implementation backed by fixed paths.
    struct StaticPaths {
        root: PathBuf,
        config: PathBuf,
    }

    impl WorkspacePaths for StaticPaths {
        fn workspace_root(&self) -> &Path {
            self.root.as_path()
        }

        fn config_dir(&self) -> PathBuf {
            self.config.to_path_buf()
        }

        fn cache_dir(&self) -> Option<PathBuf> {
            let cache = self.root.join("cache");
            Some(cache)
        }
    }

    #[test]
    fn resolves_relative_paths() {
        let fixture = StaticPaths {
            root: PathBuf::from("/tmp/project"),
            config: PathBuf::from("/tmp/project/config"),
        };

        let resolved_file = PathResolver::resolve(&fixture, "subdir/file.txt");
        assert_eq!(resolved_file, PathBuf::from("/tmp/project/subdir/file.txt"));

        let resolved_config = PathResolver::resolve_config(&fixture, "settings.toml");
        assert_eq!(
            resolved_config,
            PathBuf::from("/tmp/project/config/settings.toml")
        );

        let cache = fixture.cache_dir();
        assert_eq!(cache, Some(PathBuf::from("/tmp/project/cache")));
    }

    #[test]
    fn ensures_path_within_workspace_accepts_nested_path() {
        let root = Path::new("/tmp/project");
        let inside = Path::new("/tmp/project/src/../src/lib.rs");

        let result = ensure_path_within_workspace(inside, root).unwrap();

        assert_eq!(result, PathBuf::from("/tmp/project/src/lib.rs"));
    }

    #[test]
    fn ensures_path_within_workspace_rejects_escape() {
        let root = Path::new("/tmp/project");
        let outside = Path::new("/tmp/project/../../etc/passwd");

        let outcome = ensure_path_within_workspace(outside, root);

        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn test_canonicalize_existing_file() {
        // Put a real file into the system temp directory.
        let existing = std::env::temp_dir().join("vtcode_test_existing.txt");
        tokio::fs::write(&existing, b"test").await.unwrap();

        let resolved = canonicalize_allow_missing(&existing).await.unwrap();

        // An existing file resolves to an absolute path that exists.
        assert!(resolved.is_absolute());
        assert!(resolved.exists());

        // Best-effort cleanup.
        tokio::fs::remove_file(&existing).await.ok();
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file() {
        // Neither the file nor its directory exists; the temp directory does.
        let absent = std::env::temp_dir().join("vtcode_test_missing_dir/missing_file.txt");

        let resolved = canonicalize_allow_missing(&absent).await.unwrap();

        // Expect the canonical ancestor with the missing components appended.
        assert!(resolved.is_absolute());
        let rendered = resolved.to_string_lossy();
        assert!(rendered.contains("missing_file.txt"));
    }

    #[tokio::test]
    async fn test_canonicalize_deeply_missing_path() {
        // Several levels of missing directories below the temp directory.
        let absent = std::env::temp_dir().join("vtcode_test_a/b/c/d/file.txt");

        let resolved = canonicalize_allow_missing(&absent).await.unwrap();

        // Expect the canonical temp directory with the missing components appended.
        assert!(resolved.is_absolute());
        let rendered = resolved.to_string_lossy();
        assert!(rendered.contains("vtcode_test_a"));
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file_with_existing_parent() {
        // The parent directory exists, the file inside it does not.
        let parent = std::env::temp_dir().join("vtcode_test_parent");
        tokio::fs::create_dir_all(&parent).await.unwrap();
        let absent = parent.join("missing.txt");

        let resolved = canonicalize_allow_missing(&absent).await.unwrap();

        // Expect the canonical parent followed by the missing file name.
        assert!(resolved.is_absolute());
        let rendered = resolved.to_string_lossy();
        assert!(rendered.ends_with("missing.txt"));

        // Best-effort cleanup.
        tokio::fs::remove_dir(&parent).await.ok();
    }

    #[test]
    fn expand_tilde_passes_through_absolute_paths() {
        let input = "/etc/hosts";
        let expanded = expand_tilde(input);
        assert_eq!(expanded, PathBuf::from(input));
    }

    #[test]
    fn expand_tilde_passes_through_relative_paths() {
        let input = "src/main.rs";
        let expanded = expand_tilde(input);
        assert_eq!(expanded, PathBuf::from(input));
    }

    #[test]
    fn expand_tilde_resolves_bare_tilde_to_home() {
        // Nothing to verify on systems without a discoverable home directory.
        let Some(home) = dirs::home_dir() else {
            return;
        };
        assert_eq!(expand_tilde("~"), home);
    }

    #[test]
    fn expand_tilde_resolves_tilde_slash_prefix() {
        // Nothing to verify on systems without a discoverable home directory.
        let Some(home) = dirs::home_dir() else {
            return;
        };
        let expanded = expand_tilde("~/projects/vtcode");
        assert_eq!(expanded, home.join("projects/vtcode"));
    }
}