Skip to main content

vtcode_commons/
paths.rs

1use anyhow::{Context, Result, anyhow, bail};
2use std::path::{Component, Path, PathBuf};
3use tracing::warn;
4
/// Normalize a path by resolving `.` and `..` components lexically.
///
/// The filesystem is never consulted, so symlinks are not resolved.
///
/// Rules applied to each component, in order of appearance:
/// * `.` is dropped.
/// * `..` removes the preceding normal component when there is one.
/// * `..` directly after the root is dropped (`/..` is `/`).
/// * `..` with nothing left to remove is kept, so a relative path such as
///   `../a` still points above its base instead of being rewritten to `a`.
pub fn normalize_path(path: &Path) -> PathBuf {
    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                let last = normalized.components().next_back();
                let removes_directory = matches!(last, Some(Component::Normal(_)));
                let at_root = matches!(last, Some(Component::RootDir));
                if removes_directory {
                    normalized.pop();
                } else if !at_root {
                    // Nothing to cancel out: keep the `..` so the result
                    // still refers to the same location as the input.
                    normalized.push(component.as_os_str());
                }
            }
            Component::Prefix(prefix) => normalized.push(prefix.as_os_str()),
            Component::RootDir => normalized.push(component.as_os_str()),
            Component::Normal(part) => normalized.push(part),
        }
    }
    normalized
}
21
22/// Expand a leading `~` or `~/` to the user's home directory. The function is
23/// intentionally forgiving: paths that don't start with `~` are returned as-is,
24/// and when the home directory cannot be determined the original path is
25/// preserved so callers can surface a downstream error rather than panicking.
26///
27/// This is the canonical implementation used by the tool registry and the
28/// sandbox runtime; both call sites previously carried near-identical copies.
29pub fn expand_tilde(path: &str) -> PathBuf {
30    if path == "~" {
31        return dirs::home_dir().unwrap_or_else(|| PathBuf::from(path));
32    }
33    if let Some(rest) = path.strip_prefix("~/")
34        && let Some(home) = dirs::home_dir()
35    {
36        return home.join(rest);
37    }
38    PathBuf::from(path)
39}
40
41/// Canonicalize a path with fallback to the original path if canonicalization fails.
42pub fn canonicalize_workspace(workspace_root: &Path) -> PathBuf {
43    std::fs::canonicalize(workspace_root).unwrap_or_else(|error| {
44        warn!(
45            path = %workspace_root.display(),
46            %error,
47            "Failed to canonicalize workspace root; falling back to provided path"
48        );
49        workspace_root.to_path_buf()
50    })
51}
52
53/// Resolve a path relative to a workspace root and ensure it stays within it.
54pub fn resolve_workspace_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
55    let candidate = if user_path.is_absolute() {
56        user_path.to_path_buf()
57    } else {
58        workspace_root.join(user_path)
59    };
60
61    let canonical = std::fs::canonicalize(&candidate)
62        .with_context(|| format!("Failed to canonicalize path {}", candidate.display()))?;
63
64    let workspace_canonical = std::fs::canonicalize(workspace_root).with_context(|| {
65        format!(
66            "Failed to canonicalize workspace root {}",
67            workspace_root.display()
68        )
69    })?;
70
71    if !canonical.starts_with(&workspace_canonical) {
72        return Err(anyhow!(
73            "Path {} escapes workspace root {}",
74            canonical.display(),
75            workspace_canonical.display()
76        ));
77    }
78
79    Ok(canonical)
80}
81
82/// Return a canonicalised absolute path that is guaranteed to reside inside the
83/// provided `workspace_root`.  If the path is outside the workspace an error is
84/// returned.
85pub fn secure_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
86    // Resolve relative paths against the workspace root.
87    resolve_workspace_path(workspace_root, user_path)
88}
89
90/// Ensure a candidate path is inside the workspace root after lexical
91/// normalization.
92///
93/// Returns the normalized candidate path on success.
94pub fn ensure_path_within_workspace(candidate: &Path, workspace_root: &Path) -> Result<PathBuf> {
95    let normalized_candidate = normalize_path(candidate);
96    let normalized_workspace = normalize_path(workspace_root);
97
98    if !normalized_candidate.starts_with(&normalized_workspace) {
99        bail!(
100            "Path '{}' escapes workspace '{}'",
101            candidate.display(),
102            workspace_root.display()
103        );
104    }
105
106    Ok(normalized_candidate)
107}
108
/// Reduce `value` to its ASCII letters and digits, lowercased.
///
/// Every character that is not ASCII alphanumeric (punctuation, whitespace,
/// non-ASCII text) is discarded; the remaining characters keep their order.
pub fn normalize_ascii_identifier(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
119
/// Check whether a path string is a safe relative path.
///
/// Surrounding whitespace is ignored. The path is rejected when it:
/// * is empty after trimming;
/// * contains `..` anywhere (this also rejects names such as `a..b`);
/// * starts with `/` or `\` (Unix absolute, Windows root-relative or UNC);
/// * contains `:` (Windows drive letters such as `C:`).
pub fn is_safe_relative_path(path: &str) -> bool {
    let path = path.trim();
    if path.is_empty() {
        return false;
    }

    // Path traversal attempts.
    if path.contains("..") {
        return false;
    }

    // Absolute paths: a leading separator of either flavour, or a drive colon.
    let starts_at_root = matches!(path.chars().next(), Some('/' | '\\'));
    !starts_at_root && !path.contains(':')
}
139
140/// Validates that a path is safe to use.
141/// Preventing traversal, absolute system paths, and dangerous characters.
142///
143/// Optimization: Uses early returns and byte-level checks for common patterns
144pub fn validate_path_safety(path: &str) -> Result<()> {
145    // Optimization: Fast path for empty or very short paths
146    if path.is_empty() {
147        return Ok(());
148    }
149
150    // Reject path traversal attempts
151    // Optimization: Use contains on bytes for simple patterns
152    if path.contains("..") {
153        bail!("Path traversal attempt detected ('..')");
154    }
155
156    // Additional traversal patterns
157    if path.contains("~/../") || path.contains("/.../") {
158        bail!("Advanced path traversal detected");
159    }
160
161    // Optimization: Only check Unix critical paths if path starts with '/'
162    if path.starts_with('/') {
163        // Reject absolute paths outside workspace
164        // Note: We can't strictly block all absolute paths as the agent might need to access
165        // explicitly allowed directories, but we can block obvious system critical paths.
166        static UNIX_CRITICAL: &[&str] = &[
167            "/etc", "/usr", "/bin", "/sbin", "/var", "/boot", "/root", "/dev",
168        ];
169        for prefix in UNIX_CRITICAL {
170            let is_var_temp_exception = *prefix == "/var"
171                && (path.starts_with("/var/folders/")
172                    || path == "/var/folders"
173                    || path.starts_with("/var/tmp/")
174                    || path == "/var/tmp");
175
176            if !is_var_temp_exception && matches_critical_prefix(path, prefix) {
177                bail!("Access to system directory denied: {}", prefix);
178            }
179        }
180    }
181
182    // Windows critical paths
183    #[cfg(windows)]
184    {
185        let path_lower = path.to_lowercase();
186        static WIN_CRITICAL: &[&str] = &["c:\\windows", "c:\\program files", "c:\\system32"];
187        for prefix in WIN_CRITICAL {
188            if path_lower.starts_with(prefix) {
189                bail!("Access to Windows system directory denied");
190            }
191        }
192    }
193
194    // Reject dangerous shell characters in paths (including null byte)
195    // Optimization: Check bytes directly for faster character detection
196    static DANGEROUS_CHARS: &[u8] = b"$`|;&\n\r><\0";
197    for &c in path.as_bytes() {
198        if DANGEROUS_CHARS.contains(&c) {
199            bail!("Path contains dangerous shell characters");
200        }
201    }
202
203    Ok(())
204}
205
/// Report whether `path` is `prefix` itself or lies beneath it.
///
/// The comparison is made component by component with [`Path::starts_with`],
/// not on the raw string. `/etcetera` therefore does not match `/etc`, while
/// spellings with redundant separators or `.` segments (`//etc/passwd`,
/// `/./etc/passwd`, `/etc/`) do.
fn matches_critical_prefix(path: &str, prefix: &str) -> bool {
    Path::new(path).starts_with(prefix)
}
212
/// Return the final component of `path` as an owned `String`.
///
/// When the path has no file name (for example `/` or a path ending in
/// `..`), the whole input string is returned instead.
pub fn file_name_from_path(path: &str) -> String {
    Path::new(path)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or(path)
        .to_owned()
}
221
222/// Canonicalize a path, walking up to find the nearest existing ancestor for new files.
223///
224/// This function handles paths to files that may not yet exist by finding the
225/// nearest existing parent directory, canonicalizing that, and then appending
226/// the remaining path components.
227///
228/// # Security
229/// This function is critical for security. It prevents symlink escapes by:
230/// 1. Finding the nearest existing ancestor directory
231/// 2. Canonicalizing that directory (resolves symlinks)
232/// 3. Appending the remaining path components
233///
234/// # Arguments
235/// * `normalized` - A normalized path (output from `normalize_path`)
236///
237/// # Returns
238/// The canonical path, or the normalized path if no parent exists
239pub async fn canonicalize_allow_missing(normalized: &Path) -> Result<PathBuf> {
240    // If the path exists, canonicalize it directly
241    if tokio::fs::try_exists(normalized).await.unwrap_or(false) {
242        return tokio::fs::canonicalize(normalized).await.map_err(|e| {
243            anyhow!(
244                "Failed to resolve canonical path for '{}': {}",
245                normalized.display(),
246                e
247            )
248        });
249    }
250
251    // Walk up the directory tree to find the nearest existing ancestor
252    let mut current = normalized.to_path_buf();
253    while let Some(parent) = current.parent() {
254        if tokio::fs::try_exists(parent).await.unwrap_or(false) {
255            // Canonicalize the existing parent
256            let canonical_parent = tokio::fs::canonicalize(parent).await.map_err(|e| {
257                anyhow!(
258                    "Failed to resolve canonical path for '{}': {}",
259                    parent.display(),
260                    e
261                )
262            })?;
263
264            // Get the remaining path components
265            let remainder = normalized
266                .strip_prefix(parent)
267                .unwrap_or_else(|_| Path::new(""));
268
269            // Return the canonical parent + remaining components
270            return if remainder.as_os_str().is_empty() {
271                Ok(canonical_parent)
272            } else {
273                Ok(canonical_parent.join(remainder))
274            };
275        }
276        current = parent.to_path_buf();
277    }
278
279    // No existing parent found, return normalized path as-is
280    Ok(normalized.to_path_buf())
281}
282
283/// Provides the root directories an application uses to store data.
284pub trait WorkspacePaths: Send + Sync {
285    /// Absolute path to the application's workspace root.
286    fn workspace_root(&self) -> &Path;
287
288    /// Returns the directory where configuration files should be stored.
289    fn config_dir(&self) -> PathBuf;
290
291    /// Returns an optional cache directory for transient data.
292    fn cache_dir(&self) -> Option<PathBuf> {
293        None
294    }
295
296    /// Returns an optional directory for telemetry or log artifacts.
297    fn telemetry_dir(&self) -> Option<PathBuf> {
298        None
299    }
300
301    /// Determine the [`PathScope`] for a given path based on workspace directories.
302    ///
303    /// Returns the most specific scope matching the path:
304    /// - `Workspace` if under `workspace_root()`
305    /// - `Config` if under `config_dir()`
306    /// - `Cache` if under `cache_dir()`
307    /// - `Telemetry` if under `telemetry_dir()`
308    /// - Falls back to `Cache` if no match
309    fn scope_for_path(&self, path: &Path) -> PathScope {
310        if path.starts_with(self.workspace_root()) {
311            return PathScope::Workspace;
312        }
313
314        let config_dir = self.config_dir();
315        if path.starts_with(&config_dir) {
316            return PathScope::Config;
317        }
318
319        if let Some(cache_dir) = self.cache_dir()
320            && path.starts_with(&cache_dir)
321        {
322            return PathScope::Cache;
323        }
324
325        if let Some(telemetry_dir) = self.telemetry_dir()
326            && path.starts_with(&telemetry_dir)
327        {
328            return PathScope::Telemetry;
329        }
330
331        PathScope::Cache
332    }
333}
334
335/// Helper trait that adds path resolution helpers on top of [`WorkspacePaths`].
336pub trait PathResolver: WorkspacePaths {
337    /// Resolve a path relative to the workspace root.
338    fn resolve<P>(&self, relative: P) -> PathBuf
339    where
340        P: AsRef<Path>,
341    {
342        self.workspace_root().join(relative)
343    }
344
345    /// Resolve a path within the configuration directory.
346    fn resolve_config<P>(&self, relative: P) -> PathBuf
347    where
348        P: AsRef<Path>,
349    {
350        self.config_dir().join(relative)
351    }
352}
353
354impl<T> PathResolver for T where T: WorkspacePaths + ?Sized {}
355
/// Conceptual category a file path belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathScope {
    /// The path belongs to the workspace.
    Workspace,
    /// The path belongs to the configuration directory.
    Config,
    /// The path belongs to the cache directory.
    Cache,
    /// The path belongs to the telemetry directory.
    Telemetry,
}

impl PathScope {
    /// Short lowercase label for this scope, intended for error messages.
    pub fn description(self) -> &'static str {
        match self {
            PathScope::Workspace => "workspace",
            PathScope::Config => "configuration",
            PathScope::Cache => "cache",
            PathScope::Telemetry => "telemetry",
        }
    }
}
376
377// ============================================================================
378// Extension Traits (Pattern 3: Extension Traits)
379// ============================================================================
380
381/// Extension trait that adds path normalization and safety methods to `Path`.
382///
383/// Delegates to the existing free functions in this module, providing a more
384/// ergonomic call-site syntax:
385///
386/// ```rust
387/// use vtcode_commons::paths::PathExt;
388/// use std::path::Path;
389///
390/// let normalized = Path::new("/tmp/project/src/../src/lib.rs").normalize();
391/// ```
392pub trait PathExt {
393    /// Normalize a path by resolving `.` and `..` components lexically.
394    fn normalize(&self) -> PathBuf;
395
396    /// Canonicalize with fallback to the original path if canonicalization fails.
397    fn canonicalize_or_self(&self) -> PathBuf;
398
399    /// Extract the filename from a path as a `String`, with fallback to the
400    /// full path when no filename component exists.
401    ///
402    /// Unlike [`Path::file_name`] which returns `Option<&OsStr>`, this method
403    /// always returns a `String` and falls back gracefully.
404    fn file_name_str(&self) -> String;
405}
406
407impl PathExt for Path {
408    fn normalize(&self) -> PathBuf {
409        normalize_path(self)
410    }
411
412    fn canonicalize_or_self(&self) -> PathBuf {
413        canonicalize_workspace(self)
414    }
415
416    fn file_name_str(&self) -> String {
417        self.file_name()
418            .and_then(|name| name.to_str())
419            .map(|s| s.to_string())
420            .unwrap_or_else(|| self.to_string_lossy().into_owned())
421    }
422}
423
424/// Extension trait that adds path-related methods to `str`.
425///
426/// Provides ergonomic access to tilde expansion and path safety checks:
427///
428/// ```rust
429/// use vtcode_commons::paths::StrPathExt;
430///
431/// let expanded = "~/projects/vtcode".expand_tilde();
432/// assert!(StrPathExt::is_safe_path("src/main.rs"));
433/// ```
434pub trait StrPathExt {
435    /// Expand a leading `~` or `~/` to the user's home directory.
436    fn expand_tilde(&self) -> PathBuf;
437
438    /// Check if this path string is a safe relative path (no traversal, no absolute).
439    fn is_safe_path(&self) -> bool;
440
441    /// Validate that this path is safe to use (no traversal, no dangerous characters).
442    fn validate_safety(&self) -> Result<()>;
443
444    /// Extract the filename from this path string.
445    fn file_name_str(&self) -> String;
446}
447
448impl StrPathExt for str {
449    fn expand_tilde(&self) -> PathBuf {
450        expand_tilde(self)
451    }
452
453    fn is_safe_path(&self) -> bool {
454        is_safe_relative_path(self)
455    }
456
457    fn validate_safety(&self) -> Result<()> {
458        validate_path_safety(self)
459    }
460
461    fn file_name_str(&self) -> String {
462        file_name_from_path(self)
463    }
464}
465
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Minimal [`WorkspacePaths`] implementation backed by fixed paths.
    struct StaticPaths {
        root: PathBuf,
        config: PathBuf,
    }

    impl WorkspacePaths for StaticPaths {
        fn workspace_root(&self) -> &Path {
            &self.root
        }

        fn config_dir(&self) -> PathBuf {
            self.config.clone()
        }

        fn cache_dir(&self) -> Option<PathBuf> {
            Some(self.root.join("cache"))
        }
    }

    /// Build a path in the system temp directory whose name includes the
    /// current process id, so concurrent test processes sharing one temp
    /// directory do not step on each other's files.
    fn scratch_path(label: &str) -> PathBuf {
        std::env::temp_dir().join(format!("vtcode_test_{}_{}", std::process::id(), label))
    }

    #[test]
    fn resolves_relative_paths() {
        let paths = StaticPaths {
            root: PathBuf::from("/tmp/project"),
            config: PathBuf::from("/tmp/project/config"),
        };

        assert_eq!(
            PathResolver::resolve(&paths, "subdir/file.txt"),
            PathBuf::from("/tmp/project/subdir/file.txt")
        );
        assert_eq!(
            PathResolver::resolve_config(&paths, "settings.toml"),
            PathBuf::from("/tmp/project/config/settings.toml")
        );
        assert_eq!(paths.cache_dir(), Some(PathBuf::from("/tmp/project/cache")));
    }

    #[test]
    fn ensures_path_within_workspace_accepts_nested_path() {
        let workspace = Path::new("/tmp/project");
        let candidate = Path::new("/tmp/project/src/../src/lib.rs");
        let normalized = ensure_path_within_workspace(candidate, workspace).unwrap();
        assert_eq!(normalized, PathBuf::from("/tmp/project/src/lib.rs"));
    }

    #[test]
    fn ensures_path_within_workspace_rejects_escape() {
        let workspace = Path::new("/tmp/project");
        let candidate = Path::new("/tmp/project/../../etc/passwd");
        assert!(ensure_path_within_workspace(candidate, workspace).is_err());
    }

    #[tokio::test]
    async fn test_canonicalize_existing_file() {
        let test_file = scratch_path("existing.txt");
        tokio::fs::write(&test_file, b"test").await.unwrap();

        let result = canonicalize_allow_missing(&test_file).await;
        let existed = result.as_ref().map(|path| path.exists()).unwrap_or(false);

        // Clean up before asserting so a failure does not leave the file behind.
        tokio::fs::remove_file(&test_file).await.ok();

        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert!(existed);
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file() {
        // Neither the directory nor the file exists; only the temp dir does.
        let missing_file = scratch_path("missing_dir").join("missing_file.txt");

        let canonical = canonicalize_allow_missing(&missing_file).await.unwrap();

        // Should get canonical parent + missing components
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with("missing_file.txt"));
    }

    #[tokio::test]
    async fn test_canonicalize_deeply_missing_path() {
        // Several levels of missing parent directories.
        let deep_missing = scratch_path("a").join("b/c/d/file.txt");

        let canonical = canonicalize_allow_missing(&deep_missing).await.unwrap();

        // Should get canonical temp_dir + missing components
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with("b/c/d/file.txt"));
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file_with_existing_parent() {
        let test_dir = scratch_path("parent");
        tokio::fs::create_dir_all(&test_dir).await.unwrap();

        let missing_file = test_dir.join("missing.txt");
        let result = canonicalize_allow_missing(&missing_file).await;

        // Clean up before asserting so a failure does not leave the directory behind.
        tokio::fs::remove_dir(&test_dir).await.ok();

        // Should get canonical parent + missing filename
        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with("missing.txt"));
    }

    #[test]
    fn expand_tilde_passes_through_absolute_paths() {
        let absolute = "/etc/hosts";
        assert_eq!(expand_tilde(absolute), PathBuf::from(absolute));
    }

    #[test]
    fn expand_tilde_passes_through_relative_paths() {
        let relative = "src/main.rs";
        assert_eq!(expand_tilde(relative), PathBuf::from(relative));
    }

    #[test]
    fn expand_tilde_resolves_bare_tilde_to_home() {
        // Skipped silently on systems without a resolvable home directory.
        if let Some(home) = dirs::home_dir() {
            assert_eq!(expand_tilde("~"), home);
        }
    }

    #[test]
    fn expand_tilde_resolves_tilde_slash_prefix() {
        // Skipped silently on systems without a resolvable home directory.
        if let Some(home) = dirs::home_dir() {
            let resolved = expand_tilde("~/projects/vtcode");
            assert_eq!(resolved, home.join("projects/vtcode"));
        }
    }
}