Skip to main content

vtcode_commons/
paths.rs

1use anyhow::{Context, Result, anyhow};
2use std::path::{Component, Path, PathBuf};
3use tracing::warn;
4
/// Lexically collapse `.` and `..` segments of `path` without touching the filesystem.
///
/// The components of `path` are replayed into a fresh [`PathBuf`]:
/// - `.` is skipped,
/// - `..` removes the most recently kept component,
/// - prefixes, the root, and ordinary names are appended unchanged.
///
/// Symlinks are not resolved. A `..` that has nothing left to remove is dropped,
/// so `../a` becomes `a` and `/..` stays `/`.
pub fn normalize_path(path: &Path) -> PathBuf {
    path.components().fold(PathBuf::new(), |mut acc, part| {
        match part {
            // `.` contributes nothing to the result.
            Component::CurDir => {}
            // `pop` is a no-op on an empty buffer or a bare root.
            Component::ParentDir => {
                acc.pop();
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                acc.push(part.as_os_str());
            }
        }
        acc
    })
}
21
22/// Canonicalize a path with fallback to the original path if canonicalization fails.
23pub fn canonicalize_workspace(workspace_root: &Path) -> PathBuf {
24    std::fs::canonicalize(workspace_root).unwrap_or_else(|error| {
25        warn!(
26            path = %workspace_root.display(),
27            %error,
28            "Failed to canonicalize workspace root; falling back to provided path"
29        );
30        workspace_root.to_path_buf()
31    })
32}
33
34/// Resolve a path relative to a workspace root and ensure it stays within it.
35pub fn resolve_workspace_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
36    let candidate = if user_path.is_absolute() {
37        user_path.to_path_buf()
38    } else {
39        workspace_root.join(user_path)
40    };
41
42    let canonical = std::fs::canonicalize(&candidate)
43        .with_context(|| format!("Failed to canonicalize path {}", candidate.display()))?;
44
45    let workspace_canonical = std::fs::canonicalize(workspace_root).with_context(|| {
46        format!(
47            "Failed to canonicalize workspace root {}",
48            workspace_root.display()
49        )
50    })?;
51
52    if !canonical.starts_with(&workspace_canonical) {
53        return Err(anyhow!(
54            "Path {} escapes workspace root {}",
55            canonical.display(),
56            workspace_canonical.display()
57        ));
58    }
59
60    Ok(canonical)
61}
62
63/// Return a canonicalised absolute path that is guaranteed to reside inside the
64/// provided `workspace_root`.  If the path is outside the workspace an error is
65/// returned.
66pub fn secure_path(workspace_root: &Path, user_path: &Path) -> Result<PathBuf> {
67    // Resolve relative paths against the workspace root.
68    resolve_workspace_path(workspace_root, user_path)
69}
70
/// Reduce `value` to its ASCII letters and digits, lowercased.
///
/// Every character that is not an ASCII alphanumeric (punctuation, whitespace,
/// non-ASCII letters, ...) is dropped; the survivors keep their original order.
pub fn normalize_ascii_identifier(value: &str) -> String {
    value
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .map(|ch| ch.to_ascii_lowercase())
        .collect()
}
81
/// Check if a path string is a safe relative path (no traversal, no absolute).
///
/// Surrounding whitespace is ignored. The path is rejected when it is:
/// - empty after trimming,
/// - contains `..` anywhere (a deliberately conservative substring check, so a
///   name such as `a..b` is rejected too),
/// - starts with `/` or `\` (Unix root, Windows root-relative, or UNC path),
/// - contains `:` (Windows drive letters and URL-like schemes).
///
/// This is a purely textual check; the filesystem is never consulted.
pub fn is_safe_relative_path(path: &str) -> bool {
    let path = path.trim();
    if path.is_empty() {
        return false;
    }

    // Check for path traversal attempts
    if path.contains("..") {
        return false;
    }

    // Block absolute paths for security. A leading backslash is rooted on
    // Windows (`\dir`) or introduces a UNC share (`\\server\share`).
    let is_rooted = path.starts_with(|c: char| matches!(c, '/' | '\\'));
    if is_rooted || path.contains(':') {
        return false;
    }

    true
}
101
/// Return the final component of `path` as an owned string.
///
/// When [`Path::file_name`] yields nothing (for example `/`, `..`, or an empty
/// string) the whole input is returned unchanged instead.
pub fn file_name_from_path(path: &str) -> String {
    let leaf = Path::new(path).file_name().and_then(|name| name.to_str());
    leaf.unwrap_or(path).to_owned()
}
110
111/// Canonicalize a path, walking up to find the nearest existing ancestor for new files.
112///
113/// This function handles paths to files that may not yet exist by finding the
114/// nearest existing parent directory, canonicalizing that, and then appending
115/// the remaining path components.
116///
117/// # Safety
118/// This function is critical for security. It prevents symlink escapes by:
119/// 1. Finding the nearest existing ancestor directory
120/// 2. Canonicalizing that directory (resolves symlinks)
121/// 3. Appending the remaining path components
122///
123/// # Arguments
124/// * `normalized` - A normalized path (output from `normalize_path`)
125///
126/// # Returns
127/// The canonical path, or the normalized path if no parent exists
128pub async fn canonicalize_allow_missing(normalized: &Path) -> Result<PathBuf> {
129    // If the path exists, canonicalize it directly
130    if tokio::fs::try_exists(normalized).await.unwrap_or(false) {
131        return tokio::fs::canonicalize(normalized).await.map_err(|e| {
132            anyhow!(
133                "Failed to resolve canonical path for '{}': {}",
134                normalized.display(),
135                e
136            )
137        });
138    }
139
140    // Walk up the directory tree to find the nearest existing ancestor
141    let mut current = normalized.to_path_buf();
142    while let Some(parent) = current.parent() {
143        if tokio::fs::try_exists(parent).await.unwrap_or(false) {
144            // Canonicalize the existing parent
145            let canonical_parent = tokio::fs::canonicalize(parent).await.map_err(|e| {
146                anyhow!(
147                    "Failed to resolve canonical path for '{}': {}",
148                    parent.display(),
149                    e
150                )
151            })?;
152
153            // Get the remaining path components
154            let remainder = normalized
155                .strip_prefix(parent)
156                .unwrap_or_else(|_| Path::new(""));
157
158            // Return the canonical parent + remaining components
159            return if remainder.as_os_str().is_empty() {
160                Ok(canonical_parent)
161            } else {
162                Ok(canonical_parent.join(remainder))
163            };
164        }
165        current = parent.to_path_buf();
166    }
167
168    // No existing parent found, return normalized path as-is
169    Ok(normalized.to_path_buf())
170}
171
172/// Provides the root directories an application uses to store data.
173pub trait WorkspacePaths: Send + Sync {
174    /// Absolute path to the application's workspace root.
175    fn workspace_root(&self) -> &Path;
176
177    /// Returns the directory where configuration files should be stored.
178    fn config_dir(&self) -> PathBuf;
179
180    /// Returns an optional cache directory for transient data.
181    fn cache_dir(&self) -> Option<PathBuf> {
182        None
183    }
184
185    /// Returns an optional directory for telemetry or log artifacts.
186    fn telemetry_dir(&self) -> Option<PathBuf> {
187        None
188    }
189
190    /// Determine the [`PathScope`] for a given path based on workspace directories.
191    ///
192    /// Returns the most specific scope matching the path:
193    /// - `Workspace` if under `workspace_root()`
194    /// - `Config` if under `config_dir()`
195    /// - `Cache` if under `cache_dir()`
196    /// - `Telemetry` if under `telemetry_dir()`
197    /// - Falls back to `Cache` if no match
198    fn scope_for_path(&self, path: &Path) -> PathScope {
199        if path.starts_with(self.workspace_root()) {
200            return PathScope::Workspace;
201        }
202
203        let config_dir = self.config_dir();
204        if path.starts_with(&config_dir) {
205            return PathScope::Config;
206        }
207
208        if let Some(cache_dir) = self.cache_dir() {
209            if path.starts_with(&cache_dir) {
210                return PathScope::Cache;
211            }
212        }
213
214        if let Some(telemetry_dir) = self.telemetry_dir() {
215            if path.starts_with(&telemetry_dir) {
216                return PathScope::Telemetry;
217            }
218        }
219
220        PathScope::Cache
221    }
222}
223
224/// Helper trait that adds path resolution helpers on top of [`WorkspacePaths`].
225pub trait PathResolver: WorkspacePaths {
226    /// Resolve a path relative to the workspace root.
227    fn resolve<P>(&self, relative: P) -> PathBuf
228    where
229        P: AsRef<Path>,
230    {
231        self.workspace_root().join(relative)
232    }
233
234    /// Resolve a path within the configuration directory.
235    fn resolve_config<P>(&self, relative: P) -> PathBuf
236    where
237        P: AsRef<Path>,
238    {
239        self.config_dir().join(relative)
240    }
241}
242
243impl<T> PathResolver for T where T: WorkspacePaths + ?Sized {}
244
/// Coarse category a file path belongs to, as reported by
/// `WorkspacePaths::scope_for_path`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathScope {
    /// Located under the workspace root.
    Workspace,
    /// Located under the configuration directory.
    Config,
    /// Located under the cache directory; also the fallback when nothing matches.
    Cache,
    /// Located under the telemetry directory.
    Telemetry,
}

impl PathScope {
    /// Short lowercase label for this scope, suitable for embedding in error messages.
    pub fn description(self) -> &'static str {
        match self {
            PathScope::Telemetry => "telemetry",
            PathScope::Cache => "cache",
            PathScope::Config => "configuration",
            PathScope::Workspace => "workspace",
        }
    }
}
265
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Minimal [`WorkspacePaths`] implementation backed by fixed directories.
    struct StaticPaths {
        root: PathBuf,
        config: PathBuf,
    }

    impl WorkspacePaths for StaticPaths {
        fn workspace_root(&self) -> &Path {
            &self.root
        }

        fn config_dir(&self) -> PathBuf {
            self.config.clone()
        }

        fn cache_dir(&self) -> Option<PathBuf> {
            Some(self.root.join("cache"))
        }
    }

    /// Name for a scratch entry in the temp dir, suffixed with the process id so
    /// concurrent test runs on the same machine do not touch each other's files.
    fn scratch_name(label: &str) -> String {
        format!("vtcode_test_{}_{}", label, std::process::id())
    }

    #[test]
    fn resolves_relative_paths() {
        let paths = StaticPaths {
            root: PathBuf::from("/tmp/project"),
            config: PathBuf::from("/tmp/project/config"),
        };

        assert_eq!(
            PathResolver::resolve(&paths, "subdir/file.txt"),
            PathBuf::from("/tmp/project/subdir/file.txt")
        );
        assert_eq!(
            PathResolver::resolve_config(&paths, "settings.toml"),
            PathBuf::from("/tmp/project/config/settings.toml")
        );
        assert_eq!(paths.cache_dir(), Some(PathBuf::from("/tmp/project/cache")));
    }

    #[tokio::test]
    async fn test_canonicalize_existing_file() {
        // Create a temporary file
        let test_file = std::env::temp_dir().join(scratch_name("existing"));
        tokio::fs::write(&test_file, b"test").await.unwrap();

        let result = canonicalize_allow_missing(&test_file).await;
        let expected = tokio::fs::canonicalize(&test_file).await;

        // Cleanup before asserting so a failure does not leave the file behind
        tokio::fs::remove_file(&test_file).await.ok();

        // Should get the same canonical path the filesystem reports
        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert_eq!(canonical, expected.unwrap());
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file() {
        // Use a path that doesn't exist, inside a directory that doesn't exist either
        let dir_name = scratch_name("missing_dir");
        let missing_file = std::env::temp_dir()
            .join(&dir_name)
            .join("missing_file.txt");

        let canonical = canonicalize_allow_missing(&missing_file).await.unwrap();

        // Should get canonical ancestor + missing components
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with(Path::new(&dir_name).join("missing_file.txt")));
    }

    #[tokio::test]
    async fn test_canonicalize_deeply_missing_path() {
        // Use a path with multiple missing parent directories
        let dir_name = scratch_name("a");
        let deep_missing = std::env::temp_dir()
            .join(&dir_name)
            .join("b/c/d/file.txt");

        let canonical = canonicalize_allow_missing(&deep_missing).await.unwrap();

        // Should get canonical temp_dir + every missing component, in order
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with(Path::new(&dir_name).join("b/c/d/file.txt")));
    }

    #[tokio::test]
    async fn test_canonicalize_missing_file_with_existing_parent() {
        // Create a parent directory
        let dir_name = scratch_name("parent");
        let test_dir = std::env::temp_dir().join(&dir_name);
        tokio::fs::create_dir_all(&test_dir).await.unwrap();

        let missing_file = test_dir.join("missing.txt");
        let result = canonicalize_allow_missing(&missing_file).await;

        // Cleanup before asserting so a failure does not leave the directory behind
        tokio::fs::remove_dir(&test_dir).await.ok();

        // Should get canonical parent + missing filename
        let canonical = result.unwrap();
        assert!(canonical.is_absolute());
        assert!(canonical.ends_with(Path::new(&dir_name).join("missing.txt")));
    }
}
368}