Skip to main content

perl_workspace/folder/
mod.rs

1//! Workspace folder URI/path parsing.
2//!
3//! Converts workspace folder entries into local filesystem paths with
4//! deterministic behavior for both plain paths and `file://` URIs.
5
6use std::path::PathBuf;
7
8#[cfg(not(target_arch = "wasm32"))]
9use perl_uri::uri_to_fs_path;
10use serde_json::Value;
11
/// Added and removed workspace folder URIs taken from a workspace folder
/// change event.
///
/// Both lists hold the URI strings as extracted; a missing section is
/// represented by an empty list.
#[non_exhaustive]
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WorkspaceFolderChange {
    /// URIs of the workspace folders that were added.
    pub added: Vec<String>,
    /// URIs of the workspace folders that were removed.
    pub removed: Vec<String>,
}
21
22/// Parse a workspace folder declaration into a filesystem path.
23///
24/// Workspace folders can be passed as absolute paths or `file://` URIs. For
25/// `file://` URIs this attempts to resolve through `perl_uri::uri_to_fs_path`.
26/// If URI resolution fails, the scheme prefix is trimmed and the remainder is
27/// interpreted as a path fallback.
28#[must_use]
29pub fn workspace_folder_to_path(workspace_folder: &str) -> PathBuf {
30    if has_file_uri_scheme(workspace_folder) {
31        #[cfg(not(target_arch = "wasm32"))]
32        if let Some(path) = uri_to_fs_path(workspace_folder) {
33            return path;
34        }
35
36        if let Some(path) = parse_file_uri_fallback(workspace_folder) {
37            return path;
38        }
39
40        // Only fall back to raw prefix-trim for local file URIs.  A URI with a
41        // non-local host (e.g. `file://evil.example.com/path`) must not reach
42        // this path, because `trim_file_uri_prefix` would strip the leading
43        // `//` and return `"evil.example.com/path"` — still leaking the remote
44        // hostname into a PathBuf that the caller may later open.
45        if file_uri_has_remote_host(workspace_folder) {
46            return PathBuf::from(workspace_folder);
47        }
48
49        return PathBuf::from(trim_file_uri_prefix(workspace_folder));
50    }
51
52    PathBuf::from(workspace_folder)
53}
54
/// Returns `true` when `value` begins with the `file:` scheme, compared
/// ASCII case-insensitively.
///
/// Strings shorter than the scheme, or whose fifth byte offset is not a
/// character boundary, yield `false`.
fn has_file_uri_scheme(value: &str) -> bool {
    const SCHEME: &str = "file:";
    match value.get(..SCHEME.len()) {
        Some(head) => head.eq_ignore_ascii_case(SCHEME),
        None => false,
    }
}
58
/// Returns `true` when `value` begins with `file://`, compared ASCII
/// case-insensitively.
///
/// Unlike a bare scheme check, the two slashes after `file:` are required.
fn has_file_uri_prefix(value: &str) -> bool {
    const PREFIX: &str = "file://";
    match value.get(..PREFIX.len()) {
        Some(head) => head.eq_ignore_ascii_case(PREFIX),
        None => false,
    }
}
62
/// Strip the `file:` scheme, and one following `//` if present, from `value`.
///
/// The scheme is matched ASCII case-insensitively. Input that does not begin
/// with `file:` (including strings shorter than the scheme) is returned
/// unchanged rather than being sliced at a fixed byte offset, so this helper
/// cannot panic on short or non-ASCII input.
fn trim_file_uri_prefix(value: &str) -> &str {
    const SCHEME: &str = "file:";
    match value.get(..SCHEME.len()) {
        Some(scheme) if scheme.eq_ignore_ascii_case(SCHEME) => {
            // The matched scheme is pure ASCII, so this offset is guaranteed
            // to be a character boundary.
            let suffix = &value[SCHEME.len()..];
            suffix.strip_prefix("//").unwrap_or(suffix)
        }
        _ => value,
    }
}
67
68/// Returns `true` when `value` is a `file://` URI whose authority component
69/// names a non-local host (i.e. something other than empty or `"localhost"`).
70///
71/// Used to block the `trim_file_uri_prefix` last-resort path in
72/// [`workspace_folder_to_path`] so that remote hostnames cannot leak into the
73/// returned `PathBuf`.
74fn file_uri_has_remote_host(value: &str) -> bool {
75    url::Url::parse(value)
76        .ok()
77        .filter(|u| u.scheme() == "file")
78        .and_then(|u| u.host_str().map(|h| !is_local_file_host(h)))
79        .unwrap_or(false)
80}
81
/// Returns `true` when `host` denotes the local machine.
///
/// Before comparison, trailing `.` characters are removed, then leading `[`
/// and trailing `]` characters. The remainder is compared ASCII
/// case-insensitively against the empty string, `localhost`, `127.0.0.1`
/// and `::1`.
fn is_local_file_host(host: &str) -> bool {
    const LOCAL_HOSTS: [&str; 4] = ["", "localhost", "127.0.0.1", "::1"];

    let bare = host
        .trim_end_matches('.')
        .trim_start_matches('[')
        .trim_end_matches(']');
    LOCAL_HOSTS
        .iter()
        .any(|candidate| bare.eq_ignore_ascii_case(candidate))
}
90
91fn parse_file_uri_fallback(workspace_folder: &str) -> Option<PathBuf> {
92    let parsed = url::Url::parse(workspace_folder).ok()?;
93    if parsed.scheme() != "file" {
94        return None;
95    }
96
97    if let Ok(path) = parsed.to_file_path() {
98        return Some(path);
99    }
100
101    let path = parsed.path();
102    if path.is_empty() {
103        return None;
104    }
105
106    match parsed.host_str() {
107        None => Some(PathBuf::from(path)),
108        Some(host) if is_local_file_host(host) => Some(PathBuf::from(path)),
109        Some(_) => None,
110    }
111}
112
113/// Extract workspace folder URIs from an LSP `workspaceFolders` array.
114///
115/// Invalid entries are ignored.
116#[must_use]
117pub fn extract_workspace_folder_uris(workspace_folders: &[Value]) -> Vec<String> {
118    workspace_folders
119        .iter()
120        .filter_map(|folder| match folder {
121            Value::String(uri) => Some(uri.clone()),
122            Value::Object(_) => folder
123                .get("uri")
124                .and_then(Value::as_str)
125                .map(std::string::ToString::to_string)
126                .or_else(|| folder.get("path").and_then(Value::as_str).map(root_path_to_file_uri)),
127            _ => None,
128        })
129        .collect()
130}
131
132/// Extract URI changes from an LSP `workspace/didChangeWorkspaceFolders` event payload.
133///
134/// Missing/invalid sections are treated as empty.
135#[must_use]
136pub fn extract_workspace_folder_change(event: &Value) -> WorkspaceFolderChange {
137    let added = event
138        .get("added")
139        .and_then(Value::as_array)
140        .map_or_else(Vec::new, |entries| extract_workspace_folder_uris(entries));
141
142    let removed = event
143        .get("removed")
144        .and_then(Value::as_array)
145        .map_or_else(Vec::new, |entries| extract_workspace_folder_uris(entries));
146
147    WorkspaceFolderChange { added, removed }
148}
149
150/// Convert a legacy LSP `rootPath` string to a `file://` URI.
151///
152/// This keeps behavior deterministic across absolute POSIX and Windows-style paths.
153#[must_use]
154pub fn root_path_to_file_uri(root_path: &str) -> String {
155    if has_file_uri_prefix(root_path) {
156        return root_path.to_string();
157    }
158
159    let path = std::path::Path::new(root_path);
160    url::Url::from_file_path(path).map_or_else(
161        |_| {
162            if root_path.starts_with('/') {
163                format!("file://{}", root_path)
164            } else {
165                let normalized = root_path.replace('\\', "/");
166                // Preserve legacy behavior (force an absolute-looking file URI for
167                // non-absolute paths) while ensuring URI-safe percent encoding.
168                let pseudo_absolute = format!("/{normalized}");
169                url::Url::from_file_path(std::path::Path::new(&pseudo_absolute))
170                    .map_or_else(|_| format!("file:///{}", normalized), |uri| uri.to_string())
171            }
172        },
173        |uri| uri.to_string(),
174    )
175}
176
#[cfg(test)]
mod tests {
    use super::{
        extract_workspace_folder_change, extract_workspace_folder_uris, root_path_to_file_uri,
        workspace_folder_to_path,
    };
    use serde_json::json;
    use std::path::PathBuf;

    // --- workspace_folder_to_path: plain paths and local file URIs ---

    #[test]
    fn parses_plain_folder_path() {
        let resolved = workspace_folder_to_path("/tmp/project");
        assert_eq!(resolved, PathBuf::from("/tmp/project"));
    }

    #[cfg(not(target_arch = "wasm32"))]
    #[test]
    fn parses_file_uri_when_possible() {
        let resolved = workspace_folder_to_path("file:///tmp/project");
        let text = resolved.to_string_lossy();
        assert!(text.contains("tmp"));
        assert!(text.contains("project"));
    }

    #[cfg(not(target_arch = "wasm32"))]
    #[test]
    fn parses_uppercase_file_uri_when_possible() {
        let resolved = workspace_folder_to_path("FILE:///tmp/project");
        let text = resolved.to_string_lossy();
        assert!(text.contains("tmp"));
        assert!(text.contains("project"));
    }

    #[cfg(not(target_arch = "wasm32"))]
    #[test]
    fn parses_single_slash_file_uri_when_possible() {
        let resolved = workspace_folder_to_path("file:/tmp/project");
        assert_eq!(resolved, PathBuf::from("/tmp/project"));
    }

    #[cfg(not(target_arch = "wasm32"))]
    #[test]
    fn parses_uppercase_single_slash_file_uri_when_possible() {
        let resolved = workspace_folder_to_path("FILE:/tmp/project");
        assert_eq!(resolved, PathBuf::from("/tmp/project"));
    }

    #[test]
    fn parses_localhost_file_uri_without_leaking_host_component() {
        let resolved = workspace_folder_to_path("file://localhost/tmp/project");
        let text = resolved.to_string_lossy();
        assert!(text.contains("tmp"));
        assert!(text.contains("project"));
        // The authority must not survive as a leading path component.
        assert!(!text.contains("localhost/tmp"));
    }

    // --- extract_workspace_folder_uris ---

    #[test]
    fn extracts_workspace_uris() {
        // Two objects with "uri", one with only "path", one bare string, and
        // one object with neither key (which is expected to be skipped).
        let folders = vec![
            json!({"uri": "file:///one"}),
            json!({"uri": "file:///two"}),
            json!({"path": "/three"}),
            json!("file:///four"),
            json!({"name": "invalid"}),
        ];
        let extracted = extract_workspace_folder_uris(&folders);
        assert_eq!(extracted, vec!["file:///one", "file:///two", "file:///three", "file:///four"]);
    }

    #[test]
    fn string_form_uri_passes_through_without_normalization() {
        // Bare string entries are returned exactly as given, the same way the
        // "uri" member of an object entry is.
        let folders = vec![json!("file:///a/b/c"), json!("file:///C:/Users/foo")];
        let extracted = extract_workspace_folder_uris(&folders);
        assert_eq!(extracted, vec!["file:///a/b/c", "file:///C:/Users/foo"]);
    }

    #[test]
    fn non_file_and_non_object_entries_are_dropped() {
        // Null, numbers, booleans, and arrays carry no URI and are skipped.
        let folders = vec![json!(null), json!(42), json!(true), json!([])];
        let uris = extract_workspace_folder_uris(&folders);
        assert!(uris.is_empty(), "expected empty result, got {uris:?}");
    }

    #[test]
    fn object_uri_key_takes_precedence_over_path_key() {
        // With both keys present, the "uri" member is the one reported.
        let folders = vec![json!({"uri": "file:///from-uri", "path": "/from-path"})];
        let extracted = extract_workspace_folder_uris(&folders);
        assert_eq!(extracted, vec!["file:///from-uri"]);
    }

    // --- extract_workspace_folder_change ---

    #[test]
    fn extracts_workspace_change_entries() {
        let event = json!({
            "added": [{"uri": "file:///add"}],
            "removed": [{"uri": "file:///remove"}],
        });
        let change = extract_workspace_folder_change(&event);

        assert_eq!(change.added, vec!["file:///add"]);
        assert_eq!(change.removed, vec!["file:///remove"]);
    }

    // --- root_path_to_file_uri ---

    #[test]
    fn converts_legacy_root_path_to_file_uri() {
        assert_eq!(root_path_to_file_uri("/legacy/workspace"), "file:///legacy/workspace");
    }

    #[test]
    fn preserves_file_uri_root_path_input() {
        assert_eq!(root_path_to_file_uri("file:///already/uri"), "file:///already/uri");
    }

    #[test]
    fn encodes_spaces_in_windows_style_root_path() {
        let converted = root_path_to_file_uri(r"C:\Users\me\My Project");
        assert_eq!(converted, "file:///C:/Users/me/My%20Project");
    }

    #[test]
    fn preserves_uppercase_file_uri_root_path_input() {
        assert_eq!(root_path_to_file_uri("FILE:///already/uri"), "FILE:///already/uri");
    }

    // --- workspace_folder_to_path: authority handling ---

    #[test]
    fn parses_file_uri_with_localhost_authority() {
        let resolved = workspace_folder_to_path("file://localhost/tmp/project");
        let text = resolved.to_string_lossy();
        assert!(text.contains("tmp"));
        assert!(text.contains("project"));
    }

    #[test]
    fn parses_file_uri_with_localhost_variants() {
        let local_spellings = [
            "file://LOCALHOST/tmp/project",
            "file://localhost./tmp/project",
            "file://127.0.0.1/tmp/project",
            "file://[::1]/tmp/project",
        ];
        for uri in local_spellings {
            let resolved = workspace_folder_to_path(uri);
            assert!(!resolved.to_string_lossy().contains("file://"), "uri leaked: {uri}");
        }
    }

    #[test]
    fn does_not_generate_unc_path_for_non_local_file_uri_host() {
        let resolved = workspace_folder_to_path("file://evil.example.com/share/project");
        let path = resolved.to_string_lossy();
        // The remote hostname must not lead the path in either shape: not as a
        // UNC-style `//evil.example.com/...` prefix, and not as a bare first
        // component `evil.example.com/...` (what plain prefix trimming of the
        // URI would produce).
        let leaked = path.starts_with("//evil.example.com") || path.starts_with("evil.example.com");
        assert!(!leaked, "remote hostname leaked into path: {path}");
    }

    #[test]
    fn does_not_resolve_remote_host_with_path_component() {
        // Whatever resolution strategy ends up handling each URI, its host
        // must never become the leading component of the resulting path. The
        // list covers a DNS name, an IPv4 literal, and a bracketed IPv6
        // literal.
        let uris = [
            "file://attacker.example.org/sensitive/data",
            "file://192.0.2.1/share",
            "file://[::1]/ipv6-local",
        ];
        let forbidden_prefixes = ["attacker.example.org", "192.0.2.1", "[::1]", "::1"];
        for uri in uris {
            let resolved = workspace_folder_to_path(uri);
            let path = resolved.to_string_lossy();
            assert!(
                forbidden_prefixes.iter().all(|prefix| !path.starts_with(prefix)),
                "remote hostname leaked into path for {uri}: {path}"
            );
        }
    }
}