ls_types/
uri.rs

1use std::{
2    borrow::Cow,
3    hash::Hash,
4    ops::{Deref, DerefMut},
5    path::{Path, PathBuf},
6    str::FromStr,
7};
8
9use percent_encoding::AsciiSet;
10use serde::{Deserialize, Serialize, de::Error};
11
12/// Newtype struct around `fluent_uri::Uri<String>` with serialization implementations that use `as_str()` and '`from_str()`' respectively.
13#[derive(Debug, Clone)]
14pub struct Uri(fluent_uri::Uri<String>);
15
16impl Serialize for Uri {
17    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
18    where
19        S: serde::Serializer,
20    {
21        self.as_str().serialize(serializer)
22    }
23}
24
25impl<'de> Deserialize<'de> for Uri {
26    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
27    where
28        D: serde::Deserializer<'de>,
29    {
30        let string = String::deserialize(deserializer)?;
31        fluent_uri::Uri::<String>::parse(string)
32            .map(Uri)
33            .map_err(|err| Error::custom(err.to_string()))
34    }
35}
36
37impl From<fluent_uri::Uri<String>> for Uri {
38    fn from(uri: fluent_uri::Uri<String>) -> Self {
39        Self(uri)
40    }
41}
42
43impl Ord for Uri {
44    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
45        self.as_str().cmp(other.as_str())
46    }
47}
48
49impl PartialOrd for Uri {
50    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
51        Some(self.cmp(other))
52    }
53}
54
55impl FromStr for Uri {
56    type Err = fluent_uri::error::ParseError;
57
58    fn from_str(s: &str) -> Result<Self, Self::Err> {
59        // TOUCH-UP:
60        // Use upstream `FromStr` implementation if and when
61        // https://github.com/yescallop/fluent-uri-rs/pull/10
62        // gets merged.
63        // fluent_uri::Uri::from_str(s).map(Self)
64        fluent_uri::Uri::parse(s).map(|uri| Self(uri.to_owned()))
65    }
66}
67
68impl Deref for Uri {
69    type Target = fluent_uri::Uri<String>;
70
71    fn deref(&self) -> &Self::Target {
72        &self.0
73    }
74}
75
76impl DerefMut for Uri {
77    fn deref_mut(&mut self) -> &mut Self::Target {
78        &mut self.0
79    }
80}
81
82/*
83    TOUCH-UP: `PartialEq`, `Eq` and `Hash` could all be derived
84    if and when the respective implementations get merged upstream:
85    https://github.com/yescallop/fluent-uri-rs/pull/9
86*/
87impl PartialEq for Uri {
88    fn eq(&self, other: &Self) -> bool {
89        self.as_str() == other.as_str()
90    }
91}
92
93impl Eq for Uri {}
94
95impl Hash for Uri {
96    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
97        self.as_str().hash(state);
98    }
99}
100
101#[cfg(not(windows))]
102pub use std::fs::canonicalize as strict_canonicalize;
103
104/// On Windows, rewrites the wide path prefix `\\?\C:` to `C:`
105/// Source: https://stackoverflow.com/a/70970317
106#[inline]
107#[cfg(windows)]
fn strict_canonicalize<P: AsRef<Path>>(path: P) -> std::io::Result<PathBuf> {
    use std::io;
    use std::path::{Component, Prefix};

    // Non-generic worker: takes the already canonicalized path and swaps a leading
    // verbatim disk prefix (`\\?\C:`) for the plain drive prefix (`C:`).
    fn impl_(path: PathBuf) -> io::Result<PathBuf> {
        let mut components = path.components();
        // Build the error lazily so nothing is allocated on the success path.
        let head = components
            .next()
            .ok_or_else(|| io::Error::other("empty path"))?;

        // Declared before the `match` so the replacement component can borrow from it
        // for as long as `head` is alive.
        let disk_;
        let head = match head {
            Component::Prefix(prefix) => match prefix.kind() {
                Prefix::VerbatimDisk(disk) => {
                    // A prefix component cannot be constructed directly, so format the
                    // plain drive (`C:`) and parse it back into a component.
                    disk_ = format!("{}:", disk as char);
                    Path::new(&disk_)
                        .components()
                        .next()
                        .ok_or_else(|| io::Error::other("failed to parse disk component"))?
                }
                // Any other kind of prefix is kept as it is.
                _ => head,
            },
            // Paths that do not start with a prefix are kept as they are.
            _ => head,
        };

        // `components` has already yielded the first element, so chaining it appends
        // exactly the remainder of the path after the (possibly replaced) head.
        Ok(std::iter::once(head).chain(components).collect())
    }

    impl_(std::fs::canonicalize(path)?)
}
138
139#[cfg(windows)]
/// Returns `path` with its first character ASCII-uppercased when the second character
/// is `:` (a drive-letter path such as `c:/...`); any other input is returned unchanged.
fn capitalize_drive_letter(path: &str) -> String {
    let mut rest = path.chars();
    match (rest.next(), rest.next()) {
        (Some(drive), Some(':')) => {
            let mut out = String::with_capacity(path.len());
            out.push(drive.to_ascii_uppercase());
            out.push(':');
            // Everything after the `X:` pair is copied over verbatim.
            out.push_str(rest.as_str());
            out
        }
        _ => path.to_string(),
    }
}
151
152const ASCII_SET: AsciiSet =
153    // RFC3986 allows only alphanumeric characters, `-`, `.`, `_`, and `~` in the path.
154    percent_encoding::NON_ALPHANUMERIC
155        .remove(b'-')
156        .remove(b'.')
157        .remove(b'_')
158        .remove(b'~')
159        // we do not want path separators to be percent-encoded
160        .remove(b'/');
161
162/// Provide methods to [`Uri`] to fill blanks left by
163/// `fluent_uri` (the underlying type) especially when converting to and from file paths.
164impl Uri {
165    /// Assuming the URL is in the `file` scheme or similar,
166    /// convert its path to an absolute `std::path::Path`.
167    ///
168    /// **Note:** This does not actually check the URL’s `scheme`, and may
169    /// give nonsensical results for other schemes. It is the user’s
170    /// responsibility to check the URL’s scheme before calling this.
171    ///
172    /// e.g. `Uri("file:///etc/passwd")` becomes `PathBuf("/etc/passwd")`
173    #[must_use]
174    pub fn to_file_path(&self) -> Option<Cow<'_, Path>> {
175        let path_str = self.path().decode().into_string_lossy();
176        if path_str.is_empty() {
177            return None;
178        }
179
180        let path = match path_str {
181            Cow::Borrowed(ref_) => Cow::Borrowed(Path::new(ref_)),
182            Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)),
183        };
184
185        if cfg!(windows) {
186            let auth_host = self.authority().map(|auth| auth.host()).unwrap_or_default();
187
188            if auth_host.is_empty() {
189                // very high chance this is a `file:///c:/...` uri
190                // in which case the path will include a leading slash we
191                // need to remove to get `c:/...`
192                let host = path.to_string_lossy();
193                let host = host.get(1..)?;
194                return Some(Cow::Owned(PathBuf::from(host)));
195            }
196
197            Some(Cow::Owned(
198                // `file://server/...` becomes `server:/`
199                Path::new(&format!("{auth_host}:"))
200                    .components()
201                    .chain(path.components())
202                    .collect(),
203            ))
204        } else {
205            Some(path)
206        }
207    }
208
209    /// Convert a file path to a [`Uri`].
210    ///
211    /// Returns `None` if the file does not exist.
212    pub fn from_file_path<A: AsRef<Path>>(path: A) -> Option<Self> {
213        let path = path.as_ref();
214
215        let fragment = if path.is_absolute() {
216            Cow::Borrowed(path)
217        } else {
218            match strict_canonicalize(path) {
219                Ok(path) => Cow::Owned(path),
220                Err(_) => return None,
221            }
222        };
223
224        #[cfg(windows)]
225        let raw_uri = {
226            // we want to parse a triple-slash path for Windows paths
227            // it's a shorthand for `file://localhost/C:/Windows` with the `localhost` omitted.
228            // We encode the driver Letter `C:` as well. LSP Specification allows it.
229            // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
230            format!(
231                "file:///{}",
232                percent_encoding::utf8_percent_encode(
233                    &capitalize_drive_letter(&fragment.to_string_lossy().replace('\\', "/")),
234                    &ASCII_SET
235                )
236            )
237        };
238
239        #[cfg(not(windows))]
240        let raw_uri = {
241            format!(
242                "file://{}",
243                percent_encoding::utf8_percent_encode(&fragment.to_string_lossy(), &ASCII_SET)
244            )
245        };
246
247        Self::from_str(&raw_uri).ok()
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    use fluent_uri::encoding::EStr;
    use std::path::{Path, PathBuf};
    use std::str::FromStr;

    /// Prepends the `file` scheme prefix expected on the current platform
    /// (`file:///` on Windows, `file://` everywhere else) to `path`.
    fn with_schema(path: &str) -> String {
        let prefix = if cfg!(windows) { "file:///" } else { "file://" };
        [prefix, path].concat()
    }

    /// Converts `source` to a [`Uri`] and back and checks that the same path comes out.
    #[cfg(any(unix, windows))]
    fn assert_path_roundtrips(source: PathBuf) {
        let conv = Uri::from_file_path(&source).unwrap();
        let roundtrip = conv.to_file_path().unwrap();
        assert_eq!(source, roundtrip, "conv={conv:?}");
    }

    /// Mutating through `DerefMut` reaches the wrapped URI.
    #[test]
    fn deref_mut_fragment_add() {
        let mut uri = Uri::from_str("https://www.example.com").unwrap();
        uri.set_fragment(Some(EStr::new_or_panic("L11")));
        assert_eq!(uri.as_str(), "https://www.example.com#L11");
    }

    /// Canonicalizing an already canonical path must not change it.
    #[test]
    #[cfg(windows)]
    fn test_idempotent_canonicalization() {
        let once = strict_canonicalize(Path::new(".")).unwrap();
        let twice = strict_canonicalize(&once).unwrap();
        assert_eq!(once, twice);
    }

    #[test]
    #[cfg(unix)]
    fn test_path_roundtrip_conversion() {
        assert_path_roundtrips(strict_canonicalize(Path::new(".")).unwrap());

        for literal in [
            "/some/path/to/file.txt",
            "/some/path/to/file with spaces.txt",
            "/some/path/[[...rest]]/file.txt",
            "/some/path/to/файл.txt",
            "/some/path/to/文件.txt",
        ] {
            assert_path_roundtrips(PathBuf::from(literal));
        }
    }

    #[test]
    #[cfg(windows)]
    fn test_path_roundtrip_conversion() {
        assert_path_roundtrips(strict_canonicalize(Path::new(".")).unwrap());

        for literal in [
            "C:\\some\\path\\to\\file.txt",
            "C:\\some\\path\\to\\file with spaces.txt",
            "C:\\some\\path\\[[...rest]]\\file.txt",
            "C:\\some\\path\\to\\файл.txt",
            "C:\\some\\path\\to\\文件.txt",
        ] {
            assert_path_roundtrips(PathBuf::from(literal));
        }
    }

    /// Different spellings of the same drive-letter URI map to one path, and that path
    /// maps back to a single normalized URI.
    #[test]
    #[cfg(windows)]
    fn test_windows_uri_roundtrip_conversion() {
        let final_uri = Uri::from_str("file:///C%3A/some/path/to/file.txt").unwrap();
        let expected_path = Path::new("C:\\some\\path\\to\\file.txt");

        for raw in [
            "file:///C:/some/path/to/file.txt",
            "file:///c:/some/path/to/file.txt",
            "file:///c%3A/some/path/to/file.txt",
        ] {
            let uri = Uri::from_str(raw).unwrap();

            let path = uri.to_file_path().unwrap();
            assert_eq!(&path, expected_path, "uri={uri:?}");

            let conv = Uri::from_file_path(&path).unwrap();
            assert_eq!(
                final_uri,
                conv,
                "path={path:?} left={} right={}",
                final_uri.as_str(),
                conv.as_str()
            );
        }
    }

    #[test]
    #[cfg(unix)]
    fn test_path_to_uri() {
        // (file path, expected URI without the scheme prefix)
        let cases = [
            ("/some/path/to/file.txt", "/some/path/to/file.txt"),
            (
                "/some/path/to/file with spaces.txt",
                "/some/path/to/file%20with%20spaces.txt",
            ),
            (
                "/some/path/[[...rest]]/file.txt",
                "/some/path/%5B%5B...rest%5D%5D/file.txt",
            ),
            (
                "/some/path/to/файл.txt",
                "/some/path/to/%D1%84%D0%B0%D0%B9%D0%BB.txt",
            ),
            (
                "/some/path/to/文件.txt",
                "/some/path/to/%E6%96%87%E4%BB%B6.txt",
            ),
        ];

        for (path, encoded) in cases {
            let uri = Uri::from_file_path(PathBuf::from(path)).unwrap();
            assert_eq!(uri.to_string(), with_schema(encoded));
        }
    }

    #[test]
    #[cfg(windows)]
    fn test_path_to_uri_windows() {
        // (file path, expected URI without the scheme prefix)
        //
        // yes we encode `:` too, LSP allows it
        // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
        let cases = [
            ("C:\\some\\path\\to\\file.txt", "C%3A/some/path/to/file.txt"),
            (
                "C:\\some\\path\\to\\file with spaces.txt",
                "C%3A/some/path/to/file%20with%20spaces.txt",
            ),
            (
                "C:\\some\\path\\[[...rest]]\\file.txt",
                "C%3A/some/path/%5B%5B...rest%5D%5D/file.txt",
            ),
            (
                "C:\\some\\path\\to\\файл.txt",
                "C%3A/some/path/to/%D1%84%D0%B0%D0%B9%D0%BB.txt",
            ),
            (
                "C:\\some\\path\\to\\文件.txt",
                "C%3A/some/path/to/%E6%96%87%E4%BB%B6.txt",
            ),
        ];

        for (path, encoded) in cases {
            let uri = Uri::from_file_path(PathBuf::from(path)).unwrap();
            assert_eq!(uri.to_string(), with_schema(encoded));
        }
    }

    /// A `file` URI with an empty path has no file path on any platform.
    #[test]
    fn test_invalid_uri_on_windows() {
        let uri = Uri::from_str("file://").unwrap();
        assert!(uri.to_file_path().is_none());
    }
}