unrestrictive_url/
lib.rs

1use std::{fmt, str::Split};
2pub use url::*;
3
4/// A small wrapper around [`url::Url`] that allows free URL modifications.
5///
6/// Since the [`url`] crate strictly follows the [WHATWG](https://url.spec.whatwg.org/) specification, some operations are deemed illegal and can't be performed with the crate. This crate allows such operations.
7///
8/// # Example
9///
10/// ```rust
11/// use unrestrictive_url::{Url, UnrestrictiveUrl};
12///
13/// let url = Url::parse("https://github.com").unwrap();
14/// let mut url = UnrestrictiveUrl::from(&url);
15/// url.scheme = Some("jojo");
16///
17/// assert_eq!("jojo://github.com/", url.to_string());
18/// ```
19pub struct UnrestrictiveUrl<'a> {
20    pub fragment: Option<&'a str>,
21    pub host: Option<url::Host<&'a str>>,
22    pub password: Option<&'a str>,
23    pub path: Option<&'a str>,
24    pub port: Option<u16>,
25    pub query: Option<&'a str>,
26    pub scheme: Option<&'a str>,
27    pub username: Option<&'a str>,
28    // Probably not needed but I kept it in to make it fully compliant with the specification's
29    // serialization description.
30    cannot_be_a_base: bool,
31}
32
33impl<'a> UnrestrictiveUrl<'a> {
34    pub fn path_segments(&self) -> Option<Split<'a, char>> {
35        self.path.and_then(|v| {
36            if v.starts_with('/') {
37                Some(v[1..].split('/'))
38            } else {
39                None
40            }
41        })
42    }
43}
44
45impl fmt::Display for UnrestrictiveUrl<'_> {
46    // https://url.spec.whatwg.org/#url-serializing
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        // 1)
49        if let Some(scheme) = self.scheme {
50            // In reality URLs have to have a schema. But for special use-cases like URL
51            // truncation, an empty schema might be desireable.
52            write!(f, "{}:", scheme)?;
53        }
54
55        // 2)
56        if self.host.is_some() {
57            // 2.1)
58            // XXX: special case for no scheme. In these cases, a double slash is probably
59            // not wanted. The `url` crate won't parse URLs starting with a double slash
60            // anyway without having a base URL specified, which this crate does not allow
61            // to do (and probably won't ever).
62            if self.scheme.is_some() {
63                write!(f, "//")?;
64            }
65
66            if let Some(username) = self.username {
67                // 2.2.1)
68                write!(f, "{}", username)?;
69                if let Some(password) = self.password {
70                    if !password.is_empty() {
71                        // 2.2.2)
72                        write!(f, ":{}", password)?;
73                    }
74                }
75
76                // 2.2.3)
77                write!(f, "@")?;
78            }
79
80            // 2.3)
81            match &self.host {
82                Some(host) => match host {
83                    url::Host::Domain(v) => write!(f, "{}", v)?,
84                    url::Host::Ipv4(v) => write!(f, "{}", v)?,
85                    url::Host::Ipv6(v) => write!(f, "[{}]", v)?,
86                },
87                None => {}
88            }
89
90            // 2.4)
91            if let Some(port) = self.port {
92                write!(f, ":{}", port)?;
93            }
94        }
95
96        // 3)
97        if self.cannot_be_a_base {
98            let first_path_segment = self.path_segments().and_then(|mut v| v.next());
99            if let Some(segment) = first_path_segment {
100                write!(f, "{}", segment)?;
101            }
102        } else {
103            // 4)
104            if let Some(path) = self.path {
105                // Special case '/' only.
106                if path == "/" {
107                    write!(f, "/")?;
108                } else {
109                    let path_segments = path.split('/').collect::<Vec<_>>();
110                    if self.host.is_none() && path_segments.len() > 1 && path_segments[0] == "" {
111                        write!(f, "/.")?;
112                    }
113
114                    for segment in path_segments {
115                        // Rust's `split` functions returns empty strings in cases where two
116                        // separators are next to each other. For a path `/other` splitted at `/`
117                        // this means that we get two values back, `["", "other"]`. This would get
118                        // serialized into `//other`. By ignoring empty strings we correctly
119                        // serialize the URL again.
120                        if !segment.is_empty() {
121                            write!(f, "/{}", segment)?;
122                        }
123                    }
124                }
125            }
126        }
127
128        // 5)
129        if let Some(query) = self.query {
130            write!(f, "?{}", query)?;
131        }
132
133        // 6)
134        if let Some(fragment) = self.fragment {
135            write!(f, "#{}", fragment)?;
136        }
137
138        Ok(())
139    }
140}
141
142impl<'a> From<&'a url::Url> for UnrestrictiveUrl<'a> {
143    fn from(url: &'a url::Url) -> Self {
144        let username = if url.username().is_empty() {
145            None
146        } else {
147            Some(url.username())
148        };
149
150        Self {
151            fragment: url.fragment(),
152            host: url.host(),
153            password: url.password(),
154            path: url.path().into(),
155            port: url.port(),
156            query: url.query(),
157            scheme: url.scheme().into(),
158            username,
159            cannot_be_a_base: url.cannot_be_a_base(),
160        }
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::{UnrestrictiveUrl, Url};

    /// Replacing the scheme with one the `url` crate would reject is serialized verbatim.
    #[test]
    fn test_arbitrary_scheme() {
        let parsed = Url::parse("https://github.com").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.scheme = Some("github");

        assert_eq!("github://github.com/", unrestricted.to_string());
    }

    /// Without a scheme neither the `:` nor the `//` marker is written.
    #[test]
    fn test_remove_scheme() {
        let parsed = Url::parse("https://github.com").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.scheme = None;

        assert_eq!("github.com/", unrestricted.to_string());
    }

    /// Clearing the fragment drops the `#...` suffix.
    #[test]
    fn test_remove_fragment() {
        let parsed = Url::parse("https://github.com#fragment").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.fragment = None;

        assert_eq!("https://github.com/", unrestricted.to_string());
    }

    /// Clearing the query drops the `?...` part.
    #[test]
    fn test_remove_query() {
        let parsed = Url::parse("https://github.com?q=search&otherstuff=5").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.query = None;

        assert_eq!("https://github.com/", unrestricted.to_string());
    }

    /// Clearing the password keeps the username and the `@`.
    #[test]
    fn test_remove_password() {
        let parsed = Url::parse("https://user:pw@github.com").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.password = None;

        assert_eq!("https://user@github.com/", unrestricted.to_string());
    }

    /// Clearing the username removes the whole credentials part, password included.
    #[test]
    fn test_remove_username() {
        let parsed = Url::parse("https://user:pw@github.com").unwrap();
        let mut unrestricted = UnrestrictiveUrl::from(&parsed);
        unrestricted.username = None;

        assert_eq!("https://github.com/", unrestricted.to_string());
    }
}