markdown_that_url/
url.rs

1use std::fmt::{Display, Write};
2
/// The parsed form of a URL, created and returned by the
/// [parse_url](crate::parse_url) function.
///
/// Each component other than `slashes` is optional and kept as a separate
/// string.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Url {
    /// Protocol scheme of the URL.
    ///
    /// Example: `"http:"`.
    pub protocol: Option<String>,

    /// `true` when two ASCII forward-slash characters (`/`) are required
    /// right after the colon of the `protocol`.
    pub slashes: bool,

    /// Username and password portion of the URL (the _userinfo_).
    ///
    /// It sits after the `protocol` and the double slashes (when present) and
    /// before the `host` component, from which it is separated by `@`. The
    /// value is either a bare username, or a username and a password joined
    /// by `:`.
    ///
    /// Example: `"user:pass"`.
    pub auth: Option<String>,

    /// Host name portion of the `host` component, _excluding_ the `port`.
    ///
    /// Example: `"sub.example.com"`.
    pub hostname: Option<String>,

    /// Numeric port portion of the `host` component.
    ///
    /// Example: `"8080"`.
    pub port: Option<String>,

    /// Entire path section of the URL: everything after the `host`
    /// (including the `port`) up to the start of the `query` or `hash`
    /// components, which begin at the ASCII question mark (`?`) or hash (`#`)
    /// character respectively.
    ///
    /// Example: `"/p/a/t/h"`.
    ///
    /// The path string is not decoded.
    pub pathname: Option<String>,

    /// Entire "query string" portion of the URL, with its leading ASCII
    /// question mark (`?`) included.
    ///
    /// Example: `"?query=string"`.
    ///
    /// The query string is not decoded.
    pub search: Option<String>,

    /// Fragment identifier portion of the URL, with its leading `#` included.
    ///
    /// Example: `"#hash"`.
    pub hash: Option<String>,
}
61
62// Return a formatted URL string derived from [Url] object.
63//
64// It simply concatenates whatever is in the input, and does no validation
65// or escaping of any kind.
66//
67// Round-trip is guaranteed, so `format(parse(str))` always equals to `str`,
68// but if you write malformed data to `url`, you may get broken url as the output.
69//
70impl Display for Url {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        if let Some(s) = &self.protocol {
73            f.write_str(s)?;
74        }
75
76        if self.slashes {
77            f.write_str("//")?;
78        }
79
80        if let Some(s) = &self.auth {
81            f.write_str(s)?;
82            f.write_char('@')?;
83        }
84
85        if let Some(s) = &self.hostname {
86            if s.contains(':') {
87                // ipv6 address
88                f.write_char('[')?;
89                f.write_str(s)?;
90                f.write_char(']')?;
91            } else {
92                f.write_str(s)?;
93            }
94        }
95
96        if let Some(s) = &self.port {
97            f.write_char(':')?;
98            f.write_str(s)?;
99        }
100
101        if let Some(s) = &self.pathname {
102            f.write_str(s)?;
103        }
104
105        if let Some(s) = &self.search {
106            f.write_str(s)?;
107        }
108
109        if let Some(s) = &self.hash {
110            f.write_str(s)?;
111        }
112
113        Ok(())
114    }
115}
116
#[cfg(test)]
mod tests {
    use crate::parse_url;

    /// Inputs that must survive a parse-then-format cycle unchanged.
    const FIXTURES: [&str; 87] = [
        "//some_path",
        "HTTP://www.example.com/",
        "HTTP://www.example.com",
        "http://www.ExAmPlE.com/",
        "http://user:pw@www.ExAmPlE.com/",
        "http://USER:PW@www.ExAmPlE.com/",
        "http://user@www.example.com/",
        "http://user%3Apw@www.example.com/",
        "http://x.com/path?that\'s#all, folks",
        "HTTP://X.COM/Y",
        "http://x.y.com+a/b/c",
        "HtTp://x.y.cOm;a/b/c?d=e#f g<h>i",
        "HtTp://x.y.cOm;A/b/c?d=e#f g<h>i",
        "http://x...y...#p",
        "http://x/p/\"quoted\"",
        "<http://goo.corn/bread> Is a URL!",
        "http://www.narwhaljs.org/blog/categories?id=news",
        "http://mt0.google.com/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s=",
        "http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=",
        "http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=",
        "file:///etc/passwd",
        "file://localhost/etc/passwd",
        "file://foo/etc/passwd",
        "file:///etc/node/",
        "file://localhost/etc/node/",
        "file://foo/etc/node/",
        "http:/baz/../foo/bar",
        "http://user:pass@example.com:8000/foo/bar?baz=quux#frag",
        "//user:pass@example.com:8000/foo/bar?baz=quux#frag",
        "/foo/bar?baz=quux#frag",
        "http:/foo/bar?baz=quux#frag",
        "mailto:foo@bar.com?subject=hello",
        "javascript:alert(\'hello\');",
        "xmpp:isaacschlueter@jabber.org",
        "http://atpass:foo%40bar@127.0.0.1:8080/path?search=foo#bar",
        "svn+ssh://foo/bar",
        "dash-test://foo/bar",
        "dash-test:foo/bar",
        "dot.test://foo/bar",
        "dot.test:foo/bar",
        "http://www.日本語.com/",
        "http://example.Bücher.com/",
        "http://www.Äffchen.com/",
        "http://www.Äffchen.cOm;A/b/c?d=e#f g<h>i",
        "http://SÉLIER.COM/",
        "http://ﻞﻴﻬﻣﺎﺒﺘﻜﻠﻣﻮﺸﻋﺮﺒﻳ؟.ﻱ؟/",
        "http://➡.ws/➡",
        "http://bucket_name.s3.amazonaws.com/image.jpg",
        "git+http://github.com/joyent/node.git",
        "local1@domain1",
        "www.example.com",
        "[fe80::1]",
        "coap://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]",
        "coap://[1080:0:0:0:8:800:200C:417A]:61616/",
        "http://user:password@[3ffe:2a00:100:7031::1]:8080",
        "coap://u:p@[::192.9.5.5]:61616/.well-known/r?n=Temperature",
        "http://example.com:",
        "http://example.com:/a/b.html",
        "http://example.com:?a=b",
        "http://example.com:#abc",
        "http://[fe80::1]:/a/b?a=b#abc",
        "http://-lovemonsterz.tumblr.com/rss",
        "http://-lovemonsterz.tumblr.com:80/rss",
        "http://user:pass@-lovemonsterz.tumblr.com/rss",
        "http://user:pass@-lovemonsterz.tumblr.com:80/rss",
        "http://_jabber._tcp.google.com/test",
        "http://user:pass@_jabber._tcp.google.com/test",
        "http://_jabber._tcp.google.com:80/test",
        "http://user:pass@_jabber._tcp.google.com:80/test",
        "http://x:1/' <>\"`/{}|\\^~`/",
        "http://a@b@c/",
        "http://a@b?@c",
        "http://a\r\" \t\n<'b:b@c\r\nd/e?f",
        "git+ssh://git@github.com:npm/npm",
        "http://example.com?foo=bar#frag",
        "http://example.com?foo=@bar#frag",
        "http://example.com?foo=/bar/#frag",
        "http://example.com?foo=?bar/#frag",
        "http://example.com#frag=?bar/#frag",
        "http://google.com\" onload=\"alert(42)/",
        "http://a.com/a/b/c?s#h",
        "http://atpass:foo%40bar@127.0.0.1/",
        "http://atslash%2F%40:%2F%40@foo/",
        "coap:u:p@[::1]:61616/.well-known/r?n=Temperature",
        "coap:[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:61616/s/stopButton",
        "http://ex.com/foo%3F100%m%23r?abc=the%231?&foo=bar#frag",
        "http://ex.com/fooA100%mBr?abc=the%231?&foo=bar#frag",
    ];

    /// Parses every fixture and checks that formatting the result gives back
    /// exactly the original input.
    #[test]
    fn round_trip() {
        for input in FIXTURES.iter().copied() {
            let formatted = parse_url(input).to_string();
            assert_eq!(formatted, input);
        }
    }
}