1use regex::Regex;
22use std::borrow::Cow;
23use std::sync::LazyLock;
24
25#[cfg(feature = "html")]
26use crate::tree::LinkLocation;
27
/// Scheme prefixes that mark a string as an absolute URL.
///
/// Each entry includes its separator (`:` or `://`), so it can be compared
/// directly against the start of a candidate string, as `is_url` does.
pub const URL_SCHEMES: [&str; 19] = [
    // Browser-internal and local resources
    "blob:",
    "chrome-extension://",
    "chrome://",
    "content://",
    // Network protocols and feeds
    "dns:",
    "feed:",
    "file://",
    "ftp://",
    "git://",
    "gopher://",
    "http://",
    "https://",
    // IRC variants
    "irc6://",
    "irc://",
    "ircs://",
    // Everything else
    "mailto:",
    "resource://",
    "rtmp://",
    "sftp://",
];
49
50pub fn is_url(url: &str) -> bool {
51 for scheme in &URL_SCHEMES {
53 if url.starts_with(scheme) {
54 return true;
55 }
56 }
57
58 false
59}
60
61pub fn dangerous_scheme(url: &str) -> bool {
71 static SCHEME_REGEX: LazyLock<Regex> =
72 LazyLock::new(|| Regex::new(r"^[\w\-]+$").unwrap());
73
74 if url.starts_with('/') {
76 return false;
77 }
78
79 url.split_once(':')
81 .map(|(scheme, _)| {
82 if !SCHEME_REGEX.is_match(scheme) {
83 return true;
85 }
86
87 scheme.eq_ignore_ascii_case("javascript")
90 || scheme.eq_ignore_ascii_case("data")
91 })
92 .unwrap_or(false)
93}
94
/// Converts a [`LinkLocation`] into the string to use as a link target.
///
/// * `LinkLocation::Url` is passed through [`normalize_href`] with no extra.
/// * `LinkLocation::Page` is split into its `(site, page, extra)` fields.
///   If a site is given, the result comes from `helper.build_url`;
///   otherwise the page and extra are passed through [`normalize_href`].
#[cfg(feature = "html")]
pub fn normalize_link<'a>(
    link: &'a LinkLocation<'a>,
    helper: &dyn BuildSiteUrl,
) -> Cow<'a, str> {
    // Plain URLs are handled immediately; only page references need splitting.
    let (site, page, extra) = match link {
        LinkLocation::Url(url) => return normalize_href(url, None),
        LinkLocation::Page(page_ref) => page_ref.fields(),
    };

    if let Some(site) = site {
        Cow::Owned(helper.build_url(site, page, extra))
    } else {
        normalize_href(page, extra)
    }
}
111
112pub fn normalize_href<'a>(url: &'a str, extra: Option<&'a str>) -> Cow<'a, str> {
122 if url == "javascript:;" {
123 trace!("Leaving no-op link as-is");
124 Cow::Borrowed(url)
125 } else if is_url(url) || url.starts_with('/') || url.starts_with('#') {
126 match extra {
127 Some(extra) => {
128 trace!("Leaving safe URL with extra as-is: {url}{extra}");
129 Cow::Owned(format!("{url}{extra}"))
130 }
131 None => {
132 trace!("Leaving safe URL as-is: {url}");
133 Cow::Borrowed(url)
134 }
135 }
136 } else if dangerous_scheme(url) {
137 warn!("Attempt to pass in dangerous URL: {url}");
138 Cow::Borrowed("#invalid-url")
139 } else {
140 let extra = extra.unwrap_or("");
143 trace!("Adding leading slash to URL: {url}{extra}");
144 Cow::Owned(format!("/{url}{extra}"))
145 }
146}
147
/// Hook for building the URL of a page reference that names a site.
///
/// `normalize_link` calls this when a page reference has its site field
/// set, instead of normalizing the page path itself.
pub trait BuildSiteUrl {
    /// Builds the URL string for `path` on `site`.
    ///
    /// `extra` is the optional suffix that accompanies the page reference.
    /// How the three parts are combined is up to the implementor.
    fn build_url(&self, site: &str, path: &str, extra: Option<&str>) -> String;
}
151
#[test]
fn detect_dangerous_schemes() {
    // (input, expected result of dangerous_scheme)
    let cases: &[(&str, bool)] = &[
        // Ordinary URL schemes
        ("http://example.com/", false),
        ("https://example.com/", false),
        ("irc://irc.scpwiki.com", false),
        // JavaScript, including case and whitespace tricks
        ("javascript:alert(1)", true),
        ("JAVASCRIPT:alert(1)", true),
        (" javascript:alert(1)", true),
        ("java\nscript:alert(1)", true),
        ("javascript\t:alert(1)", true),
        ("wtf$1:foo", true),
        ("JaVaScRiPt:alert(document.cookie)", true),
        // Data URLs
        ("data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==", true),
        ("data:text/javascript,alert(1)", true),
        ("data:text/html,<script>alert('XSS');</script>", true),
        ("DATA:text/html,<script>alert('XSS');</script>", true),
        // Local paths, with and without a category colon
        ("/page", false),
        ("/page#target", false),
        ("/page/edit", false),
        ("/page/edit#target", false),
        ("/category:page", false),
        ("/category:page#target", false),
        ("/category:page/edit", false),
        ("/category:page/edit#target", false),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            dangerous_scheme(input),
            expected,
            "For input {:?}, dangerous scheme detection failed",
            input,
        );
    }
}
188
#[test]
fn test_normalize_href() {
    // (url, extra, expected output of normalize_href)
    let cases: &[(&str, Option<&str>, &str)] = &[
        // Anchors, the no-op link and known URL schemes pass through unchanged
        ("#", None, "#"),
        ("#target", None, "#target"),
        ("#edit-area", None, "#edit-area"),
        ("javascript:;", None, "javascript:;"),
        ("http://example.net", None, "http://example.net"),
        ("https://example.net", None, "https://example.net"),
        ("irc://irc.scpwiki.com", None, "irc://irc.scpwiki.com"),
        (
            "sftp://ftp.example.com/upload",
            None,
            "sftp://ftp.example.com/upload",
        ),
        // Dangerous schemes are replaced
        ("javascript:alert(1)", None, "#invalid-url"),
        (
            "data:text/html,<script>alert('XSS')</script>",
            None,
            "#invalid-url",
        ),
        // Paths, with and without extras
        ("/page", None, "/page"),
        ("/page", Some("#target"), "/page#target"),
        ("/page", Some("/edit"), "/page/edit"),
        ("page", Some("/edit#target"), "/page/edit#target"),
        ("/category:page", None, "/category:page"),
        ("/category:page", Some("#target"), "/category:page#target"),
        ("/category:page", Some("/edit"), "/category:page/edit"),
        (
            "/category:page",
            Some("/edit#target"),
            "/category:page/edit#target",
        ),
        // Bare page names gain a leading slash
        ("some-page", None, "/some-page"),
        ("some-page#target", None, "/some-page#target"),
        ("system:some-page", None, "/system:some-page"),
        ("system:some-page#target", None, "/system:some-page#target"),
    ];

    for &(url, extra, expected) in cases {
        let actual = normalize_href(url, extra);

        match extra {
            None => assert_eq!(
                actual.as_ref(),
                expected,
                "For input {:?}, normalize_href() doesn't match expected",
                url,
            ),
            Some(extra) => assert_eq!(
                actual.as_ref(),
                expected,
                "For input {:?} / {:?}, normalize_href() doesn't match expected",
                url,
                extra,
            ),
        }
    }
}