1use lazy_static::lazy_static;
2use regex::Regex;
3use url::{ParseError, Url};
4
5fn normalize_surt(surt: &str) -> String {
6 let mut surt = surt.to_string();
7
8 surt = url_escape::decode(&surt).to_string();
10
11 surt = surt.replace(' ', "%20");
13
14 let query_index = surt.find('?').unwrap_or(0);
15
16 if query_index == 0 && surt.ends_with('/') && !surt.ends_with(")/") {
18 surt.pop();
19 }
20
21 let start = &mut surt[..query_index].to_string();
24 if start.ends_with('/') && !start.ends_with(")/") {
25 start.pop();
26 }
27 surt = format!("{}{}", start, &surt[query_index..]);
28
29 surt
30}
31
lazy_static! {
    // Case-insensitive pattern for session-style query parameters:
    // `jsessionid`, `sessionid`, `phpsessid`, `sid`, `aspsessionid` followed
    // by eight letters, or a `cfid=…&cftoken=…` pair. The parameter must be
    // bounded by `&` or the start/end of the text; those two boundaries are
    // captured as groups 1 and 3.
    static ref SESSION_REGEXP: Regex = Regex::new(r"(?i)(&|^)(?:jsessionid=[0-9a-z$]{10,}|sessionid=[0-9a-z]{16,}|phpsessid=[0-9a-z]{16,}|sid=[0-9a-z]{16,}|aspsessionid[a-z]{8}=[0-9a-z]{16,}|cfid=[0-9]+&cftoken=[0-9a-z-]+)(&|$)").unwrap();
    // Matches a host that starts with `www`, optionally followed by more
    // word characters, then a dot. Group 2 is everything after that dot up to
    // and including the last dot of the host, so at least one further dot is
    // required: `www.example.com` matches, `www.com` does not.
    static ref WWW_REGEXP: Regex = Regex::new(r"^www(\w?)+\.(.*\.+)").unwrap();
}
36
37fn normalize_url(mut parsed: Url) -> String {
38 if parsed.query().is_some() {
40 let mut query = parsed.query().unwrap().split('&').collect::<Vec<&str>>();
41 query.sort();
42 let mut query = query.join("&").to_lowercase();
43 query = SESSION_REGEXP.replace_all(&query, "$1$3").to_string();
44 parsed.set_query(Some(&query));
45 }
46
47 if parsed.host_str().is_some() {
48 let host_str = parsed.host_str().unwrap();
50 let host_str = WWW_REGEXP.replace(host_str, "${2}").to_string();
51
52 let host_str = host_str.to_lowercase();
54
55 parsed.set_host(Some(&host_str)).unwrap();
56 }
57
58 let mut url = parsed.to_string();
59
60 if url.ends_with('/') && parsed.path() != "/" {
62 url.pop();
63 }
64
65 if url.ends_with('?') {
67 url.pop();
68 }
69
70 url
71}
72
73pub fn generate_surt(url: &str) -> Result<String, ParseError> {
74 let mut parsed = Url::parse(url)?;
75 parsed = Url::parse(&normalize_url(parsed))?;
76
77 let scheme = parsed.scheme();
78 match scheme == "https" || scheme == "http" {
79 true => scheme,
80 _ => return Err(ParseError::RelativeUrlWithoutBase),
81 };
82
83 if parsed.host_str().is_none() {
84 return Err(ParseError::RelativeUrlWithoutBase);
85 }
86 let host_str = parsed.host_str().unwrap().to_lowercase();
87 let mut host_split = host_str.split('.').collect::<Vec<&str>>();
88 host_split.reverse();
89 let mut surt = host_split.join(",");
90
91 if parsed.port().is_some() {
92 let port = parsed.port().unwrap();
93 surt += &format!(":{}", port);
94 }
95
96 if parsed.path() != "" {
97 let path = parsed.path().to_lowercase();
98 surt += &format!("){}", path);
99 }
100
101 if parsed.query().is_some() {
102 let query = parsed.query().unwrap().to_lowercase();
103 surt += &format!("?{}", query);
104 }
105
106 if parsed.fragment().is_some() {
107 let fragment = parsed.fragment().unwrap().to_lowercase();
108 surt += &format!("#{}", fragment);
109 }
110
111 surt = normalize_surt(&surt);
112
113 Ok(surt)
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::Value;
    use std::collections::HashMap;
    use std::fs::File;
    use std::io::BufReader;

    /// Reads `./test_data/surt.json` into `section name -> (input -> expected)`.
    ///
    /// Panics if the file cannot be opened or parsed, if the root or any
    /// section is not a JSON object, or if an expected value is not a string.
    fn load_test_data() -> HashMap<String, HashMap<String, String>> {
        let reader = BufReader::new(File::open("./test_data/surt.json").unwrap());
        let root: Value = serde_json::from_reader(reader).unwrap();

        let mut sections = HashMap::new();
        for (section, body) in root.as_object().unwrap().iter() {
            let mut examples = HashMap::new();
            for (input, expected) in body.as_object().unwrap().iter() {
                examples.insert(input.clone(), expected.as_str().unwrap().to_string());
            }
            sections.insert(section.clone(), examples);
        }
        sections
    }

    /// Fixture sections whose lowercased name contains `keyword`.
    fn sections_matching(keyword: &str) -> Vec<(String, HashMap<String, String>)> {
        load_test_data()
            .into_iter()
            .filter(|(name, _)| name.to_lowercase().contains(keyword))
            .collect()
    }

    /// Asserts that `generate_surt(url)` succeeds and yields `expected`.
    fn expect_surt(url: &str, expected: &str) {
        assert_eq!(generate_surt(url).unwrap(), expected);
    }

    /// Asserts that `generate_surt(url)` returns an error.
    fn expect_rejected(url: &str) {
        assert!(generate_surt(url).is_err());
    }

    #[test]
    fn test_surt() {
        for (section, examples) in sections_matching("surt") {
            println!("Testing section: {}", section);

            for (input, expected) in examples {
                println!("Testing example: {}", input);
                assert_eq!(generate_surt(&input).unwrap(), expected);
            }
        }
    }

    #[test]
    fn test_url_normalization() {
        for (section, examples) in sections_matching("url_normalization") {
            println!("Testing section: {}", section);

            for (input, expected) in examples {
                println!("Testing example: {}", input);
                // Print the parse result before unwrapping so a failing
                // fixture shows why it failed.
                let parsed = Url::parse(&input);
                println!("parsed: {:?}", parsed);
                assert_eq!(normalize_url(parsed.unwrap()), expected);
            }
        }
    }

    #[test]
    fn test_generate_surt_with_valid_url() {
        expect_surt(
            "http://example.com/path?query=value#fragment",
            "com,example)/path?query=value#fragment",
        );
    }

    #[test]
    fn test_generate_surt_with_url_without_scheme() {
        expect_rejected("example.com");
    }

    #[test]
    fn test_generate_surt_with_relative_url() {
        expect_rejected("/path");
    }

    #[test]
    fn test_generate_surt_with_url_without_host() {
        expect_rejected("http://");
    }

    #[test]
    fn test_generate_surt_with_url_with_port() {
        expect_surt("http://example.com:8080", "com,example:8080)/");
    }

    #[test]
    fn test_generate_surt_with_url_with_query() {
        expect_surt("http://example.com?query=value", "com,example)/?query=value");
    }

    #[test]
    fn test_generate_surt_with_url_with_query_and_trailing_slash_after_path() {
        expect_surt(
            "http://example.com/foo/bar/?query=value",
            "com,example)/foo/bar?query=value",
        );
    }

    #[test]
    fn test_generate_surt_with_url_with_fragment() {
        expect_surt("http://example.com#fragment", "com,example)/#fragment");
    }

    #[test]
    fn test_generate_surt_with_url_with_uppercase() {
        expect_surt(
            "http://EXAMPLE.COM/PATH?QUERY=VALUE#FRAGMENT",
            "com,example)/path?query=value#fragment",
        );
    }

    #[test]
    fn test_generate_surt_with_url_with_space() {
        expect_surt(
            "http://example.com/path with space",
            "com,example)/path%20with%20space",
        );
    }

    #[test]
    fn test_generate_surt_with_url_with_trailing_slash() {
        expect_surt("http://example.com/", "com,example)/");
    }

    #[test]
    fn test_generate_surt_with_url_with_trailing_slash_after_path() {
        expect_surt("http://example.com/foo/bar/", "com,example)/foo/bar");
    }

    #[test]
    fn test_generate_surt_with_url_with_www_subdomain() {
        expect_surt("http://www.example.com", "com,example)/");
    }

    #[test]
    fn test_generate_surt_with_ftp_url() {
        expect_rejected("ftp://www.example.com");
    }

    #[test]
    fn test_normalize_url_with_www_subdomain_and_https() {
        let parsed = Url::parse("https://www.example.com").unwrap();
        assert_eq!(normalize_url(parsed), "https://example.com/");
    }

    #[test]
    fn test_normalize_surt_root_with_trailing_slash() {
        assert_eq!(normalize_surt("com,example)/"), "com,example)/");
    }

    #[test]
    fn test_normalize_surt_with_trailing_slash() {
        assert_eq!(normalize_surt("com,example)/foo/bar/"), "com,example)/foo/bar");
    }
}