url_normalizer/
lib.rs

/*!
Purpose of this crate: URL normalization (see [RFC 3986, section 6](https://tools.ietf.org/html/rfc3986#section-6)).
*/
4extern crate url;
5use url::Url;
6use std::collections::BinaryHeap;
7use std::cmp::Ord;
8use std::cmp::Ordering;
9
/// A single `key=value` query parameter, kept together while the query is reordered.
///
/// Equality looks at `key` only, so that `==` agrees with the `Ord` and
/// `PartialOrd` implementations for this type, which order pairs by key alone.
#[derive(Debug)]
struct Pair {
	/// Decoded parameter name; the only field used for comparison.
	key: String,
	/// Decoded parameter value; carried along but never compared.
	value: String,
}

impl PartialEq for Pair {
	/// Two pairs are equal when their keys are equal; `value` is ignored.
	fn eq(&self, other: &Pair) -> bool {
		self.key == other.key
	}
}

impl Eq for Pair {}
15
16impl PartialOrd for Pair {
17	fn partial_cmp(&self, other: &Pair) -> Option<Ordering> {
18		return Some(cmp_string(&self.key, &other.key));
19	}
20}
21
22impl Ord for Pair {
23	fn cmp(&self, other: &Self) -> Ordering {
24		return cmp_string(&self.key, &other.key);
25	}
26}
27
/// Compares two strings by length first, then lexicographically.
///
/// A shorter string always sorts before a longer one; strings of the same
/// length are compared with the regular `str` ordering. Length is the byte
/// length reported by `str::len`, not the number of characters.
fn cmp_string(a: &str, b: &str) -> Ordering {
	match a.len().cmp(&b.len()) {
		// Same length: fall back to the ordinary string comparison.
		Ordering::Equal => a.cmp(b),
		by_length => by_length,
	}
}
38
39/// Normalizes URL
40pub fn normalize(url: Url) -> Result<Url,()> {
41	let url = normalize_query(url);
42	let url = normalize_hash(url);
43	return normalize_scheme(url);
44}
45
46/// Sorts url query in alphabet order.
47pub fn normalize_query(mut url: Url) -> Url {
48	let query_pairs: BinaryHeap<Pair> = url.query_pairs().into_owned().map(
49		|(key,value)| {
50			Pair {
51				key: key,
52				value: value,
53			}
54		}
55	).collect();
56	url.query_pairs_mut().clear();
57	let mut has_query_pairs = false;
58	for pair in query_pairs.iter().rev() {
59		url.query_pairs_mut().append_pair(&pair.key, &pair.value);
60		has_query_pairs = true
61	}
62	if !has_query_pairs {
63		url.set_query(None);
64	}
65	return url;
66}
67
68/// Removes hash part from url
69pub fn normalize_hash(mut url: Url) -> Url {
70	url.set_fragment(None);
71	return url;
72}
73
74/// Changes encrypted scheme to unencrypted
75pub fn normalize_scheme(mut url: Url) -> Result<Url,()> {
76	let new_scheme;
77	{
78		let scheme = url.scheme();
79		new_scheme = match scheme {
80			"https" => Some("http"),
81			"shttp" => Some("http"),
82			"sftp" => Some("ftp"),
83			"wss" => Some("ws"),
84			_ => None,
85		};
86	}
87	if let Some(scheme) = new_scheme {
88		url.set_scheme(scheme)?;
89	}
90	return Ok(url);
91}
92
#[cfg(test)]
mod tests {
	use super::*;

	/// URL with an unsorted query and a fragment, shared by several tests.
	const SAMPLE: &'static str = "https://example.com?c=1&q[]=99&q[5]=44&b=2&a=3#hash";

	/// Parses `input`, panicking when it is not a valid URL.
	fn parse(input: &str) -> Url {
		Url::parse(input).unwrap()
	}

	#[test]
	fn test_normalize_query() {
		let sorted = normalize_query(parse(SAMPLE));
		assert_eq!(sorted.as_str(), "https://example.com/?a=3&b=2&c=1&q%5B%5D=99&q%5B5%5D=44#hash");
	}

	#[test]
	fn test_remove_hash() {
		let stripped = normalize_hash(parse(SAMPLE));
		assert_eq!(stripped.as_str(), "https://example.com/?c=1&q[]=99&q[5]=44&b=2&a=3");
	}

	#[test]
	fn test_normalize_scheme() {
		let downgraded = normalize_scheme(parse(SAMPLE)).unwrap();
		assert_eq!(downgraded.as_str(), "http://example.com/?c=1&q[]=99&q[5]=44&b=2&a=3#hash");
	}

	/// Checks that `secure_scheme://example.com/` is rewritten to
	/// `scheme://example.com/` by `normalize_scheme`.
	fn process_normalize_scheme(secure_scheme: &str, scheme: &str) {
		let input = format!("{}://example.com/", secure_scheme);
		let expected = format!("{}://example.com/", scheme);
		let downgraded = normalize_scheme(parse(&input)).unwrap();
		assert_eq!(downgraded.as_str(), expected.as_str());
	}

	#[test]
	fn test_normalize_scheme_https() {
		process_normalize_scheme("https", "http");
	}

	#[test]
	fn test_normalize_scheme_wss() {
		process_normalize_scheme("wss", "ws");
	}

	#[test]
	fn test_normalize_scheme_sftp() {
		process_normalize_scheme("sftp", "ftp");
	}
}