/*!
Purpose of this crate: URL normalization.

For background on URL normalization and comparison, see
[RFC 3986, section 6](https://tools.ietf.org/html/rfc3986#section-6).
*/
extern crate url;
use url::Url;
use std::collections::BinaryHeap;
use std::cmp::Ord;
use std::cmp::Ordering;

/// One `key`/`value` entry of a URL query string, as yielded by
/// `Url::query_pairs()`.
///
/// NOTE(review): the derived `PartialEq`/`Eq` compare both fields, while the
/// hand-written `PartialOrd`/`Ord` impls in this file look only at `key`, so
/// two pairs can compare as `Ordering::Equal` without being `==`.
#[derive(PartialEq,Eq)]
struct Pair {
	/// Parameter name; the only field consulted when ordering pairs.
	key: String,
	/// Parameter value; carried along but ignored by the ordering.
	value: String,
}

impl PartialOrd for Pair {
	fn partial_cmp(&self, other: &Pair) -> Option<Ordering> {
		return Some(cmp_string(&self.key, &other.key));
	}
}

/// Total ordering of `Pair`s, decided by the keys alone.
///
/// The `value` field does not take part in the comparison; the keys are
/// compared with `cmp_string`.
impl Ord for Pair {
	fn cmp(&self, other: &Self) -> Ordering {
		let (lhs, rhs) = (&self.key, &other.key);
		cmp_string(lhs, rhs)
	}
}

/// Compares two strings by length first and lexicographically second.
///
/// A shorter string always sorts before a longer one; strings of equal
/// length fall back to the standard `str` ordering. Length is measured in
/// bytes (`str::len`), not in characters.
///
/// Takes `&str` so that both string slices and `&String` (through deref
/// coercion) can be passed.
fn cmp_string(a: &str, b: &str) -> Ordering {
	match a.len().cmp(&b.len()) {
		// Same length: let the ordinary string comparison decide.
		Ordering::Equal => a.cmp(b),
		by_length => by_length,
	}
}

/// Applies every normalization step of this crate to `url`.
///
/// The steps run in this order: `normalize_query`, `normalize_hash`,
/// `normalize_scheme`.
///
/// # Errors
///
/// Returns `Err(())` when `normalize_scheme` fails.
pub fn normalize(url: Url) -> Result<Url,()> {
	let without_fragment = normalize_hash(normalize_query(url));
	normalize_scheme(without_fragment)
}

/// Sorts the query pairs of `url` by key.
///
/// Keys are ordered by the `Ord` implementation of `Pair`: shorter keys
/// first, keys of equal length lexicographically. Pairs that share a key keep
/// their original relative order, because the sort is stable. A query
/// without any pairs is removed from the URL altogether.
///
/// The pairs are re-serialized through `query_pairs_mut`, so the textual
/// encoding of the query may differ from the input.
pub fn normalize_query(mut url: Url) -> Url {
	let mut pairs: Vec<Pair> = url
		.query_pairs()
		.into_owned()
		.map(|(key, value)| Pair { key: key, value: value })
		.collect();

	if pairs.is_empty() {
		// Nothing to sort: drop the query component instead of leaving an
		// empty one behind.
		url.set_query(None);
		return url;
	}

	// `BinaryHeap::iter` visits elements in arbitrary order, so a heap cannot
	// be used to read the pairs back sorted. A stable slice sort yields the
	// intended order and keeps repeated keys in their original sequence.
	pairs.sort();

	{
		// Scope the serializer so its borrow of `url` ends before `url` is
		// returned.
		let mut serializer = url.query_pairs_mut();
		serializer.clear();
		for pair in &pairs {
			serializer.append_pair(&pair.key, &pair.value);
		}
	}
	url
}

/// Strips the fragment (the `#...` part) from `url`.
///
/// All other components are returned unchanged.
pub fn normalize_hash(mut url: Url) -> Url {
	url.set_fragment(None);
	url
}

/// Replaces an encrypted scheme with its unencrypted counterpart.
///
/// Mapping: `https` and `shttp` become `http`, `sftp` becomes `ftp`, and
/// `wss` becomes `ws`. Any other scheme is left untouched.
///
/// # Errors
///
/// Returns `Err(())` when `Url::set_scheme` rejects the replacement scheme.
pub fn normalize_scheme(mut url: Url) -> Result<Url,()> {
	// The replacements are `'static` literals, so the borrow of `url` taken
	// by `scheme()` ends with this statement.
	let replacement = match url.scheme() {
		"https" | "shttp" => Some("http"),
		"sftp" => Some("ftp"),
		"wss" => Some("ws"),
		_ => None,
	};
	match replacement {
		Some(plain) => {
			url.set_scheme(plain)?;
			Ok(url)
		}
		None => Ok(url),
	}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Input shared by the tests that exercise a single normalization step.
    const SAMPLE_URL: &'static str = "https://example.com?c=1&q[]=99&q[5]=44&b=2&a=3#hash";

    /// Parses `input`, panicking when it is not a valid URL.
    fn parse(input: &str) -> Url {
        Url::parse(input).unwrap()
    }

    /// Checks that `secure_scheme://example.com/` is rewritten to
    /// `scheme://example.com/` by `normalize_scheme`.
    fn process_normalize_scheme(secure_scheme: &str, scheme: &str) {
        let input = format!("{}://example.com/", secure_scheme);
        let expected = format!("{}://example.com/", scheme);
        let normalized = normalize_scheme(parse(&input)).unwrap();
        assert_eq!(normalized.as_str(), expected.as_str());
    }

    #[test]
    fn test_normalize_query() {
        let normalized = normalize_query(parse(SAMPLE_URL));
        assert_eq!(
            normalized.as_str(),
            "https://example.com/?a=3&b=2&c=1&q%5B%5D=99&q%5B5%5D=44#hash"
        );
    }

    #[test]
    fn test_remove_hash() {
        let normalized = normalize_hash(parse(SAMPLE_URL));
        assert_eq!(
            normalized.as_str(),
            "https://example.com/?c=1&q[]=99&q[5]=44&b=2&a=3"
        );
    }

    #[test]
    fn test_normalize_scheme() {
        let normalized = normalize_scheme(parse(SAMPLE_URL)).unwrap();
        assert_eq!(
            normalized.as_str(),
            "http://example.com/?c=1&q[]=99&q[5]=44&b=2&a=3#hash"
        );
    }

    #[test]
    fn test_normalize_scheme_https() {
        process_normalize_scheme("https", "http");
    }

    #[test]
    fn test_normalize_scheme_wss() {
        process_normalize_scheme("wss", "ws");
    }

    #[test]
    fn test_normalize_scheme_sftp() {
        process_normalize_scheme("sftp", "ftp");
    }
}