#![warn(
unknown_lints,
// ---------- Stylistic
absolute_paths_not_starting_with_crate,
elided_lifetimes_in_paths,
explicit_outlives_requirements,
macro_use_extern_crate,
nonstandard_style, /* group */
noop_method_call,
rust_2018_idioms,
single_use_lifetimes,
trivial_casts,
trivial_numeric_casts,
// ---------- Future
future_incompatible, /* group */
rust_2021_compatibility, /* group */
// ---------- Public
missing_debug_implementations,
// missing_docs,
unreachable_pub,
// ---------- Unsafe
unsafe_code,
unsafe_op_in_unsafe_fn,
// ---------- Unused
unused, /* group */
)]
#![deny(
// ---------- Public
exported_private_dependencies,
// ---------- Deprecated
anonymous_parameters,
bare_trait_objects,
ellipsis_inclusive_range_patterns,
// ---------- Unsafe
deref_nullptr,
drop_bounds,
dyn_drop,
)]
use std::fmt::Display;
use url::Url;
use ring::digest;
/// Full-length URL hash, stored as four 64-bit words.
///
/// Values are produced from a `Url` through the `From<Url>` implementation
/// in this file; shorter prefixes are available via [`UrlHash::short`] and
/// [`UrlHash::very_short`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct UrlHash([u64; 4]);
/// Shortened URL hash made of two 64-bit words.
///
/// `UrlHash::short` builds one from the first two words of a full hash.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct UrlShortHash([u64; 2]);
/// Shortest URL hash form: a single 64-bit word.
///
/// `UrlHash::very_short` and `UrlShortHash::very_short` build one from the
/// first word of the longer hash.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct UrlVeryShortHash(u64);
/// Renders the hash as its four words in decimal, joined by `-`.
impl Display for UrlHash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Unpack the array once instead of indexing each word separately.
        let [w0, w1, w2, w3] = self.0;
        write!(f, "{}-{}-{}-{}", w0, w1, w2, w3)
    }
}
/// Hashes the serialized form of a [`Url`] into a [`UrlHash`].
///
/// The URL's serialization is digested with SHA-384. The first 32 bytes of
/// the digest are read as four little-endian `u64` words; the remaining
/// digest bytes are discarded.
///
/// # Panics
///
/// Panics if the digest is shorter than 32 bytes (`SHA256_OUTPUT_LEN`).
impl From<Url> for UrlHash {
    fn from(value: Url) -> Self {
        // `as_str` borrows the URL's serialization directly; going through
        // `to_string()` would allocate a copy of the same text just to hash it.
        let hash = digest::digest(&digest::SHA384, value.as_str().as_bytes());
        let bytes = hash.as_ref();
        // NOTE(review): the digest algorithm is SHA-384 while the length check
        // uses the SHA-256 constant. Here the constant only stands for the
        // 32 bytes (4 x 8) consumed below -- confirm that is the intent.
        assert!(bytes.len() >= digest::SHA256_OUTPUT_LEN);

        let mut words = [0u64; 4];
        for (word, chunk) in words.iter_mut().zip(bytes.chunks_exact(8)) {
            *word = u64::from_le_bytes(
                chunk
                    .try_into()
                    .expect("chunks_exact(8) always yields 8-byte chunks"),
            );
        }
        Self(words)
    }
}
impl UrlHash {
    /// Returns the two-word prefix of this hash.
    #[inline]
    pub fn short(&self) -> UrlShortHash {
        let [first, second, ..] = self.0;
        UrlShortHash([first, second])
    }

    /// Returns the one-word prefix of this hash.
    #[inline]
    pub fn very_short(&self) -> UrlVeryShortHash {
        let [first, ..] = self.0;
        UrlVeryShortHash(first)
    }

    /// Returns `true` when the first two words of this hash equal `short_hash`.
    #[inline]
    pub fn starts_with(&self, short_hash: &UrlShortHash) -> bool {
        self.0[..2] == short_hash.0[..]
    }

    /// Returns `true` when the first word of this hash equals `very_short_hash`.
    #[inline]
    pub fn starts_with_just(&self, very_short_hash: &UrlVeryShortHash) -> bool {
        very_short_hash.0 == self.0[0]
    }
}
/// Renders the short hash as its two words in decimal, joined by `-`.
impl Display for UrlShortHash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let [first, second] = self.0;
        write!(f, "{}-{}", first, second)
    }
}
impl UrlShortHash {
    /// Returns the one-word prefix of this short hash.
    #[inline]
    pub fn very_short(&self) -> UrlVeryShortHash {
        let [first, _] = self.0;
        UrlVeryShortHash(first)
    }

    /// Returns `true` when the first word of this short hash equals
    /// `very_short_hash`.
    #[inline]
    pub fn starts_with(&self, very_short_hash: &UrlVeryShortHash) -> bool {
        very_short_hash.0 == self.0[0]
    }
}
impl Display for UrlVeryShortHash {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Documentation URL reused by the hashing tests.
    const DOC_URL: &str =
        "https://doc.rust-lang.org/std/primitive.u8.html#method.to_ascii_lowercase";

    /// Parses `input` and hashes the resulting URL; panics if parsing fails.
    #[track_caller]
    fn hash_of(input: &str) -> UrlHash {
        UrlHash::from(Url::parse(input).unwrap())
    }

    /// Parses `input` and returns the serialization reported by `Url::as_str`;
    /// panics if parsing fails.
    #[track_caller]
    fn serialized(input: &str) -> String {
        Url::parse(input).unwrap().as_str().to_owned()
    }

    /// Hashing the same URL many times always gives the same value.
    #[test]
    fn test_hash_url_repeatedly() {
        let first = hash_of(DOC_URL);
        for _ in 1..1000 {
            assert_eq!(hash_of(DOC_URL), first);
        }
    }

    /// Hashing a long URL with a query string completes without panicking.
    #[test]
    fn test_another_url() {
        let _ = hash_of("https://www.google.com/search?q=rust+hash+url+value&rlz=1C5GCEM_enUS1025US1025&oq=rust+hash+url+value&gs_lcrp=EgZjaHJvbWUyBggAEEUYOdIBCDYyOTlqMGo0qAIAsAIA&sourceid=chrome&ie=UTF-8");
    }

    /// The short and very short forms are prefixes of the longer hashes.
    #[test]
    fn test_hash_prefixes() {
        let hash = hash_of(DOC_URL);
        println!("{}", hash);

        let short = hash.short();
        println!("{}", short);
        assert!(hash.starts_with(&short));

        let very_short = hash.very_short();
        println!("{}", very_short);
        assert!(short.starts_with(&very_short));
        assert!(hash.starts_with_just(&very_short));
    }

    // The `test_url_prereq_*` tests pin the serialization that `Url::parse`
    // produces for various inputs; that serialization is what gets hashed.

    #[test]
    fn test_url_prereq_scheme_case() {
        assert_eq!(serialized("hTTpS://example.com/"), "https://example.com/");
    }

    #[test]
    fn test_url_prereq_host_case() {
        assert_eq!(serialized("https://Example.COM/"), "https://example.com/");
    }

    #[test]
    fn test_url_prereq_host_punycode() {
        assert_eq!(
            serialized("https://exâmple.com/"),
            "https://xn--exmple-xta.com/"
        );
        assert_eq!(
            serialized("https://example§.com/"),
            "https://xn--example-eja.com/"
        );
    }

    #[test]
    fn test_url_prereq_port_default() {
        assert_eq!(serialized("http://example.com:80/"), "http://example.com/");
        assert_eq!(
            serialized("https://example.com:443/"),
            "https://example.com/"
        );
    }

    #[test]
    fn test_url_prereq_path_normalize() {
        assert_eq!(
            serialized("https://example.com/foo/../bar/./baz.jpg"),
            "https://example.com/bar/baz.jpg"
        );
    }

    #[test]
    fn test_url_prereq_empty_path_slash() {
        assert_eq!(serialized("https://example.com"), "https://example.com/");
    }

    #[test]
    fn test_url_prereq_encode_path() {
        assert_eq!(
            serialized("https://example.com/hello world"),
            "https://example.com/hello%20world"
        );
    }

    #[test]
    fn test_url_prereq_encode_query() {
        assert_eq!(
            serialized("https://example.com/?q=hello world"),
            "https://example.com/?q=hello%20world"
        );
    }

    #[test]
    fn test_url_prereq_encode_fragment() {
        assert_eq!(
            serialized("https://example.com/?q=hello#to world"),
            "https://example.com/?q=hello#to%20world"
        );
    }
}