progscrape_scrapers/types/
url.rs1use std::{
2 collections::hash_map::DefaultHasher,
3 fmt::Display,
4 hash::{Hash, Hasher},
5};
6
7use serde::{Deserialize, Serialize};
8use url::Url;
9use urlnorm::UrlNormalizer;
10
11lazy_static::lazy_static! {
12 static ref URL_NORMALIZER: UrlNormalizer = UrlNormalizer::default();
13}
14
15#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
17pub struct StoryUrl {
18 url: String,
19 host: String,
20 norm_str: StoryUrlNorm,
21}
22
23impl Serialize for StoryUrl {
24 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
25 where
26 S: serde::Serializer,
27 {
28 let tuple: (&String, &String, &String) = (&self.url, &self.host, &self.norm_str.norm);
29 tuple.serialize(serializer)
30 }
31}
32
33impl<'de> Deserialize<'de> for StoryUrl {
34 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
35 where
36 D: serde::Deserializer<'de>,
37 {
38 #[derive(Deserialize)]
40 #[serde(untagged)]
41 enum StoryUrlSerializationOptions {
42 Raw(String),
43 Bits((String, String, String)),
44 }
45
46 let res: Result<StoryUrlSerializationOptions, D::Error> =
47 Deserialize::deserialize(deserializer);
48 match res {
49 Ok(StoryUrlSerializationOptions::Raw(raw)) => StoryUrl::parse(&raw).ok_or(
50 serde::de::Error::custom(format!("Failed to parse URL '{raw}'")),
51 ),
52 Ok(StoryUrlSerializationOptions::Bits((url, host, norm))) => Ok(StoryUrl {
53 url,
54 host,
55 norm_str: StoryUrlNorm { norm },
56 }),
57 Err(e) => Err(e),
58 }
59 }
60}
61
62impl Display for StoryUrl {
63 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64 self.url.fmt(f)
65 }
66}
67
68impl StoryUrl {
69 pub fn parse<S: AsRef<str>>(s: S) -> Option<Self> {
70 if let Ok(url) = Url::parse(s.as_ref()) {
71 if let Some(host) = URL_NORMALIZER.normalize_host(&url) {
72 let host = host.to_owned();
73 let norm_str = StoryUrlNorm {
74 norm: URL_NORMALIZER.compute_normalization_string(&url),
75 };
76 let url = url.into();
77 return Some(Self {
78 url,
79 host,
80 norm_str,
81 });
82 }
83 }
84 None
85 }
86
87 pub fn host(&self) -> &str {
88 &self.host
89 }
90
91 pub fn raw(&self) -> &str {
92 &self.url
93 }
94
95 pub fn normalization(&self) -> &StoryUrlNorm {
96 &self.norm_str
97 }
98}
99
/// Wrapper around a URL's normalization string.
///
/// Equality, ordering and hashing are all derived from the wrapped string.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct StoryUrlNorm {
    /// The normalization string itself.
    norm: String,
}
104
105impl Serialize for StoryUrlNorm {
106 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
107 where
108 S: serde::Serializer,
109 {
110 self.norm.serialize(serializer)
111 }
112}
113
114impl<'de> Deserialize<'de> for StoryUrlNorm {
115 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
116 where
117 D: serde::Deserializer<'de>,
118 {
119 let res: Result<String, _> = Deserialize::deserialize(deserializer);
120 res.map(|norm| StoryUrlNorm { norm })
121 }
122}
123
124impl StoryUrlNorm {
125 pub fn from_string(norm: String) -> Self {
127 Self { norm }
128 }
129
130 pub fn hash(&self) -> i64 {
131 let mut hasher = DefaultHasher::new();
132 self.norm.hash(&mut hasher);
133
134 hasher.finish() as i64
135 }
136
137 pub fn string(&self) -> &str {
138 &self.norm
139 }
140}