1use std::{str::FromStr, fmt::Display};
2
3use lazy_static::lazy_static;
4use percent_encoding::{percent_decode_str, utf8_percent_encode};
5use regex::Regex;
6use url::{Url, Origin};
7
8use crate::cjk_slug;
9
10#[derive(Debug, PartialEq, Eq, thiserror::Error)]
11pub enum PermalinkError {
12 #[error("invalid url")]
13 InvalidUrl(Box<url::ParseError>),
14 #[error("invalid permalink")]
15 InvalidPermalink,
16 #[error("unknown country code `{0}`")]
17 UnknownCountry(String),
18}
19
20impl From<url::ParseError> for PermalinkError {
21 fn from(err: url::ParseError) -> Self {
22 Self::InvalidUrl(Box::new(err))
23 }
24}
25
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct Permalink {
28 pub country: WellKnownCountry,
29 pub default_language: String,
30 pub service_type: String,
31 pub title: Option<String>,
32 pub id: String,
33 pub data: Option<String>,
34}
35
36impl Permalink {
37 pub fn parse_str(url_like: &str) -> Result<Self, PermalinkError> {
38 let url = Url::parse(url_like)?;
39 Self::parse_url(url)
40 }
41
42 pub fn parse_url(url: Url) -> Result<Self, PermalinkError> {
43 lazy_static! {
44 static ref RE: Regex = Regex::new(
45 r"/(?P<country>[a-zA-Z]{2})/(?P<service_type>[a-z\-]{3,})/(?P<slug>((?P<title>((([a-z0-9]|%[0-9A-F]{2})+)\-?)+?)\-)?(?P<id>[a-zA-Z0-9]{8,}))(/(?P<data>[a-zA-Z0-9\-_]+))?/?",
46 ).unwrap();
47 }
48
49 let pathname = url.path();
50
51 if let Some(caps) = RE.captures(pathname) {
52 let country = match url.origin().to_well_known_country() {
53 Some(country) => country,
54 None => {
55 let value = caps.name("country").unwrap().as_str();
56 value.to_well_known_country()
57 .ok_or_else(|| PermalinkError::UnknownCountry(value.to_string()))?
58 },
59 };
60
61 let default_language = country.default_language();
62
63 let service_type = caps.name("service_type").unwrap().as_str().to_string();
64
65 let title = caps.name("title")
66 .map(|m| percent_decode_str(m.as_str()).decode_utf8().unwrap().to_string());
67
68 let id = caps.name("id").unwrap().as_str().to_string();
69
70 let data = caps.name("data")
71 .map(|m| m.as_str().to_string());
72
73 Ok(Permalink {
74 country,
75 default_language,
76 service_type,
77 title,
78 id,
79 data,
80 })
81 } else {
82 Err(PermalinkError::InvalidPermalink)
83 }
84 }
85
86 pub fn normalize(&self) -> String {
87 format!(
88 "{}/{}/{}/{}/",
89 "https://www.karrotmarket.com",
90 self.country,
91 self.service_type,
92 self.id,
93 )
94 }
95
96 pub fn canonicalize(&self, title: &str) -> String {
97 const NON_URL_SAFE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
98 .add(b' ')
99 .add(b'!')
100 .add(b'"')
101 .add(b'#')
102 .add(b'$')
103 .add(b'%')
104 .add(b'&')
105 .add(b'\'')
106 .add(b'(')
107 .add(b')')
108 .add(b'*')
109 .add(b'+')
110 .add(b',')
111 .add(b'.')
112 .add(b'/')
113 .add(b':')
114 .add(b';')
115 .add(b'<')
116 .add(b'=')
117 .add(b'>')
118 .add(b'?')
119 .add(b'@')
120 .add(b'[')
121 .add(b'\\')
122 .add(b']')
123 .add(b'^')
124 .add(b'`')
125 .add(b'{')
126 .add(b'|')
127 .add(b'}')
128 .add(b'~');
129
130 let origin = match self.country {
131 WellKnownCountry::CA => "https://ca.karrotmarket.com".to_string(),
132 WellKnownCountry::JP => "https://jp.karrotmarket.com".to_string(),
133 WellKnownCountry::KR => "https://www.daangn.com".to_string(),
134 WellKnownCountry::UK => "https://uk.karrotmarket.com".to_string(),
135 WellKnownCountry::US => "https://us.karrotmarket.com".to_string(),
136 };
137 format!(
138 "{}/{}/{}/{}/",
139 origin,
140 self.country,
141 self.service_type,
142 utf8_percent_encode(
143 cjk_slug::slugify(format!("{}-{}", title, self.id).as_str()).as_str(),
144 NON_URL_SAFE,
145 ),
146 )
147 }
148}
149
150impl Display for Permalink {
151 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152 writeln!(fmt, "permalink")?;
153 writeln!(fmt, "\tcountry: {}", self.country)?;
154 writeln!(fmt, "\tdefault_language: {}", self.default_language)?;
155 writeln!(fmt, "\tservice_type: {}", self.service_type)?;
156 writeln!(fmt, "\ttitle: {:?}", self.title)?;
157 writeln!(fmt, "\tid: {}", self.id)?;
158 writeln!(fmt, "\tdata: {:?}", self.data)
159 }
160}
161
162impl FromStr for Permalink {
163 type Err = PermalinkError;
164
165 fn from_str(value: &str) -> Result<Self, Self::Err> {
166 Permalink::parse_str(value)
167 }
168}
169
170impl TryFrom<String> for Permalink {
171 type Error = PermalinkError;
172
173 fn try_from(value: String) -> Result<Self, Self::Error> {
174 Permalink::parse_str(value.as_str())
175 }
176}
177
178impl TryFrom<Url> for Permalink {
179 type Error = PermalinkError;
180
181 fn try_from(value: Url) -> Result<Self, Self::Error> {
182 Self::parse_url(value)
183 }
184}
185
/// The closed set of countries a permalink can belong to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WellKnownCountry {
    /// Country code `ca`.
    CA,
    /// Country code `jp`.
    JP,
    /// Country code `kr`.
    KR,
    /// Country code `uk`.
    UK,
    /// Country code `us`.
    US,
}
194
195impl FromStr for WellKnownCountry {
196 type Err = PermalinkError;
197
198 fn from_str(value: &str) -> Result<Self, Self::Err> {
199 match value {
200 "ca" | "CA" | "cA" | "Ca" => Ok(Self::CA),
201 "jp" | "JP" | "jP" | "Jp" => Ok(Self::JP),
202 "kr" | "KR" | "kR" | "Kr" => Ok(Self::KR),
203 "uk" | "UK" | "uK" | "Uk" => Ok(Self::UK),
204 "us" | "US" | "uS" | "Us" => Ok(Self::US),
205 _ => Err(Self::Err::UnknownCountry(value.to_string())),
206 }
207 }
208}
209
210impl Display for WellKnownCountry {
211 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212 fmt.write_str(match self {
213 Self::CA => "ca",
214 Self::JP => "jp",
215 Self::KR => "kr",
216 Self::UK => "uk",
217 Self::US => "us",
218 })
219 }
220}
221
222trait ToWellKnownCountry {
223 fn to_well_known_country(&self) -> Option<WellKnownCountry>;
224}
225
226impl ToWellKnownCountry for str {
227 fn to_well_known_country(&self) -> Option<WellKnownCountry> {
228 WellKnownCountry::from_str(self).ok()
229 }
230}
231
232impl ToWellKnownCountry for String {
233 fn to_well_known_country(&self) -> Option<WellKnownCountry> {
234 WellKnownCountry::from_str(self.as_str()).ok()
235 }
236}
237
238impl ToWellKnownCountry for Origin {
239 fn to_well_known_country(&self) -> Option<WellKnownCountry> {
240 let origin = self.ascii_serialization();
241 let origin = match origin.as_str() {
242 "https://daangn.com" => "https://www.daangn.com",
243 "https://karrotmarket.com" => "https://www.karrotmarket.com",
244 _ => origin.as_str(),
245 };
246 match origin {
247 "https://www.daangn.com" => Some(WellKnownCountry::KR),
248 "https://www.karrotmarket.com" => None,
249 "https://ca.karrotmarket.com" => Some(WellKnownCountry::CA),
250 "https://jp.karrotmarket.com" => Some(WellKnownCountry::JP),
251 "https://uk.karrotmarket.com" => Some(WellKnownCountry::UK),
252 "https://us.karrotmarket.com" => Some(WellKnownCountry::US),
253 "https://kr.karrotmarket.com" => Some(WellKnownCountry::KR),
254 _ => None,
255 }
256 }
257}
258
259impl WellKnownCountry {
260 fn default_language(&self) -> String {
261 match self {
262 Self::CA => "en".to_string(),
263 Self::JP => "ja".to_string(),
264 Self::KR => "ko".to_string(),
265 Self::UK => "en".to_string(),
266 Self::US => "en".to_string(),
267 }
268 }
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// Permalink with a Korean title and a trailing slash.
    const TITLED_URL: &str =
        "https://www.daangn.com/kr/app/당근마켓-대한민국-1등-동네-앱-id1018769995/";
    /// Same permalink without a title.
    const UNTITLED_URL: &str = "https://www.daangn.com/kr/app/id1018769995/";
    const TITLE: &str = "당근마켓-대한민국-1등-동네-앱";
    const APP_ID: &str = "id1018769995";

    /// Asserts the fields shared by every successfully parsed fixture: a
    /// Korean `app` permalink for `APP_ID`, with the given title.
    fn assert_korean_app(permalink: &Permalink, expected_title: Option<&str>) {
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko");
        assert_eq!(permalink.service_type, "app");
        assert_eq!(permalink.title.as_deref(), expected_title);
        assert_eq!(permalink.id, APP_ID);
    }

    #[test]
    fn test_parse_valid_permalink() {
        let permalink = Permalink::parse_str(TITLED_URL).unwrap();
        assert_korean_app(&permalink, Some(TITLE));
    }

    #[test]
    fn test_parse_valid_permalink_without_trailing_slash() {
        let permalink = Permalink::parse_str(
            "https://www.daangn.com/kr/app/당근마켓-대한민국-1등-동네-앱-id1018769995",
        )
        .unwrap();
        assert_korean_app(&permalink, Some(TITLE));
    }

    #[test]
    fn test_parse_valid_permalink_without_title() {
        let permalink = Permalink::parse_str(UNTITLED_URL).unwrap();
        assert_korean_app(&permalink, None);
    }

    #[test]
    fn test_parse_invalid_url() {
        // No scheme, so URL parsing itself fails.
        let result = Permalink::parse_str("invalid/kr/app/id1018769995/");
        assert!(matches!(result, Err(PermalinkError::InvalidUrl(_))));
    }

    #[test]
    fn test_parse_invalid_permalink() {
        // A valid URL whose path does not have the permalink shape.
        let result = Permalink::parse_str(
            "https://apps.apple.com/kr/app/%EB%8B%B9%EA%B7%BC%EB%A7%88%EC%BC%93/id1018769995",
        );
        assert!(matches!(result, Err(PermalinkError::InvalidPermalink)));
    }

    #[test]
    fn test_parse_well_known_host() {
        // The host determines the country, even though the path says `ca`.
        let permalink =
            Permalink::parse_str("https://www.daangn.com/ca/app/id1018769995/").unwrap();
        assert_korean_app(&permalink, None);
    }

    #[test]
    fn test_parse_country_case_insensitive() {
        let permalink =
            Permalink::parse_str("https://www.daangn.com/KR/app/id1018769995/").unwrap();
        assert_korean_app(&permalink, None);
    }

    #[test]
    fn test_parse_unknown_country() {
        let result = Permalink::parse_str("http://localhost/xx/app/id1018769995/");
        assert_eq!(
            result,
            Err(PermalinkError::UnknownCountry("xx".to_string())),
        );
    }

    #[test]
    fn test_normalize() {
        let permalink = Permalink::parse_str(TITLED_URL).unwrap();
        assert_eq!(
            permalink.normalize(),
            "https://www.karrotmarket.com/kr/app/id1018769995/".to_string(),
        );
    }

    #[test]
    fn test_canonicalize() {
        let permalink = Permalink::parse_str(UNTITLED_URL).unwrap();
        assert_eq!(
            permalink.canonicalize(TITLE),
            "https://www.daangn.com/kr/app/%EB%8B%B9%EA%B7%BC%EB%A7%88%EC%BC%93-%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD-1%EB%93%B1-%EB%8F%99%EB%84%A4-%EC%95%B1-id1018769995/".to_string(),
        );
    }
}