karrot_permalink/
permalink.rs

1use std::{str::FromStr, fmt::Display};
2
3use lazy_static::lazy_static;
4use percent_encoding::{percent_decode_str, utf8_percent_encode};
5use regex::Regex;
6use url::{Url, Origin};
7
8use crate::cjk_slug;
9
10#[derive(Debug, PartialEq, Eq, thiserror::Error)]
11pub enum PermalinkError {
12    #[error("invalid url")]
13    InvalidUrl(Box<url::ParseError>),
14    #[error("invalid permalink")]
15    InvalidPermalink,
16    #[error("unknown country code `{0}`")]
17    UnknownCountry(String),
18}
19
20impl From<url::ParseError> for PermalinkError {
21    fn from(err: url::ParseError) -> Self {
22        Self::InvalidUrl(Box::new(err))
23    }
24}
25
26#[derive(Debug, Clone, PartialEq, Eq)]
27pub struct Permalink {
28    pub country: WellKnownCountry,
29    pub default_language: String,
30    pub service_type: String,
31    pub title: Option<String>,
32    pub id: String,
33    pub data: Option<String>,
34}
35
36impl Permalink {
37    pub fn parse_str(url_like: &str) -> Result<Self, PermalinkError> {
38        let url = Url::parse(url_like)?;
39        Self::parse_url(url)
40    }
41
42    pub fn parse_url(url: Url) -> Result<Self, PermalinkError> {
43        lazy_static! {
44            static ref RE: Regex = Regex::new(
45                r"/(?P<country>[a-zA-Z]{2})/(?P<service_type>[a-z\-]{3,})/(?P<slug>((?P<title>((([a-z0-9]|%[0-9A-F]{2})+)\-?)+?)\-)?(?P<id>[a-zA-Z0-9]{8,}))(/(?P<data>[a-zA-Z0-9\-_]+))?/?",
46            ).unwrap();
47        }
48
49        let pathname = url.path();
50
51        if let Some(caps) = RE.captures(pathname) {
52            let country = match url.origin().to_well_known_country() {
53                Some(country) => country,
54                None => {
55                    let value = caps.name("country").unwrap().as_str();
56                    value.to_well_known_country()
57                        .ok_or_else(|| PermalinkError::UnknownCountry(value.to_string()))?
58                },
59            };
60
61            let default_language = country.default_language();
62
63            let service_type = caps.name("service_type").unwrap().as_str().to_string();
64
65            let title = caps.name("title")
66                .map(|m| percent_decode_str(m.as_str()).decode_utf8().unwrap().to_string());
67
68            let id = caps.name("id").unwrap().as_str().to_string();
69
70            let data = caps.name("data")
71                .map(|m| m.as_str().to_string());
72
73            Ok(Permalink {
74                country,
75                default_language,
76                service_type,
77                title,
78                id,
79                data,
80            })
81        } else {
82            Err(PermalinkError::InvalidPermalink)
83        }
84    }
85
86    pub fn normalize(&self) -> String {
87        format!(
88            "{}/{}/{}/{}/",
89            "https://www.karrotmarket.com",
90            self.country,
91            self.service_type,
92            self.id,
93        )
94    }
95
96    pub fn canonicalize(&self, title: &str) -> String {
97        const NON_URL_SAFE: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
98            .add(b' ')
99            .add(b'!')
100            .add(b'"')
101            .add(b'#')
102            .add(b'$')
103            .add(b'%')
104            .add(b'&')
105            .add(b'\'')
106            .add(b'(')
107            .add(b')')
108            .add(b'*')
109            .add(b'+')
110            .add(b',')
111            .add(b'.')
112            .add(b'/')
113            .add(b':')
114            .add(b';')
115            .add(b'<')
116            .add(b'=')
117            .add(b'>')
118            .add(b'?')
119            .add(b'@')
120            .add(b'[')
121            .add(b'\\')
122            .add(b']')
123            .add(b'^')
124            .add(b'`')
125            .add(b'{')
126            .add(b'|')
127            .add(b'}')
128            .add(b'~');
129
130        let origin = match self.country {
131            WellKnownCountry::CA => "https://ca.karrotmarket.com".to_string(),
132            WellKnownCountry::JP => "https://jp.karrotmarket.com".to_string(),
133            WellKnownCountry::KR => "https://www.daangn.com".to_string(),
134            WellKnownCountry::UK => "https://uk.karrotmarket.com".to_string(),
135            WellKnownCountry::US => "https://us.karrotmarket.com".to_string(),
136        };
137        format!(
138            "{}/{}/{}/{}/",
139            origin,
140            self.country,
141            self.service_type,
142            utf8_percent_encode(
143                cjk_slug::slugify(format!("{}-{}", title, self.id).as_str()).as_str(),
144                NON_URL_SAFE,
145            ),
146        )
147    }
148}
149
150impl Display for Permalink {
151    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152        writeln!(fmt, "permalink")?;
153        writeln!(fmt, "\tcountry: {}", self.country)?;
154        writeln!(fmt, "\tdefault_language: {}", self.default_language)?;
155        writeln!(fmt, "\tservice_type: {}", self.service_type)?;
156        writeln!(fmt, "\ttitle: {:?}", self.title)?;
157        writeln!(fmt, "\tid: {}", self.id)?;
158        writeln!(fmt, "\tdata: {:?}", self.data)
159    }
160}
161
162impl FromStr for Permalink {
163    type Err = PermalinkError;
164
165    fn from_str(value: &str) -> Result<Self, Self::Err> {
166        Permalink::parse_str(value)
167    }
168}
169
170impl TryFrom<String> for Permalink {
171    type Error = PermalinkError;
172
173    fn try_from(value: String) -> Result<Self, Self::Error> {
174        Permalink::parse_str(value.as_str())
175    }
176}
177
178impl TryFrom<Url> for Permalink {
179    type Error = PermalinkError;
180
181    fn try_from(value: Url) -> Result<Self, Self::Error> {
182        Self::parse_url(value)
183    }
184}
185
/// Countries that a permalink can belong to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WellKnownCountry {
    /// Country code `ca`.
    CA,
    /// Country code `jp`.
    JP,
    /// Country code `kr`.
    KR,
    /// Country code `uk`.
    UK,
    /// Country code `us`.
    US,
}
194
195impl FromStr for WellKnownCountry {
196    type Err = PermalinkError;
197
198    fn from_str(value: &str) -> Result<Self, Self::Err> {
199        match value {
200            "ca" | "CA" | "cA" | "Ca" => Ok(Self::CA),
201            "jp" | "JP" | "jP" | "Jp" => Ok(Self::JP),
202            "kr" | "KR" | "kR" | "Kr"  => Ok(Self::KR),
203            "uk" | "UK" | "uK" | "Uk" => Ok(Self::UK),
204            "us" | "US" | "uS" | "Us" => Ok(Self::US),
205            _ => Err(Self::Err::UnknownCountry(value.to_string())),
206        }
207    }
208}
209
210impl Display for WellKnownCountry {
211    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
212        fmt.write_str(match self {
213            Self::CA => "ca",
214            Self::JP => "jp",
215            Self::KR => "kr",
216            Self::UK => "uk",
217            Self::US => "us",
218        })
219    }
220}
221
222trait ToWellKnownCountry {
223    fn to_well_known_country(&self) -> Option<WellKnownCountry>;
224}
225
226impl ToWellKnownCountry for str {
227    fn to_well_known_country(&self) -> Option<WellKnownCountry> {
228        WellKnownCountry::from_str(self).ok()
229    }
230}
231
232impl ToWellKnownCountry for String {
233    fn to_well_known_country(&self) -> Option<WellKnownCountry> {
234        WellKnownCountry::from_str(self.as_str()).ok()
235    }
236}
237
238impl ToWellKnownCountry for Origin {
239    fn to_well_known_country(&self) -> Option<WellKnownCountry> {
240        let origin = self.ascii_serialization();
241        let origin = match origin.as_str() {
242            "https://daangn.com" => "https://www.daangn.com",
243            "https://karrotmarket.com" => "https://www.karrotmarket.com",
244            _ => origin.as_str(),
245        };
246        match origin {
247            "https://www.daangn.com" => Some(WellKnownCountry::KR),
248            "https://www.karrotmarket.com" => None,
249            "https://ca.karrotmarket.com" => Some(WellKnownCountry::CA),
250            "https://jp.karrotmarket.com" => Some(WellKnownCountry::JP),
251            "https://uk.karrotmarket.com" => Some(WellKnownCountry::UK),
252            "https://us.karrotmarket.com" => Some(WellKnownCountry::US),
253            "https://kr.karrotmarket.com" => Some(WellKnownCountry::KR),
254            _ => None,
255        }       
256    }
257}
258
259impl WellKnownCountry {
260    fn default_language(&self) -> String {
261        match self {
262            Self::CA => "en".to_string(),
263            Self::JP => "ja".to_string(),
264            Self::KR => "ko".to_string(),
265            Self::UK => "en".to_string(),
266            Self::US => "en".to_string(),
267        }
268    }
269}
270
/// Unit tests for permalink parsing, normalization and canonicalization.
#[cfg(test)]
mod tests {
    use super::*;

    /// A full permalink with a non-ASCII title and a trailing slash.
    #[test]
    fn test_parse_valid_permalink() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/당근마켓-대한민국-1등-동네-앱-id1018769995/").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko".to_string());
        assert_eq!(permalink.service_type, "app".to_string());
        assert_eq!(permalink.title, Some("당근마켓-대한민국-1등-동네-앱".to_string()));
        assert_eq!(permalink.id, "id1018769995".to_string());
    }

    /// The trailing slash is optional.
    #[test]
    fn test_parse_valid_permalink_without_trailing_slash() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/당근마켓-대한민국-1등-동네-앱-id1018769995").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko".to_string());
        assert_eq!(permalink.service_type, "app".to_string());
        assert_eq!(permalink.title, Some("당근마켓-대한민국-1등-동네-앱".to_string()));
        assert_eq!(permalink.id, "id1018769995".to_string());
    }

    /// The title part of the slug is optional.
    #[test]
    fn test_parse_valid_permalink_without_title() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/id1018769995/").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko".to_string());
        assert_eq!(permalink.service_type, "app".to_string());
        assert_eq!(permalink.title, None);
        assert_eq!(permalink.id, "id1018769995".to_string());
    }

    /// An extra path segment after the slug is captured as `data`.
    #[test]
    fn test_parse_valid_permalink_with_data() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/id1018769995/abc-DEF_123/").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.title, None);
        assert_eq!(permalink.id, "id1018769995".to_string());
        assert_eq!(permalink.data, Some("abc-DEF_123".to_string()));
    }

    /// Input that is not a URL at all is reported as `InvalidUrl`.
    #[test]
    fn test_parse_invalid_url() {
        let result = Permalink::parse_str("invalid/kr/app/id1018769995/");
        assert!(matches!(result, Err(PermalinkError::InvalidUrl(_))));
    }

    /// A valid URL whose path does not follow the permalink pattern.
    #[test]
    fn test_parse_invalid_permalink() {
        let result = Permalink::parse_str("https://apps.apple.com/kr/app/%EB%8B%B9%EA%B7%BC%EB%A7%88%EC%BC%93/id1018769995");
        assert!(matches!(result, Err(PermalinkError::InvalidPermalink)));
    }

    /// A well-known host decides the country even if the path says otherwise.
    #[test]
    fn test_parse_well_known_host() {
        let permalink = Permalink::parse_str("https://www.daangn.com/ca/app/id1018769995/").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko".to_string());
        assert_eq!(permalink.service_type, "app".to_string());
        assert_eq!(permalink.title, None);
        assert_eq!(permalink.id, "id1018769995".to_string());
    }

    /// The country code in the path is matched case-insensitively.
    ///
    /// A country-neutral host is used on purpose: with `www.daangn.com` the
    /// country would come from the origin and the path code would never be
    /// looked at.
    #[test]
    fn test_parse_country_case_insensitive() {
        let permalink = Permalink::parse_str("https://www.karrotmarket.com/KR/app/id1018769995/").unwrap();
        assert_eq!(permalink.country, WellKnownCountry::KR);
        assert_eq!(permalink.default_language, "ko".to_string());
        assert_eq!(permalink.service_type, "app".to_string());
        assert_eq!(permalink.title, None);
        assert_eq!(permalink.id, "id1018769995".to_string());
    }

    /// An unrecognised path country on an unknown host is an error.
    #[test]
    fn test_parse_unknown_country() {
        let result = Permalink::parse_str("http://localhost/xx/app/id1018769995/");
        assert_eq!(result, Err(PermalinkError::UnknownCountry("xx".to_string())));
    }

    /// Normalizing drops the title and switches to the neutral origin.
    #[test]
    fn test_normalize() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/당근마켓-대한민국-1등-동네-앱-id1018769995/").unwrap();
        assert_eq!(
            permalink.normalize(),
            "https://www.karrotmarket.com/kr/app/id1018769995/".to_string(),
        );
    }

    /// Canonicalizing adds the percent-encoded title slug on the country origin.
    #[test]
    fn test_canonicalize() {
        let permalink = Permalink::parse_str("https://www.daangn.com/kr/app/id1018769995/").unwrap();
        assert_eq!(
            permalink.canonicalize("당근마켓-대한민국-1등-동네-앱"),
            "https://www.daangn.com/kr/app/%EB%8B%B9%EA%B7%BC%EB%A7%88%EC%BC%93-%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD-1%EB%93%B1-%EB%8F%99%EB%84%A4-%EC%95%B1-id1018769995/".to_string(),
        );
    }
}