1use psl::Psl;
2use publicsuffix::List;
3
4use crate::compiler::prelude::*;
5use std::{collections::BTreeMap, path::Path};
6
/// Marker type for the `parse_etld` function.
///
/// It carries no state; all behavior comes from its `Function` implementation.
#[derive(Debug, Clone, Copy)]
pub struct ParseEtld;
9
10impl Function for ParseEtld {
11 fn identifier(&self) -> &'static str {
12 "parse_etld"
13 }
14
15 fn usage(&self) -> &'static str {
16 "Parses the [eTLD](https://developer.mozilla.org/en-US/docs/Glossary/eTLD) from `value` representing domain name."
17 }
18
19 fn parameters(&self) -> &'static [Parameter] {
20 &[
21 Parameter {
22 keyword: "value",
23 kind: kind::BYTES,
24 required: true,
25 },
26 Parameter {
27 keyword: "plus_parts",
28 kind: kind::INTEGER,
29 required: false,
30 },
31 Parameter {
32 keyword: "psl",
33 kind: kind::BYTES,
34 required: false,
35 },
36 ]
37 }
38
39 fn examples(&self) -> &'static [Example] {
40 &[
41 example! {
42 title: "Parse eTLD",
43 source: r#"parse_etld!("sub.sussex.ac.uk")"#,
44 result: Ok(indoc! {r#"
45 {
46 "etld": "ac.uk",
47 "etld_plus": "ac.uk",
48 "known_suffix": true
49 }
50 "#}),
51 },
52 example! {
53 title: "Parse eTLD+1",
54 source: r#"parse_etld!("sub.sussex.ac.uk", plus_parts: 1)"#,
55 result: Ok(indoc! {r#"
56 {
57 "etld": "ac.uk",
58 "etld_plus": "sussex.ac.uk",
59 "known_suffix": true
60 }
61 "#}),
62 },
63 example! {
64 title: "Parse eTLD with unknown suffix",
65 source: r#"parse_etld!("vector.acmecorp")"#,
66 result: Ok(indoc! {r#"
67 {
68 "etld": "acmecorp",
69 "etld_plus": "acmecorp",
70 "known_suffix": false
71 }
72 "#}),
73 },
74 example! {
75 title: "Parse eTLD with custom PSL",
76 source: r#"parse_etld!("vector.acmecorp", psl: "lib/tests/tests/functions/custom_public_suffix_list.dat")"#,
77 result: Ok(indoc! {r#"
78 {
79 "etld": "acmecorp",
80 "etld_plus": "acmecorp",
81 "known_suffix": false
82 }
83 "#}),
84 },
85 ]
86 }
87
88 fn compile(
89 &self,
90 state: &state::TypeState,
91 _ctx: &mut FunctionCompileContext,
92 arguments: ArgumentList,
93 ) -> Compiled {
94 let value = arguments.required("value");
95 let plus_parts = arguments.optional("plus_parts").unwrap_or_else(|| expr!(0));
96
97 let psl_expr = arguments.optional_expr("psl");
98 let mut psl: Option<List> = None;
99 if let Some(psl_expr) = psl_expr {
100 let psl_location = psl_expr
101 .clone()
102 .resolve_constant(state)
103 .ok_or(function::Error::ExpectedStaticExpression {
104 keyword: "psl",
105 expr: psl_expr.clone(),
106 })?
107 .try_bytes_utf8_lossy()
108 .map_err(|_| function::Error::InvalidArgument {
109 keyword: "psl",
110 value: format!("{psl_expr:?}").into(),
111 error: "psl should be a string",
112 })?
113 .into_owned();
114
115 let path = Path::new(&psl_location);
116 psl = Some(
117 std::fs::read_to_string(path)
118 .map_err(|_| function::Error::InvalidArgument {
119 keyword: "psl",
120 value: format!("{}", path.display()).into(),
121 error: "Unable to read psl file",
122 })?
123 .parse()
124 .map_err(|_| function::Error::InvalidArgument {
125 keyword: "psl",
126 value: format!("{}", path.display()).into(),
127 error: "Unable to parse psl file",
128 })?,
129 );
130 }
131
132 Ok(ParseEtldFn {
133 value,
134 plus_parts,
135 psl,
136 }
137 .as_expr())
138 }
139}
140
141#[derive(Debug, Clone)]
142struct ParseEtldFn {
143 value: Box<dyn Expression>,
144 plus_parts: Box<dyn Expression>,
145 psl: Option<List>,
146}
147
148impl FunctionExpression for ParseEtldFn {
149 fn resolve(&self, ctx: &mut Context) -> Resolved {
150 let value = self.value.resolve(ctx)?;
151 let string = value.try_bytes_utf8_lossy()?;
152
153 let plus_parts = match self.plus_parts.resolve(ctx)?.try_integer()? {
154 x if x < 0 => 0,
155 #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
157 x => x as usize,
158 };
159
160 let suffix_result = if let Some(list) = &self.psl {
161 list.suffix(string.as_bytes())
162 } else {
163 psl::suffix(string.as_bytes())
164 };
165 let etld = suffix_result.ok_or(format!("unable to determine eTLD for {string}"))?;
166 let etld_string = core::str::from_utf8(etld.as_bytes())
167 .map_err(|err| format!("could not convert eTLD to UTF8 {err}"))?;
168
169 let etld_parts_count = etld_string.chars().filter(|c| *c == '.').count() + 1;
170 let etld_plus_parts: Vec<&str> = string
171 .rsplit('.')
172 .take(etld_parts_count + plus_parts)
173 .collect();
174
175 let etld_plus = etld_plus_parts
176 .into_iter()
177 .rev()
178 .collect::<Vec<_>>()
179 .join(".");
180
181 let mut map = BTreeMap::<&str, Value>::new();
182
183 map.insert("etld", etld_string.to_owned().into());
184 map.insert("etld_plus", etld_plus.into());
185 map.insert("known_suffix", etld.is_known().into());
186
187 Ok(map
188 .into_iter()
189 .map(|(k, v)| (k.to_owned(), v))
190 .collect::<Value>())
191 }
192
193 fn type_def(&self, _: &state::TypeState) -> TypeDef {
194 TypeDef::object(inner_kind()).fallible()
195 }
196}
197
198fn inner_kind() -> BTreeMap<Field, Kind> {
199 BTreeMap::from([
200 ("etld".into(), Kind::bytes()),
201 ("etld_plus".into(), Kind::bytes()),
202 ("known_suffix".into(), Kind::boolean()),
203 ])
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;

    /// Type definition expected by every case below: a fallible object with
    /// the fields listed in `inner_kind`.
    fn expected_tdef() -> TypeDef {
        TypeDef::object(inner_kind()).fallible()
    }

    test_function![
        parse_etld => ParseEtld;

        // Single-label suffix.
        naive {
            args: func_args![value: value!("vector.dev")],
            want: Ok(value!({
                etld: "dev",
                etld_plus: "dev",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        naive_plus_one {
            args: func_args![value: value!("vector.dev"), plus_parts: 1],
            want: Ok(value!({
                etld: "dev",
                etld_plus: "vector.dev",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        // Multi-label suffix.
        psl {
            args: func_args![value: value!("sussex.ac.uk")],
            want: Ok(value!({
                etld: "ac.uk",
                etld_plus: "ac.uk",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        psl_plus_one {
            args: func_args![value: value!("sussex.ac.uk"), plus_parts: 1],
            want: Ok(value!({
                etld: "ac.uk",
                etld_plus: "sussex.ac.uk",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        // `plus_parts` larger than the number of labels available.
        short_plus {
            args: func_args![value: value!("vector.dev"), plus_parts: 10],
            want: Ok(value!({
                etld: "dev",
                etld_plus: "vector.dev",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        // `plus_parts` smaller than the number of labels available.
        long_plus {
            args: func_args![value: value!("www.long.plus.test.vector.dev"), plus_parts: 4],
            want: Ok(value!({
                etld: "dev",
                etld_plus: "long.plus.test.vector.dev",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        // Suffix that is not reported as known.
        unknown_tld {
            args: func_args![value: value!("vector.unknowndev")],
            want: Ok(value!({
                etld: "unknowndev",
                etld_plus: "unknowndev",
                known_suffix: false,
            })),
            tdef: expected_tdef(),
        }

        // Non-ASCII labels.
        utf8 {
            args: func_args![value: value!("www.食狮.中国")],
            want: Ok(value!({
                etld: "中国",
                etld_plus: "中国",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        utf8_plus_one {
            args: func_args![value: value!("www.食狮.中国"), plus_parts: 1],
            want: Ok(value!({
                etld: "中国",
                etld_plus: "食狮.中国",
                known_suffix: true,
            })),
            tdef: expected_tdef(),
        }

        // Failure paths.
        empty_host {
            args: func_args![value: value!("")],
            want: Err("unable to determine eTLD for "),
            tdef: expected_tdef(),
        }

        bad_psl_file {
            args: func_args![value: value!("vector.dev"), psl: value!("definitelynotafile")],
            want: Err("invalid argument"),
            tdef: expected_tdef(),
        }
    ];
}