1use idna::domain_to_unicode;
4use proc_macro2::TokenStream;
5use psl_lexer::{List, Type};
6use quote::quote;
7use quote::TokenStreamExt;
8use sequence_trie::SequenceTrie;
9use std::env;
10use std::path::Path;
11use std::str::FromStr;
12
13pub fn compile_psl<P: AsRef<Path>>(path: P) -> proc_macro2::TokenStream {
14 let mut funcs = TokenStream::new();
15 let body = process(&mut funcs, path);
16
17 quote! {
18 use crate::{Type, Info};
19
20 #[inline]
21 pub(super) fn lookup<'a, T>(mut labels: T) -> Info
22 where T: Iterator<Item=&'a [u8]>
23 {
24 let mut info = Info { len: 0, typ: None };
25 match labels.next() {
26 Some(label) => {
27 match label {
28 #body
29 }
30 }
31 None => info,
32 }
33 }
34
35 #funcs
36 }
37}
38
/// Nesting level of a label while the rule trie is being walked.
///
/// `Depth(0)` is the level of the first label handed to the generated `lookup` function; every
/// recursion step of `build` increases it by one.
#[derive(Clone, Copy, Debug)]
struct Depth(usize);
41
42fn process<P: AsRef<Path>>(funcs: &mut TokenStream, path: P) -> TokenStream {
43 let data = psl_lexer::request(psl_lexer::LIST_URL).expect("failed to download the list");
44 let mut list = List::from_str(&data).expect("failed to build the list");
45 std::fs::write(path, list.all().join("\r\n")).expect("failed to write the list to disk");
46
47 let mut tlds = Vec::new();
48 for key in &["PSL_TLD", "PSL_TLDS"] {
49 if let Ok(val) = env::var(key) {
50 for input in val
51 .split(',')
52 .map(|x| x.trim().to_lowercase())
53 .filter(|x| !x.is_empty())
54 {
55 let (tld, res) = domain_to_unicode(&input);
56 if res.is_err() {
57 panic!("failed to parse `{}` as valid unicode domain", input);
58 }
59 let val = list
60 .rules
61 .remove(&tld)
62 .unwrap_or_else(|| panic!("`{}` not found in the list", input));
63 tlds.push((tld, val));
64 }
65 }
66 }
67 if !tlds.is_empty() {
68 list.rules = tlds.into_iter().collect();
69 }
70
71 let mut tree = SequenceTrie::new();
72 for val in list.rules.values() {
73 for suffix in val {
74 let rule = suffix.rule.replace('*', "_");
75 let labels: Vec<_> = rule.split('.').map(|s| s.to_owned()).rev().collect();
76 tree.insert(labels.iter(), suffix.typ);
77 let labels: Vec<_> = labels
78 .into_iter()
79 .map(|label| {
80 idna::domain_to_ascii(&label).unwrap_or_else(|_| {
81 panic!(
82 "expected: a label that can be converted to ascii, found: {}",
83 label
84 )
85 })
86 })
87 .collect();
88 tree.insert(labels.iter(), suffix.typ);
89 }
90 }
91
92 build("lookup", tree.children_with_keys(), Depth(0), funcs)
93}
94
95#[derive(Debug, Clone)]
96struct Func {
97 name: syn::Ident,
98 len: TokenStream,
99 iter: TokenStream,
100 wild: TokenStream,
101}
102
103impl Func {
104 fn new(name: syn::Ident, len: TokenStream, iter: TokenStream) -> Self {
105 Func {
106 name,
107 len,
108 iter,
109 wild: TokenStream::new(),
110 }
111 }
112
113 fn root(self) -> TokenStream {
114 let Func {
115 name, len, wild, ..
116 } = self;
117 quote! {
118 #[inline]
119 fn #name(mut info: Info #wild) -> Info {
120 info.len = #len;
121 info
122 }
123 }
124 }
125
126 fn root_with_typ(self, typ: TokenStream) -> TokenStream {
127 let Func {
128 name, len, wild, ..
129 } = self;
130 quote! {
131 #[inline]
132 fn #name(#wild) -> Info {
133 Info {
134 len: #len,
135 typ: Some(Type::#typ),
136 }
137 }
138 }
139 }
140
141 fn nested_root(self, body: TokenStream) -> TokenStream {
142 let Func {
143 name,
144 len,
145 iter,
146 wild,
147 } = self;
148 quote! {
149 #[inline]
150 fn #name<'a, T>(mut info: Info, #wild mut labels: T) -> Info
151 where T: Iterator<Item=&'a #iter>
152 {
153 let acc = #len;
154 info.len = acc;
155 match labels.next() {
156 Some(label) => {
157 match label {
158 #body
159 }
160 }
161 None => info,
162 }
163 }
164 }
165 }
166
167 fn nested_root_with_typ(self, typ: TokenStream, body: TokenStream) -> TokenStream {
168 let Func {
169 name,
170 len,
171 iter,
172 wild,
173 } = self;
174 quote! {
175 #[inline]
176 fn #name<'a, T>(#wild mut labels: T) -> Info
177 where T: Iterator<Item=&'a #iter>
178 {
179 let acc = #len;
180 let info = Info {
181 len: acc,
182 typ: Some(Type::#typ),
183 };
184 match labels.next() {
185 Some(label) => {
186 match label {
187 #body
188 }
189 }
190 None => info,
191 }
192 }
193 }
194 }
195
196 fn inner(self, body: TokenStream) -> TokenStream {
197 let Func {
198 name,
199 len,
200 iter,
201 wild,
202 } = self;
203 quote! {
204 #[inline]
205 fn #name<'a, T>(info: Info, #wild mut labels: T, mut acc: usize) -> Info
206 where T: Iterator<Item=&'a #iter>
207 {
208 acc += 1 + #len;
209 match labels.next() {
210 Some(label) => {
211 match label {
212 #body
213 }
214 }
215 None => info,
216 }
217 }
218 }
219 }
220
221 fn inner_with_typ(self, typ: TokenStream, body: TokenStream) -> TokenStream {
222 let Func {
223 name,
224 len,
225 iter,
226 wild,
227 } = self;
228 quote! {
229 #[inline]
230 fn #name<'a, T>(#wild mut labels: T, mut acc: usize) -> Info
231 where T: Iterator<Item=&'a #iter>
232 {
233 acc += 1 + #len;
234 let info = Info {
235 len: acc,
236 typ: Some(Type::#typ),
237 };
238 match labels.next() {
239 Some(label) => {
240 match label {
241 #body
242 }
243 }
244 None => info,
245 }
246 }
247 }
248 }
249
250 fn leaf(self, typ: TokenStream) -> TokenStream {
251 let Func {
252 name, len, wild, ..
253 } = self;
254 quote! {
255 #[inline]
256 fn #name(#wild acc: usize) -> Info {
257 Info {
258 len: acc + 1 + #len,
259 typ: Some(Type::#typ),
260 }
261 }
262 }
263 }
264
265 fn bang_leaf(self, typ: TokenStream) -> TokenStream {
266 let Func { name, wild, .. } = self;
267 quote! {
268 #[inline]
269 fn #name(#wild acc: usize) -> Info {
270 Info {
271 len: acc,
272 typ: Some(Type::#typ),
273 }
274 }
275 }
276 }
277}
278
279fn ident(name: &str) -> syn::Ident {
280 syn::parse_str::<syn::Ident>(&name).unwrap()
281}
282
283fn pat(label: &str) -> (TokenStream, TokenStream) {
284 let label = label.trim_start_matches('!');
285 let len = label.len();
286 if label == "_" {
287 (quote!(wild), quote!(wild.len()))
288 } else {
289 let pat = array_expr(label);
290 (quote!(#pat), quote!(#len))
291 }
292}
293
294fn build(
295 fname: &str,
296 list: Vec<(&String, &SequenceTrie<String, Type>)>,
297 Depth(depth): Depth,
298 funcs: &mut TokenStream,
299) -> TokenStream {
300 if list.is_empty() && depth == 0 && !cfg!(test) {
301 panic!("Found empty list. This implementation doesn't support empty lists.");
302 }
303
304 let iter = quote!([u8]);
305
306 let mut head = TokenStream::new();
307 let mut body = TokenStream::new();
308 let mut footer = TokenStream::new();
309
310 for (i, (label, tree)) in list.into_iter().enumerate() {
311 let typ = match tree.value() {
312 Some(val) => {
313 let typ = match *val {
314 Type::Icann => quote!(Icann),
315 Type::Private => quote!(Private),
316 };
317 quote!(#typ)
318 }
319 None => TokenStream::new(),
320 };
321
322 let name = format!("{}_{}", fname, i);
323 let fident = ident(&name);
324 let children = build(&name, tree.children_with_keys(), Depth(depth + 1), funcs);
325 let (pat, len) = pat(label);
326 let mut func = Func::new(fident.clone(), len, iter.clone());
327
328 if label.starts_with('!') {
330 if !children.is_empty() {
331 panic!(
332 "an exclamation mark must be at the end of an exception rule: {}",
333 label
334 )
335 }
336 funcs.append_all(func.bang_leaf(typ));
337 if depth == 0 {
338 panic!("an exception rule cannot be in TLD position: {}", label);
339 } else {
340 head.append_all(quote! {
341 #pat => #fident(acc),
342 });
343 }
344 }
345 else if label == "_" {
347 if depth == 0 {
348 if children.is_empty() {
349 if typ.is_empty() {
350 func.wild = quote!(, wild: &#iter);
351 funcs.append_all(func.root());
352 footer.append_all(quote! {
353 wild => #fident(info, wild),
354 });
355 } else {
356 func.wild = quote!(wild: &#iter);
357 funcs.append_all(func.root_with_typ(typ));
358 footer.append_all(quote! {
359 wild => #fident(wild),
360 });
361 }
362 } else if typ.is_empty() {
363 func.wild = quote!(wild: &#iter,);
364 funcs.append_all(func.nested_root(children));
365 footer.append_all(quote! {
366 wild => #fident(info, wild, labels),
367 });
368 } else {
369 func.wild = quote!(wild: &#iter,);
370 funcs.append_all(func.nested_root_with_typ(typ, children));
371 footer.append_all(quote! {
372 wild => #fident(wild, labels),
373 });
374 }
375 } else if children.is_empty() {
376 func.wild = quote!(wild: &#iter,);
377 funcs.append_all(func.leaf(typ));
378 footer.append_all(quote! {
379 wild => #fident(wild, acc),
380 });
381 } else if typ.is_empty() {
382 func.wild = quote!(wild: &#iter,);
383 funcs.append_all(func.inner(children));
384 footer.append_all(quote! {
385 wild => #fident(info, wild, labels, acc),
386 });
387 } else {
388 func.wild = quote!(wild: &#iter,);
389 funcs.append_all(func.inner_with_typ(typ, children));
390 footer.append_all(quote! {
391 wild => #fident(wild, labels, acc),
392 });
393 }
394 }
395 else if depth == 0 {
397 if children.is_empty() {
398 if typ.is_empty() {
399 funcs.append_all(func.root());
400 body.append_all(quote! {
401 #pat => #fident(info),
402 });
403 } else {
404 funcs.append_all(func.root_with_typ(typ));
405 body.append_all(quote! {
406 #pat => #fident(),
407 });
408 }
409 } else if typ.is_empty() {
410 funcs.append_all(func.nested_root(children));
411 body.append_all(quote! {
412 #pat => #fident(info, labels),
413 });
414 } else {
415 funcs.append_all(func.nested_root_with_typ(typ, children));
416 body.append_all(quote! {
417 #pat => #fident(labels),
418 });
419 }
420 } else if children.is_empty() {
421 funcs.append_all(func.leaf(typ));
422 body.append_all(quote! {
423 #pat => #fident(acc),
424 });
425 } else if typ.is_empty() {
426 funcs.append_all(func.inner(children));
427 body.append_all(quote! {
428 #pat => #fident(info, labels, acc),
429 });
430 } else {
431 funcs.append_all(func.inner_with_typ(typ, children));
432 body.append_all(quote! {
433 #pat => #fident(labels, acc),
434 });
435 }
436 }
437
438 if head.is_empty() && body.is_empty() && footer.is_empty() {
439 return TokenStream::new();
440 }
441
442 if footer.is_empty() {
443 if fname == "lookup" {
444 footer.append_all(quote! {
445 wild => {
446 info.len = wild.len();
447 info
448 }
449 });
450 } else {
451 footer.append_all(quote!(_ => info,));
452 }
453 }
454
455 quote! {
456 #head
457 #body
458 #footer
459 }
460}
461
462fn array_expr(label: &str) -> syn::ExprArray {
463 let label = format!("{:?}", label.as_bytes());
464 syn::parse_str(&label).unwrap()
465}