1use std::{
35 collections::HashMap,
36 fmt,
37 fs::File,
38 io::{self, Read},
39 ops::Range,
40 path::Path,
41};
42
/// Catch-all wildcard rule that `List::build` appends after every loaded rule,
/// so a rightmost label with no explicit entry still matches `*` at the root.
const PREVAILING_STAR_RULE: &str = "*";
44
/// Marker stored on a trie node at which a rule ends.
#[derive(Debug)]
struct ListLeaf {
    /// `true` when the rule was written with a leading `!`.
    is_exception_rule: bool,
}

impl ListLeaf {
    /// Creates the leaf for a rule, recording whether it is an exception rule.
    fn new(is_exception_rule: bool) -> Self {
        ListLeaf { is_exception_rule }
    }
}
57
58#[derive(Debug)]
59struct ListNode {
61 children: HashMap<String, ListNode>,
62 leaf: Option<ListLeaf>,
63}
64
65impl ListNode {
66 fn new() -> Self {
68 Self {
69 children: HashMap::new(),
70 leaf: None,
71 }
72 }
73}
74
/// A suffix rule list stored as a trie of labels.
///
/// Rules are inserted label by label from right to left, so the children of
/// `root` are the rightmost labels of the rules.
#[derive(Debug)]
pub struct List {
    /// Entry point of the trie; it never carries a rule itself.
    root: ListNode,
}
80
/// A DNS name together with the byte ranges found by matching it against a
/// [`List`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DnsName {
    /// The name as stored by the parser (ASCII-lowercased, trailing dot kept).
    name: String,
    /// `name` with its characters in reverse order.
    rname: String,
    /// Byte range in `name` of the matched suffix, if any rule matched.
    suffix: Option<Range<usize>>,
    /// Byte range in `name` of the suffix plus one more label to its left,
    /// when the name has such a label.
    root: Option<Range<usize>>,
    /// Byte range running from the start of `root` to just before the dot
    /// that precedes `suffix`; only set when both of those are set.
    registrable: Option<Range<usize>>,
}
97
98impl List {
99 fn append(&mut self, mut rule: &str) -> io::Result<()> {
100 let mut is_exception_rule = false;
101 if rule.starts_with('!') {
102 is_exception_rule = true;
103 rule = &rule[1..];
104 }
105
106 let mut current = &mut self.root;
107 for label in rule.rsplit('.') {
108 if label.is_empty() {
109 return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid rule"));
110 }
111
112 let cur = current;
113 current = cur
114 .children
115 .entry(label.to_owned())
116 .or_insert_with(ListNode::new);
117 }
118
119 current.leaf = Some(ListLeaf::new(is_exception_rule));
120
121 Ok(())
122 }
123
124 fn build(res: &str) -> io::Result<List> {
125 let mut list = List::empty();
126 for rule in res.split(',') {
127 list.append(rule)?;
128 }
129 if list.root.children.is_empty() {
130 return Err(io::Error::new(io::ErrorKind::NotFound, "invalid list"));
131 }
132 list.append(PREVAILING_STAR_RULE)?; Ok(list)
134 }
135
136 pub fn empty() -> List {
138 List {
139 root: ListNode::new(),
140 }
141 }
142
143 pub fn from_path<P: AsRef<Path>>(path: P) -> io::Result<List> {
145 File::open(path)
146 .and_then(|mut data| {
147 let mut res = String::new();
148 data.read_to_string(&mut res)?;
149 Ok(res)
150 })
151 .and_then(|s| s.parse::<List>())
152 }
153
154 pub fn from_reader<R: Read>(mut reader: R) -> io::Result<List> {
161 let mut res = String::new();
162 reader.read_to_string(&mut res)?;
163 Self::build(&res)
164 }
165
166 pub fn parse_domain(&self, domain: &str) -> io::Result<DnsName> {
168 DnsName::parse(domain, self)
169 }
170
/// Parses `domain` against this list by delegating to `DnsName::parse`.
///
/// # Errors
///
/// Propagates the error returned by `DnsName::parse`.
pub fn parse_dns_name(&self, domain: &str) -> io::Result<DnsName> {
    DnsName::parse(domain, self)
}
175
176 pub fn from_trustdns_name(
180 &self,
181 name: &hickory_proto::rr::domain::Name,
182 ) -> io::Result<DnsName> {
183 self.parse_dns_name(&name.to_ascii())
184 }
185}
186
187impl std::str::FromStr for List {
188 type Err = io::Error;
189
190 fn from_str(s: &str) -> io::Result<Self> {
191 Self::build(s)
192 }
193}
194
195impl DnsName {
196 fn new(name: String, suffix: Option<Range<usize>>, root: Option<Range<usize>>) -> DnsName {
197 let rname = name.chars().rev().collect::<String>();
198
199 let registrable = if let (Some(suffix), Some(root)) = (suffix.as_ref(), root.as_ref()) {
200 Some(Range {
201 start: root.start,
202 end: suffix.start - 1,
203 })
204 } else {
205 None
206 };
207
208 DnsName {
209 name,
210 rname,
211 root,
212 suffix,
213 registrable,
214 }
215 }
216
/// Returns the byte length of the last `s_len` labels of `input`, including
/// the dots between them. Trailing dots of `input` are ignored.
///
/// Asking for zero labels yields `0` rather than underflowing on the
/// separator count, and asking for more labels than `input` has yields the
/// length of all of them, so the result never exceeds the trimmed input.
fn subname_length(input: &str, s_len: usize) -> usize {
    let (labels, bytes) = input
        .trim_end_matches('.')
        .rsplit('.')
        .take(s_len)
        .fold((0usize, 0usize), |(labels, bytes), label| {
            (labels + 1, bytes + label.len())
        });

    // One dot between each pair of labels that were actually counted.
    bytes + labels.saturating_sub(1)
}
231
232 fn find_match(input: &str, list: &List) -> io::Result<DnsName> {
234 if input.len() == 1 && input.starts_with('.') {
236 return Ok(DnsName::new(input.to_owned(), None, None));
237 }
238
239 if input.starts_with('.') {
241 return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid name"));
242 }
243
244 let mut longest_valid = None;
245 let mut current = &list.root;
246 let mut s_labels_len = 0;
247
248 let input = input.to_ascii_lowercase();
249 let domain = input.trim_end_matches('.');
250
251 for label in domain.split('.') {
253 if label.is_empty() || label.contains(' ') {
254 return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid name"));
255 }
256 }
257
258 for label in domain.rsplit('.') {
259 if let Some(child) = current.children.get(label) {
260 current = child;
261 s_labels_len += 1;
262 } else if let Some(child) = current.children.get("*") {
263 current = child;
265 s_labels_len += 1;
266 } else {
267 break;
269 }
270
271 if let Some(list_leaf) = ¤t.leaf {
272 longest_valid = Some((list_leaf, s_labels_len));
273 }
274 }
275
276 match longest_valid {
277 Some((leaf, suffix_len)) => {
278 let suffix_len = if leaf.is_exception_rule {
279 suffix_len - 1
280 } else {
281 suffix_len
282 };
283
284 let suffix = Some(Range {
285 start: domain.len() - Self::subname_length(domain, suffix_len),
286 end: domain.len(),
287 });
288
289 let d_labels_len = domain.match_indices('.').count() + 1;
290
291 let registrable = if d_labels_len > suffix_len {
292 Some(Range {
293 start: domain.len() - Self::subname_length(domain, suffix_len + 1),
294 end: domain.len(),
295 })
296 } else {
297 None
298 };
299
300 Ok(DnsName::new(input, suffix, registrable))
301 }
302 None => Ok(DnsName::new(input, None, None)),
303 }
304 }
305
/// Parses `domain` against `list`; a thin wrapper around `find_match`.
///
/// # Errors
///
/// Propagates the error returned by `find_match`.
fn parse(domain: &str, list: &List) -> io::Result<DnsName> {
    Self::find_match(domain, list)
}
310
311 pub fn name(&self) -> &str {
320 &self.name
321 }
322
323 pub fn rname(&self) -> &str {
331 &self.rname
332 }
333
334 pub fn root(&self) -> Option<&str> {
342 match self.root {
343 Some(ref root) if root.start < self.name.len() => Some(&self.name[root.start..]),
344 _ => None,
345 }
346 }
347
348 pub fn suffix(&self) -> Option<&str> {
356 match self.suffix {
357 Some(ref suffix) if suffix.start < self.name.len() => Some(&self.name[suffix.start..]),
358 _ => None,
359 }
360 }
361
362 pub fn registrable(&self) -> Option<&str> {
370 match self.registrable {
371 Some(ref registrable)
372 if registrable.start < self.name.len() && registrable.end < self.name.len() =>
373 {
374 Some(&self.name[registrable.start..registrable.end])
375 }
376 _ => None,
377 }
378 }
379}
380
381impl fmt::Display for DnsName {
382 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
383 write!(f, "{}", self.name.trim_end_matches('.').to_lowercase())
384 }
385}
386
#[cfg(test)]
mod unit_tests {
    //! Tests for list loading and name splitting. They read the fixture
    //! files `suffix-list.txt` and `tests.txt` from the working directory.

    use super::*;

    /// Checks every accessor for a plain name, a mixed-case fully qualified
    /// name, and the bare root name.
    #[test]
    fn dnsname() -> Result<(), std::io::Error> {
        let list = List::from_path("suffix-list.txt").unwrap();

        let domain = list.parse_dns_name("www.example.com")?;
        assert_eq!(domain.name(), "www.example.com");
        assert_eq!(domain.rname(), "moc.elpmaxe.www");
        assert_eq!(domain.root(), Some("example.com"));
        assert_eq!(domain.suffix(), Some("com"));
        assert_eq!(domain.registrable(), Some("example"));

        let domain = list.parse_dns_name("wWw.BlUeCaTnEtWoRkS.Uk.CoM.")?;
        assert_eq!(domain.name(), "www.bluecatnetworks.uk.com.");
        assert_eq!(domain.rname(), ".moc.ku.skrowtentaceulb.www");
        assert_eq!(domain.root(), Some("bluecatnetworks.uk.com."));
        assert_eq!(domain.suffix(), Some("uk.com."));
        assert_eq!(domain.registrable(), Some("bluecatnetworks"));

        let domain = list.parse_dns_name(".")?;
        assert_eq!(domain.name(), ".");
        assert_eq!(domain.rname(), ".");
        assert_eq!(domain.root(), None);
        assert_eq!(domain.suffix(), None);
        assert_eq!(domain.registrable(), None);

        Ok(())
    }

    /// Checks parsing of hickory `Name` values, including a non-ASCII label.
    #[test]
    fn trustdns() -> Result<(), std::io::Error> {
        use hickory_proto::rr::domain::Name;
        use std::str::FromStr;
        let list = List::from_path("suffix-list.txt").unwrap();

        let domain = list.from_trustdns_name(&Name::from_str("a.b.c").unwrap())?;
        assert_eq!(domain.name(), "a.b.c");
        assert_eq!(domain.rname(), "c.b.a");
        assert_eq!(domain.root(), Some("b.c"));
        assert_eq!(domain.suffix(), Some("c"));

        let domain = list.from_trustdns_name(&Name::from_str("a.♥").unwrap())?;
        assert_eq!(domain.name(), "a.xn--g6h");
        assert_eq!(domain.root(), Some("a.xn--g6h"));
        assert_eq!(domain.suffix(), Some("xn--g6h"));

        Ok(())
    }

    /// Loads the suffix list and replays every case in `tests.txt` against
    /// it, panicking on the first mismatch.
    ///
    /// Cases are only read after the first blank line of the file. Lines
    /// starting with `//` are skipped. Each case is `<input> <expected root>`
    /// where `null` stands for "no value".
    fn make_list() -> List {
        let list = List::from_path("suffix-list.txt").unwrap();

        let body = File::open("tests.txt")
            .and_then(|mut data| {
                let mut res = String::new();
                data.read_to_string(&mut res)?;
                Ok(res)
            })
            .unwrap();

        let mut parse = false;

        for (i, line) in body.lines().enumerate() {
            // Report 1-based line numbers, as editors do.
            let line_no = i + 1;

            if line.trim().is_empty() {
                parse = true;
                continue;
            }
            if line.starts_with("//") || !parse {
                continue;
            }

            let mut test = line.split_whitespace();
            let input = match test.next() {
                Some("null") => "",
                Some(res) => res,
                // Cannot happen for a non-blank line; skip defensively.
                None => continue,
            };
            let (expected_root, expected_suffix) = match test.next() {
                Some("null") => (None, None),
                Some(root) => {
                    // The expected suffix is the root without its leftmost label.
                    let suffix = root.split_once('.').map_or("", |(_, rest)| rest);
                    (Some(root.to_string()), Some(suffix.to_string()))
                }
                None => {
                    panic!("line {line_no} of the test file doesn't seem to be valid");
                }
            };
            let (found_root, found_suffix) = match list.parse_domain(input) {
                Ok(domain) => (
                    domain.root().map(|found| found.to_string()),
                    domain.suffix().map(|found| found.to_string()),
                ),
                Err(_) => (None, None),
            };
            if expected_root != found_root
                || (expected_root.is_some() && expected_suffix != found_suffix)
            {
                panic!(
                    "\n\nGiven `{}`:\nWe expected root domain to be `{:?}` and suffix be \
                     `{:?}`\nBut instead, we have `{:?}` as root domain and `{:?}` as \
                     suffix.\nWe are on line {} of `tests.txt`.\n\n",
                    input, expected_root, expected_suffix, found_root, found_suffix, line_no
                );
            }
        }
        list
    }

    #[test]
    fn allow_qualified_domain_names() {
        let list = make_list();
        assert!(list.parse_domain("example.com.").is_ok());
    }

    #[test]
    fn allow_single_label_trailing_dot() {
        let list = make_list();
        assert!(list.parse_domain("com.").is_ok());
    }

    /// A single-label name is its own suffix and has no root.
    #[test]
    fn have_suffix_single_label_domains() {
        let list = make_list();
        let domains = [
            "com",
            "saarland",
            "museum.",
            "localhost",
            "madeup",
            "with-dot.",
        ];
        for domain in domains {
            let res = list.parse_domain(domain).unwrap();
            assert_eq!(res.suffix(), Some(domain));
            assert!(res.root().is_none());
        }
    }

    #[test]
    fn no_empty_labels() {
        let list = make_list();
        assert!(list.parse_domain("exa..mple.com").is_err());
    }

    #[test]
    fn no_spaces() {
        let list = make_list();
        assert!(list.parse_domain("exa mple.com").is_err());
    }

    // NOTE: despite their `no_`/`max` names, the next four tests assert that
    // the parser ACCEPTS these inputs.

    #[test]
    fn no_fwd_slash() {
        let list = make_list();
        assert!(list.parse_domain("exa/mple.com").is_ok());
    }

    #[test]
    fn no_ipv4() {
        let list = make_list();
        assert!(list.parse_domain("127.38.53.247").is_ok());
    }

    #[test]
    fn no_ipv6() {
        let list = make_list();
        assert!(list
            .parse_domain("fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123")
            .is_ok());
    }

    #[test]
    fn label_max_127() {
        let list = make_list();
        let mut too_many_labels_domain = String::from("a");
        for _ in 0..126 {
            too_many_labels_domain.push_str(".a");
        }
        too_many_labels_domain.push_str(".com");
        assert!(list.parse_domain(&too_many_labels_domain).is_ok());
    }

    #[test]
    fn choose_longest_valid() {
        let list = make_list();
        let domain = list.parse_domain("foo.builder.nu").unwrap();
        assert_eq!(Some("nu"), domain.suffix());
        assert_eq!(Some("builder.nu"), domain.root());

        let domain = list.parse_domain("foo.fbsbx.com").unwrap();
        assert_eq!(Some("com"), domain.suffix());
        assert_eq!(Some("fbsbx.com"), domain.root());
    }

    #[test]
    fn allow_num_only_labels() {
        let list = make_list();
        assert!(list.parse_domain("127.com").is_ok());
    }
}