dns_name/
lib.rs

//! Robust domain name parsing using the Public Suffix List
//!
//! This library allows you to easily and accurately parse any given domain
//! name.
//!
//! ```
//! use dns_name::List;
//!
//! let list = List::from_path("suffix-list.txt").unwrap();
//!
//! let domain = list.parse_dns_name("www.example.com").unwrap();
//! assert_eq!(domain.name(), "www.example.com");
//! assert_eq!(domain.rname(), "moc.elpmaxe.www");
//! assert_eq!(domain.root(), Some("example.com"));
//! assert_eq!(domain.suffix(), Some("com"));
//! assert_eq!(domain.registrable(), Some("example"));
//!
//! // 2-level TLD
//! let domain = list.parse_dns_name("wWw.BlUeCaTnEtWoRkS.Uk.CoM.").unwrap();
//! assert_eq!(domain.name(), "www.bluecatnetworks.uk.com.");
//! assert_eq!(domain.rname(), ".moc.ku.skrowtentaceulb.www");
//! assert_eq!(domain.root(), Some("bluecatnetworks.uk.com."));
//! assert_eq!(domain.suffix(), Some("uk.com."));
//! assert_eq!(domain.registrable(), Some("bluecatnetworks"));
//!
//! // the root name
//! let domain = list.parse_dns_name(".").unwrap();
//! assert_eq!(domain.name(), ".");
//! assert_eq!(domain.rname(), ".");
//! assert_eq!(domain.root(), None);
//! assert_eq!(domain.suffix(), None);
//! assert_eq!(domain.registrable(), None);
//! ```
33
34use std::{
35    collections::HashMap,
36    fmt,
37    fs::File,
38    io::{self, Read},
39    ops::Range,
40    path::Path,
41};
42
/// The default rule, a single wildcard label.
///
/// `List::build` appends it after all other rules so that a name matching no
/// explicit rule still gets its rightmost label reported as the suffix.
const PREVAILING_STAR_RULE: &str = "*";
44
/// Marks a node of the rule tree at which a rule ends.
#[derive(Debug)]
struct ListLeaf {
    /// `true` when the rule was written with a leading `!`.
    is_exception_rule: bool,
}

impl ListLeaf {
    /// Builds the leaf for a rule, recording whether it is an exception rule.
    fn new(is_exception_rule: bool) -> Self {
        ListLeaf { is_exception_rule }
    }
}
57
58#[derive(Debug)]
59/// A List node
60struct ListNode {
61    children: HashMap<String, ListNode>,
62    leaf: Option<ListLeaf>,
63}
64
65impl ListNode {
66    /// Creates a new `ListNode`
67    fn new() -> Self {
68        Self {
69            children: HashMap::new(),
70            leaf: None,
71        }
72    }
73}
74
/// Stores the public suffix list
///
/// The rules are kept as a tree of labels. Build a list with
/// [`List::from_path`], [`List::from_reader`] or `str::parse`, then query it
/// with [`List::parse_dns_name`].
#[derive(Debug)]
pub struct List {
    /// Root of the rule tree; its children are the rightmost labels of the
    /// stored rules.
    root: ListNode,
}
80
/// Holds information about a particular DNS name
///
/// This is created by [`List::parse_dns_name`] (or `List::parse_domain`).
/// The `suffix`, `root` and `registrable` parts are stored as byte ranges
/// into `name` rather than as separate strings.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DnsName {
    /// full name, ASCII-lowercased: foo.example.com
    name: String,
    /// name, reversed by character: moc.elpmaxe.oof
    rname: String,
    /// suffix: com
    suffix: Option<Range<usize>>,
    /// root: example.com
    root: Option<Range<usize>>,
    /// registrable: example
    registrable: Option<Range<usize>>,
}
97
98impl List {
99    fn append(&mut self, mut rule: &str) -> io::Result<()> {
100        let mut is_exception_rule = false;
101        if rule.starts_with('!') {
102            is_exception_rule = true;
103            rule = &rule[1..];
104        }
105
106        let mut current = &mut self.root;
107        for label in rule.rsplit('.') {
108            if label.is_empty() {
109                return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid rule"));
110            }
111
112            let cur = current;
113            current = cur
114                .children
115                .entry(label.to_owned())
116                .or_insert_with(ListNode::new);
117        }
118
119        current.leaf = Some(ListLeaf::new(is_exception_rule));
120
121        Ok(())
122    }
123
124    fn build(res: &str) -> io::Result<List> {
125        let mut list = List::empty();
126        for rule in res.split(',') {
127            list.append(rule)?;
128        }
129        if list.root.children.is_empty() {
130            return Err(io::Error::new(io::ErrorKind::NotFound, "invalid list"));
131        }
132        list.append(PREVAILING_STAR_RULE)?; // add the default rule
133        Ok(list)
134    }
135
136    /// Creates an empty List without any rules
137    pub fn empty() -> List {
138        List {
139            root: ListNode::new(),
140        }
141    }
142
143    /// Fetch the list from a local file
144    pub fn from_path<P: AsRef<Path>>(path: P) -> io::Result<List> {
145        File::open(path)
146            .and_then(|mut data| {
147                let mut res = String::new();
148                data.read_to_string(&mut res)?;
149                Ok(res)
150            })
151            .and_then(|s| s.parse::<List>())
152    }
153
154    /// Build the list from the result of anything that implements
155    /// `std::io::Read`
156    ///
157    /// If you don't already have your list on the filesystem but want to use
158    /// your own library to fetch the list you can use this method so you
159    /// don't have to save it first.
160    pub fn from_reader<R: Read>(mut reader: R) -> io::Result<List> {
161        let mut res = String::new();
162        reader.read_to_string(&mut res)?;
163        Self::build(&res)
164    }
165
166    /// Parses a domain using the list (API backwards compat)
167    pub fn parse_domain(&self, domain: &str) -> io::Result<DnsName> {
168        DnsName::parse(domain, self)
169    }
170
    /// Parses a DNS name using the list
    ///
    /// The name is ASCII-lowercased before matching, and a trailing dot is
    /// accepted. The root name `"."` parses successfully but has no suffix,
    /// root or registrable part.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` ("invalid name") when the name starts with a
    /// dot, contains an empty label, or has a label containing a space.
    pub fn parse_dns_name(&self, domain: &str) -> io::Result<DnsName> {
        DnsName::parse(domain, self)
    }
175
    /// Converts a [`Name`] from the `hickory_proto` crate into a `DnsName`
    ///
    /// The name is rendered with `Name::to_ascii` and the resulting string is
    /// parsed with [`List::parse_dns_name`], so the same errors apply.
    ///
    /// [`Name`]: hickory_proto::rr::domain::Name
    pub fn from_trustdns_name(
        &self,
        name: &hickory_proto::rr::domain::Name,
    ) -> io::Result<DnsName> {
        self.parse_dns_name(&name.to_ascii())
    }
185}
186
187impl std::str::FromStr for List {
188    type Err = io::Error;
189
190    fn from_str(s: &str) -> io::Result<Self> {
191        Self::build(s)
192    }
193}
194
195impl DnsName {
196    fn new(name: String, suffix: Option<Range<usize>>, root: Option<Range<usize>>) -> DnsName {
197        let rname = name.chars().rev().collect::<String>();
198
199        let registrable = if let (Some(suffix), Some(root)) = (suffix.as_ref(), root.as_ref()) {
200            Some(Range {
201                start: root.start,
202                end: suffix.start - 1,
203            })
204        } else {
205            None
206        };
207
208        DnsName {
209            name,
210            rname,
211            root,
212            suffix,
213            registrable,
214        }
215    }
216
217    /// Counts the length of 1 or more labels, counting from reverse
218    ///
219    /// ("b.example.uk.com", 2) -> "uk.com" -> 6
220    fn subname_length(input: &str, s_len: usize) -> usize {
221        let len = input
222            .trim_end_matches('.')
223            .split('.')
224            .rev()
225            .take(s_len)
226            .fold(0, |acc, part| acc + part.len());
227
228        // Add in "." seperators
229        len + (s_len - 1)
230    }
231
232    /// Finds a match in the Public Suffix list
233    fn find_match(input: &str, list: &List) -> io::Result<DnsName> {
234        // root domain is permitted
235        if input.len() == 1 && input.starts_with('.') {
236            return Ok(DnsName::new(input.to_owned(), None, None));
237        }
238
239        // a name cannot start with '.'
240        if input.starts_with('.') {
241            return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid name"));
242        }
243
244        let mut longest_valid = None;
245        let mut current = &list.root;
246        let mut s_labels_len = 0;
247
248        let input = input.to_ascii_lowercase();
249        let domain = input.trim_end_matches('.');
250
251        // very basic sanity check the labels
252        for label in domain.split('.') {
253            if label.is_empty() || label.contains(' ') {
254                return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid name"));
255            }
256        }
257
258        for label in domain.rsplit('.') {
259            if let Some(child) = current.children.get(label) {
260                current = child;
261                s_labels_len += 1;
262            } else if let Some(child) = current.children.get("*") {
263                // wildcard rule
264                current = child;
265                s_labels_len += 1;
266            } else {
267                // no match rules
268                break;
269            }
270
271            if let Some(list_leaf) = &current.leaf {
272                longest_valid = Some((list_leaf, s_labels_len));
273            }
274        }
275
276        match longest_valid {
277            Some((leaf, suffix_len)) => {
278                let suffix_len = if leaf.is_exception_rule {
279                    suffix_len - 1
280                } else {
281                    suffix_len
282                };
283
284                let suffix = Some(Range {
285                    start: domain.len() - Self::subname_length(domain, suffix_len),
286                    end: domain.len(),
287                });
288
289                let d_labels_len = domain.match_indices('.').count() + 1;
290
291                let registrable = if d_labels_len > suffix_len {
292                    Some(Range {
293                        start: domain.len() - Self::subname_length(domain, suffix_len + 1),
294                        end: domain.len(),
295                    })
296                } else {
297                    None
298                };
299
300                Ok(DnsName::new(input, suffix, registrable))
301            }
302            None => Ok(DnsName::new(input, None, None)),
303        }
304    }
305
    /// Parses a DNS name using the list
    ///
    /// Thin wrapper around `find_match`; the public parsing methods on `List`
    /// end up here.
    fn parse(domain: &str, list: &List) -> io::Result<DnsName> {
        Self::find_match(domain, list)
    }
310
311    /// Get the DNS name
312    ///
313    /// ```rust
314    /// # use dns_name::{List, DnsName};
315    /// let list = List::empty();
316    /// let name = list.parse_domain("www.example.com").unwrap();
317    /// assert_eq!(name.name(), "www.example.com");
318    /// ```
319    pub fn name(&self) -> &str {
320        &self.name
321    }
322
323    /// Get the DNS name in character reversed order
324    /// ```rust
325    /// # use dns_name::{List, DnsName};
326    /// let list = List::empty();
327    /// let name = list.parse_domain("www.example.com").unwrap();
328    /// assert_eq!(name.rname(), "moc.elpmaxe.www");
329    /// ```
330    pub fn rname(&self) -> &str {
331        &self.rname
332    }
333
334    /// Gets the root domain portion of the Name
335    /// ```should_panic
336    /// # use dns_name::{List, DnsName};
337    /// let list = List::empty();
338    /// let name = list.parse_domain("www.example.com").unwrap();
339    /// assert_eq!(name.root(), Some("example.com"));
340    /// ```
341    pub fn root(&self) -> Option<&str> {
342        match self.root {
343            Some(ref root) if root.start < self.name.len() => Some(&self.name[root.start..]),
344            _ => None,
345        }
346    }
347
348    /// Gets the suffix portion of the Name
349    /// ```should_panic
350    /// # use dns_name::{List, DnsName};
351    /// let list = List::empty();
352    /// let name = list.parse_domain("www.example.com").unwrap();
353    /// assert_eq!(name.root(), Some("com"));
354    /// ```
355    pub fn suffix(&self) -> Option<&str> {
356        match self.suffix {
357            Some(ref suffix) if suffix.start < self.name.len() => Some(&self.name[suffix.start..]),
358            _ => None,
359        }
360    }
361
362    /// Gets the registrable portion of the Name
363    /// ```should_panic
364    /// # use dns_name::{List, DnsName};
365    /// let list = List::empty();
366    /// let name = list.parse_domain("www.example.com").unwrap();
367    /// assert_eq!(name.root(), Some("example"));
368    /// ```
369    pub fn registrable(&self) -> Option<&str> {
370        match self.registrable {
371            Some(ref registrable)
372                if registrable.start < self.name.len() && registrable.end < self.name.len() =>
373            {
374                Some(&self.name[registrable.start..registrable.end])
375            }
376            _ => None,
377        }
378    }
379}
380
381impl fmt::Display for DnsName {
382    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
383        write!(f, "{}", self.name.trim_end_matches('.').to_lowercase())
384    }
385}
386
387#[cfg(test)]
388mod unit_tests {
389    use super::*;
390
391    #[test]
392    fn dnsname() -> Result<(), std::io::Error> {
393        let list = List::from_path("suffix-list.txt").unwrap();
394
395        let domain = list.parse_dns_name("www.example.com")?;
396        assert_eq!(domain.name(), "www.example.com");
397        assert_eq!(domain.rname(), "moc.elpmaxe.www");
398        assert_eq!(domain.root(), Some("example.com"));
399        assert_eq!(domain.suffix(), Some("com"));
400        assert_eq!(domain.registrable(), Some("example"));
401
402        // 2-level TLD
403        let domain = list.parse_dns_name("wWw.BlUeCaTnEtWoRkS.Uk.CoM.")?;
404        assert_eq!(domain.name(), "www.bluecatnetworks.uk.com.");
405        assert_eq!(domain.rname(), ".moc.ku.skrowtentaceulb.www");
406        assert_eq!(domain.root(), Some("bluecatnetworks.uk.com."));
407        assert_eq!(domain.suffix(), Some("uk.com."));
408        assert_eq!(domain.registrable(), Some("bluecatnetworks"));
409
410        // the root name
411        let domain = list.parse_dns_name(".")?;
412        assert_eq!(domain.name(), ".");
413        assert_eq!(domain.rname(), ".");
414        assert_eq!(domain.root(), None);
415        assert_eq!(domain.suffix(), None);
416        assert_eq!(domain.registrable(), None);
417
418        Ok(())
419    }
420
421    #[test]
422    fn trustdns() -> Result<(), std::io::Error> {
423        use hickory_proto::rr::domain::Name;
424        use std::str::FromStr;
425        let list = List::from_path("suffix-list.txt").unwrap();
426
427        let domain = list.from_trustdns_name(&Name::from_str("a.b.c").unwrap())?;
428        assert_eq!(domain.name(), "a.b.c");
429        assert_eq!(domain.rname(), "c.b.a");
430        assert_eq!(domain.root(), Some("b.c"));
431        assert_eq!(domain.suffix(), Some("c"));
432
433        // conversion to ascii
434        let domain = list.from_trustdns_name(&Name::from_str("a.♥").unwrap())?;
435        assert_eq!(domain.name(), "a.xn--g6h");
436        assert_eq!(domain.root(), Some("a.xn--g6h"));
437        assert_eq!(domain.suffix(), Some("xn--g6h"));
438
439        Ok(())
440    }
441
442    fn make_list() -> List {
443        let list = List::from_path("suffix-list.txt").unwrap();
444
445        let body = File::open("tests.txt")
446            .and_then(|mut data| {
447                let mut res = String::new();
448                data.read_to_string(&mut res)?;
449                Ok(res)
450            })
451            .unwrap();
452
453        let mut parse = false;
454
455        for (i, line) in body.lines().enumerate() {
456            match line {
457                line if line.trim().is_empty() => {
458                    parse = true;
459                    continue;
460                }
461                line if line.starts_with("//") => {
462                    continue;
463                }
464                line => {
465                    if !parse {
466                        continue;
467                    }
468                    let mut test = line.split_whitespace().peekable();
469                    if test.peek().is_none() {
470                        continue;
471                    }
472                    let input = match test.next() {
473                        Some("null") => "",
474                        Some(res) => res,
475                        None => {
476                            panic!("line {i} of the test file doesn't seem to be valid");
477                        }
478                    };
479                    let (expected_root, expected_suffix) = match test.next() {
480                        Some("null") => (None, None),
481                        Some(root) => {
482                            let suffix = {
483                                let parts: Vec<&str> = root.split('.').rev().collect();
484                                parts[..parts.len() - 1]
485                                    .iter()
486                                    .rev()
487                                    .copied()
488                                    .collect::<Vec<_>>()
489                                    .join(".")
490                            };
491                            (Some(root.to_string()), Some(suffix.to_string()))
492                        }
493                        None => {
494                            panic!("line {i} of the test file doesn't seem to be valid");
495                        }
496                    };
497                    let (found_root, found_suffix) = match list.parse_domain(input) {
498                        Ok(domain) => {
499                            let found_root = domain.root().map(|found| found.to_string());
500                            let found_suffix = domain.suffix().map(|found| found.to_string());
501                            (found_root, found_suffix)
502                        }
503                        Err(_) => (None, None),
504                    };
505                    if expected_root != found_root
506                        || (expected_root.is_some() && expected_suffix != found_suffix)
507                    {
508                        let msg = format!(
509                            "\n\nGiven `{}`:\nWe expected root domain to be `{:?}` and suffix be \
510                             `{:?}`\nBut instead, we have `{:?}` as root domain and `{:?}` as \
511                             suffix.\nWe are on line {} of `test_psl.txt`.\n\n",
512                            input,
513                            expected_root,
514                            expected_suffix,
515                            found_root,
516                            found_suffix,
517                            i + 1
518                        );
519                        panic!("{}", msg);
520                    }
521                }
522            }
523        }
524        list
525    }
526
527    #[test]
528    fn allow_qualified_domain_names() {
529        let list = make_list();
530        assert!(list.parse_domain("example.com.").is_ok());
531    }
532
533    #[test]
534    fn allow_single_label_trailing_dot() {
535        let list = make_list();
536        assert!(list.parse_domain("com.").is_ok());
537    }
538
539    #[test]
540    fn have_suffix_single_label_domains() {
541        let list = make_list();
542        let domains = vec![
543            // real TLDs
544            "com",
545            "saarland",
546            "museum.",
547            // non-existant TLDs
548            "localhost",
549            "madeup",
550            "with-dot.",
551        ];
552        for domain in domains {
553            let res = list.parse_domain(domain).unwrap();
554            assert_eq!(res.suffix(), Some(domain));
555            assert!(res.root().is_none());
556        }
557    }
558
559    #[test]
560    fn no_empty_labels() {
561        let list = make_list();
562        assert!(list.parse_domain("exa..mple.com").is_err());
563    }
564    #[test]
565    fn no_spaces() {
566        let list = make_list();
567        assert!(list.parse_domain("exa mple.com").is_err());
568    }
569
570    #[test]
571    fn no_fwd_slash() {
572        let list = make_list();
573        assert!(list.parse_domain("exa/mple.com").is_ok());
574    }
575
576    #[test]
577    fn no_ipv4() {
578        let list = make_list();
579        assert!(list.parse_domain("127.38.53.247").is_ok());
580    }
581    #[test]
582    fn no_ipv6() {
583        let list = make_list();
584        assert!(list
585            .parse_domain("fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123")
586            .is_ok());
587    }
588    #[test]
589    fn label_max_127() {
590        let list = make_list();
591        let mut too_many_labels_domain = String::from("a");
592        for _ in 0..126 {
593            too_many_labels_domain.push_str(".a");
594        }
595        too_many_labels_domain.push_str(".com");
596        assert!(list.parse_domain(&too_many_labels_domain).is_ok());
597    }
598
599    #[test]
600    fn choose_longest_valid() {
601        let list = make_list();
602        let domain = list.parse_domain("foo.builder.nu").unwrap();
603        assert_eq!(Some("nu"), domain.suffix());
604        assert_eq!(Some("builder.nu"), domain.root());
605
606        let domain = list.parse_domain("foo.fbsbx.com").unwrap();
607        assert_eq!(Some("com"), domain.suffix());
608        assert_eq!(Some("fbsbx.com"), domain.root());
609    }
610
611    #[test]
612    fn allow_num_only_labels() {
613        let list = make_list();
614        assert!(list.parse_domain("127.com").is_ok());
615    }
616}