domain_lookup_tree/
lib.rs

1use std::collections::HashMap;
2
3/// DomainLookupTree is a data structure which provides efficient domain name lookup matching with
4/// support for wildcard entries.
5///
6/// Requirements for this implementation:
7/// - Given a domain name, determine if it matches an entry in the tree
8/// - There can be an ever-growing amount of tree entries
9/// - Entries can be absolute matches, e.g.: www.google.com
10/// - Entries may be wildcard entries, which is denoted in the entry by providing a leading dot,
11///   e.g.: .twitter.com, .en.wikipedia.org, .giggl.app
12/// - Wilcard entries can not be embedded
13///
14/// To achieve this, we implement a simple tree-style structure which has a root structure that
15/// contains a vector of nodes. These nodes can then contain other node decendants, and also be
16/// marked as "wildcard" which means theres a rule that matches that domain level and all of its
17/// decendants.
18///
19/// If, when performing a lookup, the search domain contains segments deeper than the wildcard
20/// match, it can continue to traverse the tree until it exhausts its lookup options. At that
21/// point, the deepest wildcard entry found would be returned, if no absolute match was found.
22///
23/// It's good to keep in mind that, when traversing the tree, domain names are sorted by top level
24/// to infinite n-level, or in simpler terms, in reverse. This means that if "google.com" is looked
25/// up in the tree, it would split by ".", reverse the vector, then first perform a root node
26/// lookup for "com", and so on.
27///
28/// Walking down the tree - the story of a lookup:
29/// Let's say have a DomainLookupTree with an entry ".giggl.app" which means that the tree looks
30/// like this:
31///
32/// app
33/// └── giggl [wildcard]
34///
35/// A domain lookup for "canary.giggl.app" is requested. First, "app" is matched, but it's not a
36/// wildcard, so it's ignored. We now check the decendants of "app" for "giggl" - it matches, and
37/// it's a wildcard match, so we store it within the context of the lookup. This lookup will now
38/// 100% return a match, even if it isn't absolute. Anyway, we now check the decendants of "giggl"
39/// for "canary", though it doesn't exist, and the traversal ends. Now, we didn't have an absolute
40/// match, but we did have a wildcard match earlier on for ".giggl.app", so we successfully return
41/// the result ".giggl.app" from the lookup function.
42///
43///
44
45type NodeList = HashMap<String, Node>;
46
47#[derive(Debug)]
48pub struct DomainLookupTree {
49    nodes: NodeList,
50    minimum_level: usize,
51}
52
53#[derive(Debug)]
54pub struct Node {
55    wildcard: bool,
56    nodes: NodeList,
57    data: String,
58}
59
60impl Node {
61    fn new(wildcard: bool, data: &str) -> Self {
62        Self {
63            wildcard,
64            nodes: Default::default(),
65            data: data.to_owned(),
66        }
67    }
68}
69
70impl DomainLookupTree {
71    pub fn new() -> DomainLookupTree {
72        DomainLookupTree {
73            nodes: Default::default(),
74            minimum_level: 0,
75        }
76    }
77
78    pub fn insert(&mut self, domain: &str) {
79        let is_wildcard = domain.starts_with(".");
80        let segments = domain_to_rseg(domain);
81        let n_segments = segments.len();
82
83        let mut head = &mut self.nodes;
84        for (i, segment) in segments.iter().copied().enumerate() {
85            let node = head
86                .entry(segment.to_owned())
87                .or_insert_with(|| Node::new(i == n_segments - 2 && is_wildcard, segment));
88
89            if i == n_segments - 2 && is_wildcard {
90                return;
91            }
92
93            head = &mut node.nodes;
94        }
95    }
96
97    pub fn lookup(&self, domain: &str) -> Option<String> {
98        match self.traverse(domain) {
99            None => None,
100            Some((fqdn, node)) => {
101                return Some(format!("{}{}", if node.wildcard { "." } else { "" }, fqdn))
102            }
103        }
104    }
105
106    pub fn traverse(&self, domain: &str) -> Option<(String, &Node)> {
107        let segments = domain_to_rseg(domain);
108        let mut wildcard_match = None;
109        // We start the traversal at the root
110        let mut head: &NodeList = &self.nodes;
111
112        let mut fqdn = String::new();
113
114        // We traverse the tree in level-reverse order
115        for (i, segment) in segments.iter().copied().enumerate() {
116            // Now we look up the children of the latest matched node
117            // If this is the first iteration, then it's the root NodeList
118            if let Some(child) = head.get(segment) {
119                fqdn = format!(
120                    "{}{}{}",
121                    segment.to_owned(),
122                    if i == 0 { "" } else { "." },
123                    fqdn
124                );
125                head = &child.nodes;
126                // We have exhausted the traversal. If the traversal depth is equal to the segment
127                // length, then we've found an absolute match!
128                if i == segments.len() - 1 {
129                    return Some((fqdn, child));
130                } else if child.wildcard {
131                    // Current node is wildcard, so we now 100% have a value to return
132                    wildcard_match = Some(child);
133                }
134            } else {
135                // We have exhausted the traversal.
136                break;
137            }
138        }
139
140        if let Some(m) = wildcard_match {
141            Some((fqdn, m))
142        } else {
143            None
144        }
145    }
146}
147
148fn domain_to_rseg(domain: &str) -> Vec<&str> {
149    domain.rsplit(".").collect::<Vec<&str>>()
150}