1#![warn(missing_docs)]
13
14pub use crate::suffix_list::{Source, SuffixList};
15pub use error::{Result, TLDExtractError};
16#[cfg(feature = "serde")]
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use std::ops::Index;
20
21mod error;
22mod snapshot;
23mod suffix_list;
24
/// A trie of domain labels holding suffix rules.
///
/// Each node maps a single label to the subtree of labels that may follow it.
/// Rules are added with `insert` and looked up with `search`, both of which
/// walk the labels in the order they are given.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Debug, Clone)]
pub struct TLDTrieTree {
    /// Child nodes, keyed by the label that leads to them.
    node: HashMap<String, TLDTrieTree>,
    /// `true` when an inserted rule terminates at this node.
    end: bool,
}
34
35impl TLDTrieTree {
36 #[inline]
38 fn insert(&mut self, keys: Vec<&str>) {
39 let keys_len = keys.len();
40 let mut current_node = &mut self.node;
41 for (index, mut key) in keys.clone().into_iter().enumerate() {
42 let mut is_exclude = false;
43 if index == keys_len - 1 && key.starts_with('!') {
45 key = &key[1..];
46 is_exclude = true;
47 }
48 let next_node = current_node.entry(key.to_string()).or_insert(TLDTrieTree {
50 node: Default::default(),
51 end: false,
52 });
53 if !is_exclude && (index == keys_len - 1)
55 || (key != "*" && index == keys_len - 2 && keys[index + 1] == "*")
57 {
58 next_node.end = true;
59 }
60 current_node = &mut next_node.node;
61 }
62 }
63 #[inline]
65 fn search(&self, keys: &[String]) -> Vec<Suffix> {
66 let mut suffix_list = Vec::new();
67 let mut current_node = &self.node;
68 for key in keys.iter() {
69 match current_node.get(key) {
70 Some(next_node) => {
71 suffix_list.push(Suffix {
72 suffix: key.to_string(),
73 end: next_node.end,
74 });
75 current_node = &next_node.node;
76 }
77 None => {
78 if let Some(next_node) = current_node.get("*") {
79 suffix_list.push(Suffix {
80 suffix: key.to_string(),
81 end: next_node.end,
82 });
83 }
84 break;
85 }
86 }
87 }
88 suffix_list
89 }
90}
91
/// One label matched while walking the trie.
#[derive(Debug)]
struct Suffix {
    /// The label text that was matched.
    suffix: String,
    /// The `end` flag of the trie node this label matched.
    end: bool,
}
97
/// The parts of a host name produced by `TLDExtract::extract`.
///
/// Every field is `None` when the corresponding part is absent.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Default)]
pub struct ExtractResult {
    /// The labels to the left of `domain`, joined with `.`.
    pub subdomain: Option<String>,
    /// The single label immediately to the left of the suffix.
    ///
    /// `None` when the whole input is a suffix or the label is empty.
    pub domain: Option<String>,
    /// The matched suffix, joined with `.`; `None` when no rule matched.
    pub suffix: Option<String>,
    /// `domain` and `suffix` joined with `.`.
    ///
    /// Only set when a suffix was matched and the input is more than just
    /// that suffix.
    pub registered_domain: Option<String>,
}
111
112#[derive(Debug)]
114pub struct TLDExtract {
115 suffix_list: SuffixList,
116 tld_trie: TLDTrieTree,
117 domain_to_unicode: bool,
118}
119
120impl Default for TLDExtract {
121 fn default() -> Self {
122 let mut suffix = SuffixList::default();
123 let trie = suffix.build().expect("default trie build error");
124 TLDExtract {
125 suffix_list: suffix.clone(),
126 tld_trie: trie,
127 domain_to_unicode: true,
128 }
129 }
130}
131
132impl TLDExtract {
133 #[inline]
135 pub fn new(suffix: SuffixList, domain_to_unicode: bool) -> Result<Self> {
136 let mut new_suffix = suffix;
137 let trie = new_suffix.build()?;
138 Ok(TLDExtract {
139 suffix_list: new_suffix.clone(),
140 tld_trie: trie,
141 domain_to_unicode,
142 })
143 }
144 #[inline]
146 pub fn update(&mut self, suffix: Option<SuffixList>) {
147 if let Some(new_suffix) = suffix {
148 self.suffix_list = new_suffix;
149 }
150 let backup_tld_trie = self.tld_trie.clone();
151 match self.suffix_list.build() {
152 Ok(trie) => {
153 self.tld_trie = trie;
154 }
155 Err(_err) => {
156 self.tld_trie = backup_tld_trie;
158 }
159 }
160 }
161}
162
163impl TLDExtract {
172 #[inline]
174 pub fn extract(&mut self, target: &str) -> Result<ExtractResult> {
175 let target = match idna::domain_to_ascii(target) {
177 Ok(target) => target,
178 Err(err) => {
179 return Err(TLDExtractError::DomainError(err.to_string()));
180 }
181 };
182 let target = target
183 .trim_matches(|ch: char| ch.is_whitespace() || ch <= ' ' || ch.is_control())
184 .to_string();
185 for (index, ch) in target.chars().enumerate() {
186 if !ch.is_ascii_alphanumeric() && ch != '.' && ch != '-'
187 || ((index == 0 || index == target.len() - 1) && ch == '-')
188 {
189 return Err(TLDExtractError::DomainError(format!("char:{ch}")));
190 }
191 }
192 let keys: Vec<String> = target.rsplit('.').map(|s| s.to_string()).collect();
194 let mut extract_result = ExtractResult::default();
195 if self.suffix_list.is_expired() {
196 self.update(None);
197 }
198 let mut suffix_list = self.tld_trie.search(&keys);
199 let rev_key: Vec<String> = keys.clone().into_iter().rev().collect();
200 let rev_key = rev_key.as_slice();
201 let mut sl = Vec::new();
202 while let Some(s) = suffix_list.pop() {
203 if s.end {
204 sl.push(s.suffix);
205 while let Some(s) = suffix_list.pop() {
206 sl.push(s.suffix);
207 }
208 }
209 }
210 if !sl.is_empty() {
211 let suffix = self.domain_to_unicode(sl.join("."));
212 extract_result.suffix = Some(suffix);
213 }
214 if keys.len() == sl.len() {
216 return Ok(extract_result);
217 }
218 let index = rev_key.len() - sl.len() - 1;
220 let domain = self.domain_to_unicode(rev_key.index(index).to_string());
221 if !domain.is_empty() {
222 extract_result.domain = Some(domain);
223 }
224 let subdomain = self.domain_to_unicode(rev_key[..index].join("."));
225 let registered_domain = self.domain_to_unicode(rev_key[index..].join("."));
226 if !subdomain.is_empty() {
227 extract_result.subdomain = Some(subdomain);
228 }
229 if !sl.is_empty() {
230 extract_result.registered_domain = Some(registered_domain);
231 }
232 Ok(extract_result)
233 }
234 fn domain_to_unicode(&self, mut domain: String) -> String {
236 if self.domain_to_unicode {
237 let (unicode, err) = idna::domain_to_unicode(&domain);
238 if err.is_ok() && !unicode.is_empty() {
239 domain = unicode;
240 }
241 }
242 domain
243 }
244}