Skip to main content

tirith_core/
data.rs

1//! Embedded lookup tables (known domains, popular repos, OCR confusions,
2//! public suffix list) compiled by `build.rs`.
3
4include!(concat!(env!("OUT_DIR"), "/known_domains_gen.rs"));
5include!(concat!(env!("OUT_DIR"), "/popular_repos_gen.rs"));
6include!(concat!(env!("OUT_DIR"), "/psl_gen.rs"));
7include!(concat!(env!("OUT_DIR"), "/ocr_confusions_gen.rs"));
8
9/// Check if a domain is in the known high-value targets list.
10pub fn is_known_domain(domain: &str) -> bool {
11    let lower = domain.to_lowercase();
12    KNOWN_DOMAINS.iter().any(|d| *d == lower)
13}
14
15/// Check if a repo (owner/name) is in the popular repos list.
16pub fn is_popular_repo(owner: &str, name: &str) -> bool {
17    let owner_lower = owner.to_lowercase();
18    let name_lower = name.to_lowercase();
19    POPULAR_REPOS
20        .iter()
21        .any(|(o, n)| o.to_lowercase() == owner_lower && n.to_lowercase() == name_lower)
22}
23
24/// Get all known domains for confusable checking.
25pub fn known_domains() -> &'static [&'static str] {
26    KNOWN_DOMAINS
27}
28
29/// Get the OCR confusion table for confusable domain normalization.
30pub fn ocr_confusions() -> &'static [(&'static str, &'static str)] {
31    OCR_CONFUSIONS
32}
33
34/// Check if a suffix is in the public suffix list.
35pub fn is_public_suffix(suffix: &str) -> bool {
36    let lower = suffix.to_lowercase();
37    PUBLIC_SUFFIXES.iter().any(|s| *s == lower)
38}
39
40/// Extract the registrable domain (eTLD+1) from a hostname.
41/// Returns None if the entire hostname is a public suffix or has no suffix match.
42pub fn registrable_domain(host: &str) -> Option<String> {
43    let lower = host.to_lowercase().trim_end_matches('.').to_string();
44    let labels: Vec<&str> = lower.split('.').collect();
45    if labels.len() < 2 {
46        return None;
47    }
48    // Longest-suffix match: try the fullest tail first so multi-label
49    // suffixes (e.g. `co.uk`) win over single-label ones.
50    for i in 0..labels.len() {
51        let suffix = labels[i..].join(".");
52        if is_public_suffix(&suffix) {
53            if i == 0 {
54                return None;
55            }
56            return Some(labels[i - 1..].join("."));
57        }
58    }
59    // No PSL match: treat the final label as the TLD.
60    if labels.len() >= 2 {
61        Some(labels[labels.len() - 2..].join("."))
62    } else {
63        None
64    }
65}
66
#[cfg(test)]
mod tests {
    use super::{is_known_domain, is_popular_repo};

    // A listed domain must be found regardless of the caller's casing, and an
    // unlisted one must be rejected.
    #[test]
    fn test_known_domain() {
        for listed in ["github.com", "GitHub.com"] {
            assert!(is_known_domain(listed));
        }
        assert!(!is_known_domain("notaknowndomain.com"));
    }

    // One owner/name pair expected in the table and one that is not.
    #[test]
    fn test_popular_repo() {
        let (owner, repo) = ("torvalds", "linux");
        assert!(is_popular_repo(owner, repo));

        let (owner, repo) = ("nobody", "nothing");
        assert!(!is_popular_repo(owner, repo));
    }
}
83}