Skip to main content

tirith_core/
data.rs

// Embedded data from build.rs: known domains, popular repos, public suffix list, OCR confusions.
2
3// Include generated data
4include!(concat!(env!("OUT_DIR"), "/known_domains_gen.rs"));
5include!(concat!(env!("OUT_DIR"), "/popular_repos_gen.rs"));
6include!(concat!(env!("OUT_DIR"), "/psl_gen.rs"));
7include!(concat!(env!("OUT_DIR"), "/ocr_confusions_gen.rs"));
8
9/// Check if a domain is in the known high-value targets list.
10pub fn is_known_domain(domain: &str) -> bool {
11    let lower = domain.to_lowercase();
12    KNOWN_DOMAINS.iter().any(|d| *d == lower)
13}
14
15/// Check if a repo (owner/name) is in the popular repos list.
16pub fn is_popular_repo(owner: &str, name: &str) -> bool {
17    let owner_lower = owner.to_lowercase();
18    let name_lower = name.to_lowercase();
19    POPULAR_REPOS
20        .iter()
21        .any(|(o, n)| o.to_lowercase() == owner_lower && n.to_lowercase() == name_lower)
22}
23
24/// Get all known domains for confusable checking.
25pub fn known_domains() -> &'static [&'static str] {
26    KNOWN_DOMAINS
27}
28
29/// Get the OCR confusion table for confusable domain normalization.
30pub fn ocr_confusions() -> &'static [(&'static str, &'static str)] {
31    OCR_CONFUSIONS
32}
33
34/// Check if a suffix is in the public suffix list.
35pub fn is_public_suffix(suffix: &str) -> bool {
36    let lower = suffix.to_lowercase();
37    PUBLIC_SUFFIXES.iter().any(|s| *s == lower)
38}
39
40/// Extract the registrable domain (eTLD+1) from a hostname.
41/// Returns None if the entire hostname is a public suffix or has no suffix match.
42pub fn registrable_domain(host: &str) -> Option<String> {
43    let lower = host.to_lowercase().trim_end_matches('.').to_string();
44    let labels: Vec<&str> = lower.split('.').collect();
45    if labels.len() < 2 {
46        return None;
47    }
48    // Try multi-part suffixes first (longest match)
49    for i in 0..labels.len() {
50        let suffix = labels[i..].join(".");
51        if is_public_suffix(&suffix) {
52            if i == 0 {
53                // Entire hostname is a public suffix
54                return None;
55            }
56            // eTLD+1 = one label before the suffix + suffix
57            return Some(labels[i - 1..].join("."));
58        }
59    }
60    // Fallback: treat last label as TLD
61    if labels.len() >= 2 {
62        Some(labels[labels.len() - 2..].join("."))
63    } else {
64        None
65    }
66}
67
#[cfg(test)]
mod tests {
    use super::*;

    /// Known-domain lookup accepts the queried domain in any letter case.
    #[test]
    fn test_known_domain() {
        assert!(is_known_domain("github.com"));
        assert!(is_known_domain("GitHub.com"));
        assert!(!is_known_domain("notaknowndomain.com"));
    }

    /// Popular-repo lookup accepts owner and name in any letter case.
    #[test]
    fn test_popular_repo() {
        assert!(is_popular_repo("torvalds", "linux"));
        assert!(is_popular_repo("Torvalds", "LINUX"));
        assert!(!is_popular_repo("nobody", "nothing"));
    }

    /// Hosts with fewer than two labels never yield a registrable domain.
    #[test]
    fn test_registrable_domain_too_few_labels() {
        assert_eq!(registrable_domain(""), None);
        assert_eq!(registrable_domain("localhost"), None);
    }

    /// Case and trailing dots are normalized away before the eTLD+1 is extracted.
    #[test]
    fn test_registrable_domain_normalizes_host() {
        // Holds whether "com" is matched from the suffix list or via the
        // last-label fallback.
        assert_eq!(registrable_domain("www.example.com").as_deref(), Some("example.com"));
        assert_eq!(registrable_domain("WWW.Example.COM.").as_deref(), Some("example.com"));
    }
}