use crate::error::DomainCheckError;
use std::collections::{HashMap, HashSet};
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant};
/// In-memory lookup state shared by every caller in the process.
struct BootstrapCache {
    /// Lowercase TLD mapped to its RDAP domain-query base URL, as loaded from
    /// the bootstrap registry.
    rdap_endpoints: HashMap<String, String>,
    /// Lowercase TLD mapped to a WHOIS server; an empty string records that
    /// discovery was attempted and found nothing.
    whois_servers: HashMap<String, String>,
    /// TLDs that were looked up in the bootstrap data and had no endpoint.
    no_rdap: HashSet<String>,
    /// Set once the bootstrap registry has been loaded successfully.
    rdap_loaded: bool,
    /// Moment of the last successful bootstrap load, if there was one.
    last_fetch: Option<Instant>,
}

/// How long loaded bootstrap data is considered fresh (24 hours).
const BOOTSTRAP_TTL: Duration = Duration::from_secs(24 * 3600);

impl BootstrapCache {
    /// Creates an empty cache that has never been loaded.
    fn new() -> Self {
        Self {
            last_fetch: None,
            rdap_loaded: false,
            no_rdap: HashSet::new(),
            whois_servers: HashMap::new(),
            rdap_endpoints: HashMap::new(),
        }
    }

    /// Reports whether the bootstrap data needs refreshing: `true` when it was
    /// never loaded or when the last load is older than [`BOOTSTRAP_TTL`].
    fn is_stale(&self) -> bool {
        self.last_fetch
            .map_or(true, |fetched_at| fetched_at.elapsed() > BOOTSTRAP_TTL)
    }
}
/// Returns the process-wide [`BootstrapCache`], creating an empty one the
/// first time it is requested.
fn bootstrap_cache() -> &'static Mutex<BootstrapCache> {
    static CACHE: OnceLock<Mutex<BootstrapCache>> = OnceLock::new();

    // Named initialiser; `get_or_init` only invokes it for the first caller.
    fn empty_cache() -> Mutex<BootstrapCache> {
        Mutex::new(BootstrapCache::new())
    }

    CACHE.get_or_init(empty_cache)
}
/// Returns the built-in table of TLDs and their RDAP domain-query base URLs.
///
/// Keys are lowercase TLDs without a leading dot. Every URL ends in
/// `/domain/`. A fresh map is built on each call.
pub fn get_rdap_registry_map() -> HashMap<&'static str, &'static str> {
    // Base URLs that are shared by several TLDs in the table below.
    const IDENTITY_DIGITAL: &str = "https://rdap.identitydigital.services/rdap/domain/";
    const GOOGLE: &str = "https://pubapi.registry.google/rdap/domain/";

    const ENTRIES: [(&str, &str); 32] = [
        // Generic TLDs
        ("com", "https://rdap.verisign.com/com/v1/domain/"),
        ("net", "https://rdap.verisign.com/net/v1/domain/"),
        ("org", "https://rdap.publicinterestregistry.org/rdap/domain/"),
        ("info", IDENTITY_DIGITAL),
        ("biz", "https://rdap.nic.biz/domain/"),
        ("app", GOOGLE),
        ("dev", GOOGLE),
        ("page", GOOGLE),
        ("xyz", "https://rdap.centralnic.com/xyz/domain/"),
        ("tech", "https://rdap.centralnic.com/tech/domain/"),
        ("online", "https://rdap.centralnic.com/online/domain/"),
        ("site", "https://rdap.centralnic.com/site/domain/"),
        ("website", "https://rdap.centralnic.com/website/domain/"),
        ("blog", "https://rdap.blog.fury.ca/rdap/domain/"),
        ("shop", "https://rdap.gmoregistry.net/rdap/domain/"),
        ("ai", IDENTITY_DIGITAL),
        ("io", IDENTITY_DIGITAL),
        ("me", IDENTITY_DIGITAL),
        ("zone", IDENTITY_DIGITAL),
        ("digital", IDENTITY_DIGITAL),
        // Country-code TLDs and the remainder
        ("us", "https://rdap.nic.us/domain/"),
        ("uk", "https://rdap.nominet.uk/domain/"),
        ("de", "https://rdap.denic.de/domain/"),
        ("ca", "https://rdap.ca.fury.ca/rdap/domain/"),
        ("au", "https://rdap.cctld.au/rdap/domain/"),
        ("fr", "https://rdap.nic.fr/domain/"),
        ("nl", "https://rdap.sidn.nl/domain/"),
        ("br", "https://rdap.registro.br/domain/"),
        ("in", "https://rdap.nixiregistry.in/rdap/domain/"),
        ("tv", "https://rdap.nic.tv/domain/"),
        ("cc", "https://tld-rdap.verisign.com/cc/v1/domain/"),
        ("cloud", "https://rdap.registry.cloud/rdap/domain/"),
    ];

    ENTRIES.iter().copied().collect()
}
/// Lists every TLD with a known RDAP endpoint, sorted and without duplicates.
///
/// The result is the union of the built-in registry and whatever the
/// bootstrap cache currently holds. If the cache lock cannot be acquired,
/// only the built-in TLDs are returned.
pub fn get_all_known_tlds() -> Vec<String> {
    let mut known: HashSet<String> = get_rdap_registry_map()
        .into_keys()
        .map(String::from)
        .collect();

    // Best effort: a poisoned lock simply means no cached TLDs are added.
    if let Ok(guard) = bootstrap_cache().lock() {
        known.extend(guard.rdap_endpoints.keys().cloned());
    }

    let mut sorted: Vec<String> = known.into_iter().collect();
    sorted.sort();
    sorted
}
/// Returns the TLD list of a built-in preset, or `None` for an unknown name.
///
/// The preset name is matched case-insensitively. `"shopping"` is accepted as
/// an alias for `"ecommerce"`.
pub fn get_preset_tlds(preset: &str) -> Option<Vec<String>> {
    let names: &[&str] = match preset.to_lowercase().as_str() {
        "startup" => &["com", "org", "io", "ai", "tech", "app", "dev", "xyz"],
        "enterprise" => &["com", "org", "net", "info", "biz", "us"],
        "country" => &["us", "uk", "de", "fr", "ca", "au", "br", "in", "nl"],
        "popular" => &[
            "com", "net", "org", "io", "ai", "app", "dev", "tech", "me", "co", "xyz",
        ],
        "classic" => &["com", "net", "org", "info", "biz"],
        "tech" => &[
            "io", "ai", "app", "dev", "tech", "cloud", "software", "digital", "codes", "systems",
            "network", "solutions",
        ],
        "creative" => &[
            "design", "art", "studio", "media", "photography", "film", "music", "gallery",
            "graphics", "ink",
        ],
        "ecommerce" | "shopping" => &[
            "shop", "store", "market", "sale", "deals", "shopping", "buy", "bargains",
        ],
        "finance" => &[
            "finance", "capital", "fund", "money", "investments", "insurance", "tax", "exchange",
            "trading",
        ],
        "web" => &[
            "web", "site", "website", "online", "blog", "page", "wiki", "host", "email",
        ],
        "trendy" => &[
            "xyz", "online", "site", "top", "icu", "fun", "space", "click", "website", "life",
            "world", "live", "today",
        ],
        _ => return None,
    };

    Some(names.iter().map(|name| (*name).to_owned()).collect())
}
/// Resolves a preset name, letting user-defined presets override built-in ones.
///
/// `custom_presets` is searched first, using `preset` exactly as given and
/// then its lowercase form. If neither key exists (or no map is supplied) the
/// lowercase name is looked up among the built-in presets.
///
/// Returns `None` when the name matches neither a custom nor a built-in preset.
pub fn get_preset_tlds_with_custom(
    preset: &str,
    custom_presets: Option<&std::collections::HashMap<String, Vec<String>>>,
) -> Option<Vec<String>> {
    let preset_lower = preset.to_lowercase();

    let custom_match = custom_presets.and_then(|custom_map| {
        custom_map
            .get(preset)
            .or_else(|| custom_map.get(&preset_lower))
    });

    match custom_match {
        Some(custom_tlds) => Some(custom_tlds.clone()),
        None => get_preset_tlds(&preset_lower),
    }
}
/// Returns the names of the built-in presets in alphabetical order.
///
/// Aliases (such as `"shopping"`) are not listed.
pub fn get_available_presets() -> Vec<&'static str> {
    const PRESET_NAMES: [&str; 11] = [
        "classic",
        "country",
        "creative",
        "ecommerce",
        "enterprise",
        "finance",
        "popular",
        "startup",
        "tech",
        "trendy",
        "web",
    ];

    PRESET_NAMES.to_vec()
}
/// Checks that every TLD in `preset_tlds` has an entry in the built-in RDAP
/// registry.
///
/// The comparison is exact (case-sensitive). An empty slice is considered
/// valid.
#[allow(dead_code)]
pub fn validate_preset_tlds(preset_tlds: &[String]) -> bool {
    let builtin = get_rdap_registry_map();
    preset_tlds
        .iter()
        .map(String::as_str)
        .all(|tld| builtin.contains_key(tld))
}
/// Resolves the RDAP domain-query base URL for `tld` (matched case-insensitively).
///
/// Lookup order:
/// 1. the built-in registry;
/// 2. the bootstrap cache, while its data is still fresh — this includes the
///    negative cache of TLDs already known to be missing;
/// 3. when `use_bootstrap` is `true`, the bootstrap registry itself, which is
///    (re)fetched if it was never loaded or has gone stale.
///
/// # Errors
///
/// * an internal error if the bootstrap cache lock is poisoned;
/// * a bootstrap error if the TLD has no endpoint, or if it is unknown locally
///   and `use_bootstrap` is `false`;
/// * any error produced while fetching the bootstrap registry.
pub async fn get_rdap_endpoint(tld: &str, use_bootstrap: bool) -> Result<String, DomainCheckError> {
    let tld_lower = tld.to_lowercase();

    if let Some(endpoint) = get_rdap_registry_map().get(tld_lower.as_str()) {
        return Ok(endpoint.to_string());
    }

    // Fast path: answer from the cache without touching the network. Staleness
    // is read once so that the positive and negative lookups agree on it.
    {
        let cache = bootstrap_cache()
            .lock()
            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
        if !cache.is_stale() {
            if let Some(endpoint) = cache.rdap_endpoints.get(&tld_lower) {
                return Ok(endpoint.clone());
            }
            if cache.no_rdap.contains(&tld_lower) {
                return Err(DomainCheckError::bootstrap(
                    &tld_lower,
                    "TLD has no known RDAP endpoint",
                ));
            }
        }
    }

    if !use_bootstrap {
        return Err(DomainCheckError::bootstrap(
            &tld_lower,
            "No known RDAP endpoint and bootstrap disabled",
        ));
    }

    // The guard is confined to this block so it is released before the await.
    let needs_fetch = {
        let cache = bootstrap_cache()
            .lock()
            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
        !cache.rdap_loaded || cache.is_stale()
    };
    if needs_fetch {
        fetch_full_bootstrap().await?;
    }

    // One guard covers both the lookup and the negative-cache insert, so no
    // refresh can interleave between the two steps.
    let mut cache = bootstrap_cache()
        .lock()
        .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
    if let Some(endpoint) = cache.rdap_endpoints.get(&tld_lower) {
        return Ok(endpoint.clone());
    }
    cache.no_rdap.insert(tld_lower.clone());
    drop(cache);

    Err(DomainCheckError::bootstrap(
        &tld_lower,
        "TLD not found in IANA bootstrap registry",
    ))
}
/// Downloads the IANA RDAP DNS bootstrap registry and replaces the cached
/// endpoint table with its contents.
///
/// On success the cache is marked as loaded, its fetch time is reset and the
/// negative cache is cleared. On failure the cache is left untouched.
///
/// # Errors
///
/// * a network error if the HTTP client cannot be built;
/// * a bootstrap error (with TLD `"*"`) if the request fails, the server
///   answers with a non-success status, the body is not valid JSON, or the
///   document has no `services` array;
/// * an internal error if the bootstrap cache lock is poisoned.
async fn fetch_full_bootstrap() -> Result<(), DomainCheckError> {
    const BOOTSTRAP_URL: &str = "https://data.iana.org/rdap/dns.json";

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()
        .map_err(|e| {
            DomainCheckError::network_with_source("Failed to create HTTP client", e.to_string())
        })?;

    let response = client.get(BOOTSTRAP_URL).send().await.map_err(|e| {
        DomainCheckError::bootstrap("*", format!("Failed to fetch bootstrap registry: {}", e))
    })?;
    if !response.status().is_success() {
        return Err(DomainCheckError::bootstrap(
            "*",
            format!("Bootstrap registry returned HTTP {}", response.status()),
        ));
    }

    let json: serde_json::Value = response.json().await.map_err(|e| {
        DomainCheckError::bootstrap("*", format!("Failed to parse bootstrap JSON: {}", e))
    })?;
    let services = json
        .get("services")
        .and_then(|s| s.as_array())
        .ok_or_else(|| {
            DomainCheckError::bootstrap(
                "*",
                "Invalid bootstrap JSON: missing or invalid 'services' array",
            )
        })?;
    let endpoints = collect_bootstrap_endpoints(services);

    // The lock is taken only after all awaits are done, so the guard is never
    // held across a suspension point.
    let mut cache = bootstrap_cache()
        .lock()
        .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
    cache.rdap_endpoints = endpoints;
    cache.rdap_loaded = true;
    cache.last_fetch = Some(Instant::now());
    // Fresh data invalidates earlier "no endpoint" verdicts.
    cache.no_rdap.clear();
    Ok(())
}

/// Builds the lowercase-TLD to endpoint map from the registry's `services`
/// array. Entries that do not have the `[[tld, ...], [url, ...]]` shape are
/// skipped; a TLD listed more than once keeps its last endpoint.
fn collect_bootstrap_endpoints(services: &[serde_json::Value]) -> HashMap<String, String> {
    let mut endpoints = HashMap::new();
    for service in services {
        let Some([tlds, urls, ..]) = service.as_array().map(Vec::as_slice) else {
            continue;
        };
        let Some(base_url) = urls.as_array().and_then(|urls| preferred_base_url(urls)) else {
            continue;
        };
        let Some(tlds) = tlds.as_array() else {
            continue;
        };

        // Normalise to exactly one slash before appending the query path.
        let endpoint = format!("{}/domain/", base_url.trim_end_matches('/'));
        for tld in tlds.iter().filter_map(|t| t.as_str()) {
            endpoints.insert(tld.to_lowercase(), endpoint.clone());
        }
    }
    endpoints
}

/// Picks the base URL to use from one service's URL list: the first HTTPS
/// entry if there is one, otherwise the first element when it is a string.
fn preferred_base_url(urls: &[serde_json::Value]) -> Option<&str> {
    let is_https = |url: &&str| {
        url.get(.."https://".len())
            .map_or(false, |scheme| scheme.eq_ignore_ascii_case("https://"))
    };

    urls.iter()
        .filter_map(|u| u.as_str())
        .find(is_https)
        // No HTTPS entry listed: fall back to the first element, as before.
        .or_else(|| urls.first().and_then(|u| u.as_str()))
}
/// Makes sure the bootstrap registry is loaded and fresh, fetching it when it
/// was never loaded or has gone stale. Does nothing otherwise.
///
/// # Errors
///
/// Returns an internal error if the cache lock is poisoned, or whatever error
/// the fetch itself produces.
pub async fn initialize_bootstrap() -> Result<(), DomainCheckError> {
    // Scoped so the guard is released before the await below.
    let up_to_date = {
        let guard = bootstrap_cache()
            .lock()
            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
        guard.rdap_loaded && !guard.is_stale()
    };

    if up_to_date {
        return Ok(());
    }
    fetch_full_bootstrap().await
}
/// Stores `server` as the WHOIS server for `tld` (the key is lowercased).
///
/// Storing an empty `server` records a negative result for that TLD.
///
/// # Errors
///
/// Returns an internal error if the cache lock is poisoned.
pub fn cache_whois_server(tld: &str, server: &str) -> Result<(), DomainCheckError> {
    let key = tld.to_lowercase();
    let value = server.to_string();

    let mut guard = bootstrap_cache().lock().map_err(|_| {
        DomainCheckError::internal("Failed to acquire bootstrap cache lock for writing")
    })?;
    guard.whois_servers.insert(key, value);
    Ok(())
}
/// Returns the cached WHOIS server for `tld`, matched case-insensitively.
///
/// Yields `None` when nothing is cached, when the cached entry is the empty
/// negative marker, or when the cache lock cannot be acquired.
pub fn get_cached_whois_server(tld: &str) -> Option<String> {
    let key = tld.to_lowercase();
    let guard = bootstrap_cache().lock().ok()?;
    guard
        .whois_servers
        .get(&key)
        .filter(|server| !server.is_empty())
        .cloned()
}
/// Reports whether a previous WHOIS discovery for `tld` was recorded as having
/// found no server (an empty cached entry).
///
/// Returns `false` when nothing is cached for the TLD or when the cache lock
/// cannot be acquired.
pub fn is_whois_negatively_cached(tld: &str) -> bool {
    let key = tld.to_lowercase();
    match bootstrap_cache().lock() {
        Ok(guard) => guard
            .whois_servers
            .get(&key)
            .map_or(false, |server| server.is_empty()),
        Err(_) => false,
    }
}
/// Finds the WHOIS server for `tld`, consulting the cache before running
/// discovery.
///
/// A cached server is returned directly and a cached negative result yields
/// `None` without a new lookup. Otherwise discovery runs and its outcome,
/// positive or negative, is written back to the cache.
pub async fn get_whois_server(tld: &str) -> Option<String> {
    let tld_lower = tld.to_lowercase();

    if let cached @ Some(_) = get_cached_whois_server(&tld_lower) {
        return cached;
    }
    if is_whois_negatively_cached(&tld_lower) {
        return None;
    }

    let discovered = crate::protocols::whois::discover_whois_server(&tld_lower).await;

    // An empty string is the negative marker. Failing to update the cache is
    // not fatal, so the result of the write is deliberately ignored.
    let _ = cache_whois_server(&tld_lower, discovered.as_deref().unwrap_or(""));
    discovered
}
/// Extracts the top-level domain (the last dot-separated label) of `domain`,
/// lowercased.
///
/// A single trailing dot, as used by fully-qualified names such as
/// `"example.com."`, is ignored, so that input yields `"com"`.
///
/// # Errors
///
/// Returns an invalid-domain error when `domain` contains no dot separating
/// two labels, or when its final label is empty (e.g. `"example.."`).
pub fn extract_tld(domain: &str) -> Result<String, DomainCheckError> {
    // The root label of a fully-qualified name carries no TLD information.
    let name = domain.strip_suffix('.').unwrap_or(domain);

    let Some((_, tld)) = name.rsplit_once('.') else {
        return Err(DomainCheckError::invalid_domain(
            domain,
            "Domain must contain at least one dot",
        ));
    };
    if tld.is_empty() {
        return Err(DomainCheckError::invalid_domain(
            domain,
            "Domain has an empty top-level label",
        ));
    }

    Ok(tld.to_lowercase())
}
#[allow(dead_code)]
pub fn clear_bootstrap_cache() -> Result<(), DomainCheckError> {
let mut cache = bootstrap_cache().lock().map_err(|_| {
DomainCheckError::internal("Failed to acquire bootstrap cache lock for clearing")
})?;
cache.rdap_endpoints.clear();
cache.whois_servers.clear();
cache.no_rdap.clear();
cache.rdap_loaded = false;
cache.last_fetch = None;
Ok(())
}
/// Returns `(cached RDAP endpoint count, whether the cache is stale)`.
///
/// # Errors
///
/// Returns an internal error if the cache lock is poisoned.
#[allow(dead_code)]
pub fn get_bootstrap_cache_stats() -> Result<(usize, bool), DomainCheckError> {
    bootstrap_cache()
        .lock()
        .map(|guard| (guard.rdap_endpoints.len(), guard.is_stale()))
        .map_err(|_| {
            DomainCheckError::internal("Failed to acquire bootstrap cache lock for stats")
        })
}
/// Unit tests for TLD extraction, the built-in registry, the bootstrap cache
/// and custom preset resolution.
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialises the tests that mutate the process-wide bootstrap cache.
    ///
    /// The test harness runs tests in parallel; without this, one test's
    /// `clear_bootstrap_cache` could wipe an entry another test just stored.
    fn cache_test_guard() -> std::sync::MutexGuard<'static, ()> {
        static GUARD: std::sync::Mutex<()> = std::sync::Mutex::new(());
        // A failed cache test must not cascade into the others via poisoning.
        GUARD.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn test_extract_tld_basic() {
        assert_eq!(extract_tld("example.com").unwrap(), "com");
        assert_eq!(extract_tld("test.org").unwrap(), "org");
        assert_eq!(extract_tld("sub.example.com").unwrap(), "com");
    }

    #[test]
    fn test_extract_tld_case_insensitive() {
        assert_eq!(extract_tld("EXAMPLE.COM").unwrap(), "com");
        assert_eq!(extract_tld("Test.ORG").unwrap(), "org");
    }

    #[test]
    fn test_extract_tld_no_dot() {
        assert!(extract_tld("invalid").is_err());
        let err = extract_tld("invalid").unwrap_err();
        assert!(err.to_string().contains("at least one dot"));
    }

    #[test]
    fn test_extract_tld_empty() {
        assert!(extract_tld("").is_err());
    }

    #[test]
    fn test_extract_tld_multi_level() {
        assert_eq!(extract_tld("example.co.uk").unwrap(), "uk");
    }

    #[test]
    fn test_registry_map_contains_common_tlds() {
        let registry = get_rdap_registry_map();
        assert!(registry.contains_key("com"));
        assert!(registry.contains_key("org"));
        assert!(registry.contains_key("net"));
        assert!(registry.contains_key("io"));
        assert!(registry.contains_key("ai"));
        assert!(registry.contains_key("dev"));
        assert!(registry.contains_key("app"));
    }

    #[test]
    fn test_registry_map_size() {
        let registry = get_rdap_registry_map();
        assert!(
            registry.len() >= 30,
            "Expected at least 30 entries, got {}",
            registry.len()
        );
    }

    #[test]
    fn test_all_endpoints_are_valid_https_urls() {
        let registry = get_rdap_registry_map();
        for (tld, endpoint) in &registry {
            assert!(
                endpoint.starts_with("https://"),
                "Endpoint for '{}' must use HTTPS: {}",
                tld,
                endpoint
            );
            assert!(
                endpoint.ends_with("/domain/"),
                "Endpoint for '{}' must end with /domain/: {}",
                tld,
                endpoint
            );
        }
    }

    #[test]
    fn test_registry_does_not_contain_dead_cctlds() {
        let registry = get_rdap_registry_map();
        assert!(!registry.contains_key("co"));
        assert!(!registry.contains_key("eu"));
        assert!(!registry.contains_key("it"));
        assert!(!registry.contains_key("jp"));
        assert!(!registry.contains_key("es"));
        assert!(!registry.contains_key("cn"));
    }

    #[tokio::test]
    async fn test_get_rdap_endpoint_builtin() {
        let endpoint = get_rdap_endpoint("com", false).await.unwrap();
        assert!(endpoint.contains("verisign.com"));
    }

    #[tokio::test]
    async fn test_get_rdap_endpoint_case_insensitive() {
        let endpoint = get_rdap_endpoint("COM", false).await.unwrap();
        assert!(endpoint.contains("verisign.com"));
    }

    #[tokio::test]
    async fn test_get_rdap_endpoint_unknown_no_bootstrap() {
        let result = get_rdap_endpoint("unknowntld123", false).await;
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(
            matches!(err, DomainCheckError::BootstrapError { .. }),
            "Expected BootstrapError, got: {:?}",
            err
        );
    }

    #[test]
    fn test_bootstrap_cache_new() {
        let cache = BootstrapCache::new();
        assert!(!cache.rdap_loaded);
        assert!(cache.last_fetch.is_none());
        assert!(cache.rdap_endpoints.is_empty());
        assert!(cache.whois_servers.is_empty());
        assert!(cache.no_rdap.is_empty());
        assert!(cache.is_stale());
    }

    #[test]
    fn test_bootstrap_cache_is_stale_no_fetch() {
        let cache = BootstrapCache::new();
        assert!(cache.is_stale());
    }

    #[test]
    fn test_bootstrap_cache_is_stale_fresh() {
        let mut cache = BootstrapCache::new();
        cache.last_fetch = Some(Instant::now());
        assert!(!cache.is_stale());
    }

    #[test]
    fn test_whois_server_caching() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        cache_whois_server("com", "whois.verisign-grs.com").unwrap();
        assert_eq!(
            get_cached_whois_server("com"),
            Some("whois.verisign-grs.com".to_string())
        );
        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_negative_caching() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        cache_whois_server("fake", "").unwrap();
        assert_eq!(get_cached_whois_server("fake"), None);
        assert!(is_whois_negatively_cached("fake"));
        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_cache_case_insensitive() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        cache_whois_server("COM", "whois.verisign-grs.com").unwrap();
        assert_eq!(
            get_cached_whois_server("com"),
            Some("whois.verisign-grs.com".to_string())
        );
        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_not_negatively_cached_when_absent() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        assert!(!is_whois_negatively_cached("neverqueried"));
        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_clear_bootstrap_cache() {
        let _guard = cache_test_guard();
        cache_whois_server("test", "whois.test.com").unwrap();
        clear_bootstrap_cache().unwrap();
        assert_eq!(get_cached_whois_server("test"), None);
        assert!(!is_whois_negatively_cached("test"));
    }

    #[test]
    fn test_get_bootstrap_cache_stats() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        let (count, stale) = get_bootstrap_cache_stats().unwrap();
        assert_eq!(count, 0);
        assert!(stale);
        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_validate_preset_tlds_all_hardcoded() {
        let tlds = vec!["com".to_string(), "org".to_string(), "net".to_string()];
        assert!(validate_preset_tlds(&tlds));
    }

    #[test]
    fn test_validate_preset_tlds_with_unknown() {
        let tlds = vec!["com".to_string(), "unknowntld999".to_string()];
        assert!(!validate_preset_tlds(&tlds));
    }

    #[test]
    fn test_validate_preset_tlds_empty() {
        assert!(validate_preset_tlds(&[]));
    }

    #[test]
    fn test_custom_preset_takes_precedence() {
        let mut custom = HashMap::new();
        custom.insert(
            "startup".to_string(),
            vec!["custom1".to_string(), "custom2".to_string()],
        );
        let result = get_preset_tlds_with_custom("startup", Some(&custom)).unwrap();
        assert_eq!(result, vec!["custom1", "custom2"]);
    }

    #[test]
    fn test_custom_preset_fallback_to_builtin() {
        let custom: HashMap<String, Vec<String>> = HashMap::new();
        let result = get_preset_tlds_with_custom("startup", Some(&custom)).unwrap();
        assert!(result.contains(&"com".to_string()));
    }

    #[test]
    fn test_custom_preset_exact_case_match() {
        let mut custom = HashMap::new();
        custom.insert("MyPreset".to_string(), vec!["com".to_string()]);
        let result = get_preset_tlds_with_custom("MyPreset", Some(&custom)).unwrap();
        assert_eq!(result, vec!["com"]);
    }

    #[test]
    fn test_custom_preset_lowercase_key_matches_lowercase_query() {
        let mut custom = HashMap::new();
        custom.insert("mypreset".to_string(), vec!["org".to_string()]);
        let result = get_preset_tlds_with_custom("MYPRESET", Some(&custom)).unwrap();
        assert_eq!(result, vec!["org"]);
    }

    #[test]
    fn test_custom_preset_none_map() {
        let result = get_preset_tlds_with_custom("startup", None).unwrap();
        assert!(result.contains(&"com".to_string()));
    }

    #[test]
    fn test_custom_preset_unknown_returns_none() {
        let result = get_preset_tlds_with_custom("nonexistent", None);
        assert!(result.is_none());
    }
}
/// Tests for the built-in TLD presets and the list of known TLDs.
#[cfg(test)]
mod preset_tests {
    use super::*;

    /// Presets whose TLDs are all expected to be in the built-in registry.
    const CORE_PRESETS: [&str; 4] = ["startup", "enterprise", "country", "classic"];

    /// Shorthand for "`tlds` contains the TLD `name`".
    fn has(tlds: &[String], name: &str) -> bool {
        tlds.iter().any(|tld| tld == name)
    }

    #[test]
    fn test_get_all_known_tlds() {
        let tlds = get_all_known_tlds();
        assert!(tlds.len() >= 30);
        for expected in &["com", "org", "io", "ai"] {
            assert!(has(&tlds, expected), "missing TLD: {}", expected);
        }

        // The list must come back already sorted.
        let mut resorted = tlds.clone();
        resorted.sort();
        assert_eq!(tlds, resorted);
    }

    #[test]
    fn test_startup_preset() {
        let tlds = get_preset_tlds("startup").unwrap();
        assert_eq!(tlds.len(), 8);
        for expected in &["com", "io", "ai", "tech"] {
            assert!(has(&tlds, expected), "missing TLD: {}", expected);
        }
        assert_eq!(get_preset_tlds("STARTUP"), get_preset_tlds("startup"));
    }

    #[test]
    fn test_enterprise_preset() {
        let tlds = get_preset_tlds("enterprise").unwrap();
        assert_eq!(tlds.len(), 6);
        for expected in &["com", "org", "biz"] {
            assert!(has(&tlds, expected), "missing TLD: {}", expected);
        }
    }

    #[test]
    fn test_country_preset() {
        let tlds = get_preset_tlds("country").unwrap();
        assert_eq!(tlds.len(), 9);
        for expected in &["us", "uk", "de", "nl"] {
            assert!(has(&tlds, expected), "missing TLD: {}", expected);
        }
    }

    #[test]
    fn test_invalid_preset() {
        assert!(get_preset_tlds("invalid").is_none());
        assert!(get_preset_tlds("").is_none());
    }

    #[test]
    fn test_available_presets() {
        let presets = get_available_presets();
        assert_eq!(presets.len(), 11);

        let expected_names = [
            "startup",
            "enterprise",
            "country",
            "popular",
            "classic",
            "tech",
            "creative",
            "ecommerce",
            "finance",
            "web",
            "trendy",
        ];
        for name in &expected_names {
            assert!(presets.contains(name), "missing preset: {}", name);
        }
    }

    #[test]
    fn test_validate_preset_tlds() {
        for preset_name in &CORE_PRESETS {
            let tlds = get_preset_tlds(preset_name).unwrap();
            assert!(
                validate_preset_tlds(&tlds),
                "Core preset '{}' contains TLDs without hardcoded RDAP endpoints",
                preset_name
            );
        }
    }

    #[test]
    fn test_all_presets_non_empty() {
        for preset_name in get_available_presets() {
            let tlds = get_preset_tlds(preset_name).unwrap();
            assert!(
                !tlds.is_empty(),
                "Preset '{}' should not be empty",
                preset_name
            );
        }
    }

    #[test]
    fn test_ecommerce_alias() {
        assert_eq!(get_preset_tlds("ecommerce"), get_preset_tlds("shopping"));
    }

    #[test]
    fn test_preset_tlds_subset_of_known() {
        let all_tlds = get_all_known_tlds();
        for preset_name in &CORE_PRESETS {
            for tld in get_preset_tlds(preset_name).unwrap() {
                assert!(
                    all_tlds.contains(&tld),
                    "Preset '{}' contains unknown TLD: {}",
                    preset_name,
                    tld
                );
            }
        }
    }
}