Skip to main content

domain_check_lib/protocols/
registry.rs

1//! Domain registry mappings and IANA bootstrap functionality.
2//!
3//! This module provides mappings from TLDs to their corresponding RDAP endpoints,
4//! as well as dynamic discovery through the IANA bootstrap registry.
5
6use crate::error::DomainCheckError;
7use std::collections::{HashMap, HashSet};
8use std::sync::{Mutex, OnceLock};
9use std::time::{Duration, Instant};
10
/// How long a fetched bootstrap registry is trusted: 24 hours.
const BOOTSTRAP_TTL: Duration = Duration::from_secs(24 * 3600);

/// In-memory store for everything learned at runtime about TLD lookups.
///
/// Holds RDAP endpoints taken from the IANA bootstrap registry, WHOIS
/// servers cached per TLD, and a negative set of TLDs for which no RDAP
/// endpoint was found.
struct BootstrapCache {
    /// Lowercased TLD -> RDAP endpoint URL.
    rdap_endpoints: HashMap<String, String>,
    /// Lowercased TLD -> WHOIS server hostname. An empty string is stored
    /// as a "no server found" marker.
    whois_servers: HashMap<String, String>,
    /// TLDs that were looked up and had no RDAP endpoint.
    no_rdap: HashSet<String>,
    /// Set once the full bootstrap registry has been loaded.
    rdap_loaded: bool,
    /// Moment of the most recent full bootstrap load, if there was one.
    last_fetch: Option<Instant>,
}

impl BootstrapCache {
    /// Creates an empty cache that has never been loaded.
    fn new() -> Self {
        Self {
            rdap_loaded: false,
            last_fetch: None,
            rdap_endpoints: HashMap::new(),
            whois_servers: HashMap::new(),
            no_rdap: HashSet::new(),
        }
    }

    /// Reports whether the cached bootstrap data should be refreshed.
    ///
    /// A cache that was never loaded counts as stale; otherwise it is stale
    /// once more than `BOOTSTRAP_TTL` has elapsed since the last load.
    fn is_stale(&self) -> bool {
        self.last_fetch
            .map_or(true, |fetched_at| fetched_at.elapsed() > BOOTSTRAP_TTL)
    }
}
49
50/// Global bootstrap cache accessor.
51fn bootstrap_cache() -> &'static Mutex<BootstrapCache> {
52    static CACHE: OnceLock<Mutex<BootstrapCache>> = OnceLock::new();
53    CACHE.get_or_init(|| Mutex::new(BootstrapCache::new()))
54}
55
/// Build the table of hardcoded RDAP endpoints.
///
/// Each entry maps a lowercase TLD (such as "com" or "org") to the base URL
/// of that TLD's RDAP domain endpoint. Every URL ends in `/domain/`.
///
/// # Returns
///
/// A freshly built `HashMap` from TLD to RDAP endpoint base URL.
pub fn get_rdap_registry_map() -> HashMap<&'static str, &'static str> {
    // Base URLs shared by several TLDs, named once to keep the table readable.
    const IDENTITY_DIGITAL: &str = "https://rdap.identitydigital.services/rdap/domain/";
    const GOOGLE_REGISTRY: &str = "https://pubapi.registry.google/rdap/domain/";

    const ENTRIES: [(&str, &str); 32] = [
        // Popular gTLDs (Generic Top-Level Domains)
        ("com", "https://rdap.verisign.com/com/v1/domain/"),
        ("net", "https://rdap.verisign.com/net/v1/domain/"),
        ("org", "https://rdap.publicinterestregistry.org/rdap/domain/"),
        ("info", IDENTITY_DIGITAL),
        ("biz", "https://rdap.nic.biz/domain/"),
        // Google TLDs (rdap.nic.google no longer exists)
        ("app", GOOGLE_REGISTRY),
        ("dev", GOOGLE_REGISTRY),
        ("page", GOOGLE_REGISTRY),
        // CentralNic managed gTLDs
        ("xyz", "https://rdap.centralnic.com/xyz/domain/"),
        ("tech", "https://rdap.centralnic.com/tech/domain/"),
        ("online", "https://rdap.centralnic.com/online/domain/"),
        ("site", "https://rdap.centralnic.com/site/domain/"),
        ("website", "https://rdap.centralnic.com/website/domain/"),
        // Other popular gTLDs
        ("blog", "https://rdap.blog.fury.ca/rdap/domain/"),
        ("shop", "https://rdap.gmoregistry.net/rdap/domain/"),
        // Identity Digital managed TLDs
        ("ai", IDENTITY_DIGITAL), // Anguilla
        ("io", IDENTITY_DIGITAL), // British Indian Ocean Territory
        ("me", IDENTITY_DIGITAL), // Montenegro
        ("zone", IDENTITY_DIGITAL),
        ("digital", IDENTITY_DIGITAL),
        // Country Code TLDs (ccTLDs) with working RDAP endpoints
        ("us", "https://rdap.nic.us/domain/"),               // United States
        ("uk", "https://rdap.nominet.uk/domain/"),           // United Kingdom
        ("de", "https://rdap.denic.de/domain/"),             // Germany
        ("ca", "https://rdap.ca.fury.ca/rdap/domain/"),      // Canada
        ("au", "https://rdap.cctld.au/rdap/domain/"),        // Australia
        ("fr", "https://rdap.nic.fr/domain/"),               // France
        ("nl", "https://rdap.sidn.nl/domain/"),              // Netherlands
        ("br", "https://rdap.registro.br/domain/"),          // Brazil
        ("in", "https://rdap.nixiregistry.in/rdap/domain/"), // India
        // Verisign managed ccTLDs
        ("tv", "https://rdap.nic.tv/domain/"),                 // Tuvalu
        ("cc", "https://tld-rdap.verisign.com/cc/v1/domain/"), // Cocos Islands
        // Specialty TLDs
        ("cloud", "https://rdap.registry.cloud/rdap/domain/"),
        // NOTE: co, eu, it, jp, es, cn are intentionally absent — their RDAP
        // endpoints are defunct and no working alternatives were found. They
        // are left to the WHOIS fallback.
    ];

    ENTRIES.iter().copied().collect()
}
117
118/// Get all TLDs that we have RDAP endpoints for.
119///
120/// Returns the union of hardcoded registry keys and bootstrap cache keys,
121/// deduplicated and sorted alphabetically.
122///
123/// # Returns
124///
125/// Vector of TLD strings (e.g., ["com", "org", "net", ...]) sorted alphabetically.
126pub fn get_all_known_tlds() -> Vec<String> {
127    let registry = get_rdap_registry_map();
128    let mut tld_set: HashSet<String> = registry.keys().map(|k| k.to_string()).collect();
129
130    // Include bootstrap cache entries
131    if let Ok(cache) = bootstrap_cache().lock() {
132        for tld in cache.rdap_endpoints.keys() {
133            tld_set.insert(tld.clone());
134        }
135    }
136
137    let mut tlds: Vec<String> = tld_set.into_iter().collect();
138    tlds.sort(); // Consistent ordering for user experience
139    tlds
140}
141
/// Look up one of the built-in TLD presets by name.
///
/// The name is matched case-insensitively. Built-in presets are "startup",
/// "enterprise", "country", "popular", "classic", "tech", "creative",
/// "ecommerce" (alias "shopping"), "finance", "web" and "trendy".
/// For custom preset support, use `get_preset_tlds_with_custom()`.
///
/// # Arguments
///
/// * `preset` - The preset name (e.g. "startup", "enterprise", "country")
///
/// # Returns
///
/// The preset's TLDs in their curated order, or `None` if the name is not a
/// built-in preset.
///
/// # Examples
///
/// ```rust
/// use domain_check_lib::get_preset_tlds;
///
/// let startup_tlds = get_preset_tlds("startup").unwrap();
/// assert!(startup_tlds.contains(&"io".to_string()));
/// ```
pub fn get_preset_tlds(preset: &str) -> Option<Vec<String>> {
    let key = preset.to_lowercase();

    let names: &[&str] = match key.as_str() {
        "startup" => &["com", "org", "io", "ai", "tech", "app", "dev", "xyz"],
        "enterprise" => &["com", "org", "net", "info", "biz", "us"],
        "country" => &["us", "uk", "de", "fr", "ca", "au", "br", "in", "nl"],
        "popular" => &[
            "com", "net", "org", "io", "ai", "app", "dev", "tech", "me", "co", "xyz",
        ],
        "classic" => &["com", "net", "org", "info", "biz"],
        "tech" => &[
            "io",
            "ai",
            "app",
            "dev",
            "tech",
            "cloud",
            "software",
            "digital",
            "codes",
            "systems",
            "network",
            "solutions",
        ],
        "creative" => &[
            "design",
            "art",
            "studio",
            "media",
            "photography",
            "film",
            "music",
            "gallery",
            "graphics",
            "ink",
        ],
        "ecommerce" | "shopping" => &[
            "shop", "store", "market", "sale", "deals", "shopping", "buy", "bargains",
        ],
        "finance" => &[
            "finance",
            "capital",
            "fund",
            "money",
            "investments",
            "insurance",
            "tax",
            "exchange",
            "trading",
        ],
        "web" => &[
            "web", "site", "website", "online", "blog", "page", "wiki", "host", "email",
        ],
        "trendy" => &[
            "xyz", "online", "site", "top", "icu", "fun", "space", "click", "website", "life",
            "world", "live", "today",
        ],
        // Unknown preset name.
        _ => return None,
    };

    Some(names.iter().map(|&name| name.to_string()).collect())
}
223
224/// Get predefined TLD presets with custom preset support.
225///
226/// This function checks custom presets first, then falls back to built-in presets.
227///
228/// # Arguments
229///
230/// * `preset` - The preset name to look up
231/// * `custom_presets` - Optional custom presets from config files
232///
233/// # Returns
234///
235/// Optional vector of TLD strings, None if preset doesn't exist.
236///
237/// # Examples
238///
239/// ```rust
240/// use std::collections::HashMap;
241/// use domain_check_lib::get_preset_tlds_with_custom;
242///
243/// let mut custom = HashMap::new();
244/// custom.insert("my_preset".to_string(), vec!["com".to_string(), "dev".to_string()]);
245///
246/// let tlds = get_preset_tlds_with_custom("my_preset", Some(&custom)).unwrap();
247/// assert_eq!(tlds, vec!["com", "dev"]);
248/// ```
249pub fn get_preset_tlds_with_custom(
250    preset: &str,
251    custom_presets: Option<&std::collections::HashMap<String, Vec<String>>>,
252) -> Option<Vec<String>> {
253    let preset_lower = preset.to_lowercase();
254
255    // 1. Check custom presets first (highest precedence)
256    if let Some(custom_map) = custom_presets {
257        // Check both original case and lowercase
258        if let Some(custom_tlds) = custom_map
259            .get(preset)
260            .or_else(|| custom_map.get(&preset_lower))
261        {
262            return Some(custom_tlds.clone());
263        }
264    }
265
266    // 2. Fall back to built-in presets
267    get_preset_tlds(&preset_lower)
268}
269
/// List the names of the built-in presets.
///
/// Useful for CLI help text and validation.
///
/// # Returns
///
/// The preset names in alphabetical order.
pub fn get_available_presets() -> Vec<&'static str> {
    const NAMES: [&str; 11] = [
        "classic",
        "country",
        "creative",
        "ecommerce",
        "enterprise",
        "finance",
        "popular",
        "startup",
        "tech",
        "trendy",
        "web",
    ];
    NAMES.to_vec()
}
292
293/// Validate that all TLDs in a preset have hardcoded RDAP endpoints.
294///
295/// Returns true only if every TLD has a hardcoded RDAP endpoint in the
296/// built-in registry. TLDs covered by bootstrap or WHOIS fallback will
297/// return false here but still work at runtime.
298///
299/// # Arguments
300///
301/// * `preset_tlds` - TLD list to validate
302///
303/// # Returns
304///
305/// True if all TLDs have hardcoded RDAP endpoints, false otherwise.
306#[allow(dead_code)]
307pub fn validate_preset_tlds(preset_tlds: &[String]) -> bool {
308    let registry = get_rdap_registry_map();
309    preset_tlds
310        .iter()
311        .all(|tld| registry.contains_key(tld.as_str()))
312}
313
314/// Look up RDAP endpoint for a given TLD.
315///
316/// Lookup flow:
317/// 1. Check hardcoded registry (32 TLDs) — instant, offline fallback
318/// 2. Check bootstrap cache hit — O(1) HashMap lookup
319/// 3. Check negative cache (no_rdap set) — skip network if TLD known to lack RDAP
320/// 4. If cache empty or stale (24h): call fetch_full_bootstrap(), re-check
321/// 5. If still not found after full fetch: add TLD to no_rdap set, return error
322///
323/// # Arguments
324///
325/// * `tld` - The top-level domain to look up (e.g., "com", "org")
326/// * `use_bootstrap` - Whether to use IANA bootstrap for unknown TLDs
327///
328/// # Returns
329///
330/// The RDAP endpoint URL if found, or an error if not available.
331pub async fn get_rdap_endpoint(tld: &str, use_bootstrap: bool) -> Result<String, DomainCheckError> {
332    let tld_lower = tld.to_lowercase();
333
334    // 1. Check built-in registry (instant, offline)
335    let registry = get_rdap_registry_map();
336    if let Some(endpoint) = registry.get(tld_lower.as_str()) {
337        return Ok(endpoint.to_string());
338    }
339
340    // 2-3. Check bootstrap cache and negative cache
341    {
342        let cache = bootstrap_cache()
343            .lock()
344            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
345
346        // Check positive cache (not stale)
347        if !cache.is_stale() {
348            if let Some(endpoint) = cache.rdap_endpoints.get(&tld_lower) {
349                return Ok(endpoint.clone());
350            }
351        }
352
353        // Check negative cache (TLD known to have no RDAP)
354        if cache.no_rdap.contains(&tld_lower) && !cache.is_stale() {
355            return Err(DomainCheckError::bootstrap(
356                &tld_lower,
357                "TLD has no known RDAP endpoint",
358            ));
359        }
360    }
361
362    // 4. If bootstrap enabled, fetch full bootstrap and re-check
363    if use_bootstrap {
364        // Fetch if cache is empty or stale
365        let needs_fetch = {
366            let cache = bootstrap_cache().lock().map_err(|_| {
367                DomainCheckError::internal("Failed to acquire bootstrap cache lock")
368            })?;
369            !cache.rdap_loaded || cache.is_stale()
370        };
371
372        if needs_fetch {
373            fetch_full_bootstrap().await?;
374        }
375
376        // Re-check after fetch
377        let cache = bootstrap_cache()
378            .lock()
379            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
380
381        if let Some(endpoint) = cache.rdap_endpoints.get(&tld_lower) {
382            return Ok(endpoint.clone());
383        }
384
385        // 5. Still not found — add to negative cache and return error
386        drop(cache);
387        {
388            let mut cache = bootstrap_cache().lock().map_err(|_| {
389                DomainCheckError::internal("Failed to acquire bootstrap cache lock")
390            })?;
391            cache.no_rdap.insert(tld_lower.clone());
392        }
393
394        Err(DomainCheckError::bootstrap(
395            &tld_lower,
396            "TLD not found in IANA bootstrap registry",
397        ))
398    } else {
399        Err(DomainCheckError::bootstrap(
400            &tld_lower,
401            "No known RDAP endpoint and bootstrap disabled",
402        ))
403    }
404}
405
406/// Fetch the full IANA bootstrap registry and populate the cache.
407///
408/// Instead of fetching per-TLD, this downloads the complete IANA RDAP bootstrap
409/// JSON and parses all service entries at once. Much more efficient for bulk
410/// operations and provides coverage for ~1,180 TLDs.
411async fn fetch_full_bootstrap() -> Result<(), DomainCheckError> {
412    const BOOTSTRAP_URL: &str = "https://data.iana.org/rdap/dns.json";
413
414    let client = reqwest::Client::builder()
415        .timeout(Duration::from_secs(10))
416        .build()
417        .map_err(|e| {
418            DomainCheckError::network_with_source("Failed to create HTTP client", e.to_string())
419        })?;
420
421    let response = client.get(BOOTSTRAP_URL).send().await.map_err(|e| {
422        DomainCheckError::bootstrap("*", format!("Failed to fetch bootstrap registry: {}", e))
423    })?;
424
425    if !response.status().is_success() {
426        return Err(DomainCheckError::bootstrap(
427            "*",
428            format!("Bootstrap registry returned HTTP {}", response.status()),
429        ));
430    }
431
432    let json: serde_json::Value = response.json().await.map_err(|e| {
433        DomainCheckError::bootstrap("*", format!("Failed to parse bootstrap JSON: {}", e))
434    })?;
435
436    // Validate structure
437    let services = json
438        .get("services")
439        .and_then(|s| s.as_array())
440        .ok_or_else(|| {
441            DomainCheckError::bootstrap(
442                "*",
443                "Invalid bootstrap JSON: missing or invalid 'services' array",
444            )
445        })?;
446
447    let mut endpoints: HashMap<String, String> = HashMap::new();
448
449    for service in services {
450        if let Some(service_array) = service.as_array() {
451            if service_array.len() >= 2 {
452                // Get the endpoint URL(s)
453                let url = service_array[1]
454                    .as_array()
455                    .and_then(|urls| urls.first())
456                    .and_then(|u| u.as_str());
457
458                if let Some(url) = url {
459                    let endpoint = format!("{}/domain/", url.trim_end_matches('/'));
460
461                    // Get all TLDs served by this endpoint
462                    if let Some(tlds) = service_array[0].as_array() {
463                        for t in tlds {
464                            if let Some(tld_str) = t.as_str() {
465                                endpoints.insert(tld_str.to_lowercase(), endpoint.clone());
466                            }
467                        }
468                    }
469                }
470            }
471        }
472    }
473
474    // Update cache atomically
475    let mut cache = bootstrap_cache()
476        .lock()
477        .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
478
479    cache.rdap_endpoints = endpoints;
480    cache.rdap_loaded = true;
481    cache.last_fetch = Some(Instant::now());
482    cache.no_rdap.clear(); // Reset negative cache on fresh fetch
483
484    Ok(())
485}
486
487/// Pre-warm the bootstrap cache by fetching the full IANA registry.
488///
489/// Call this before bulk operations (e.g., `--all` mode) to ensure all ~1,180
490/// TLDs are available without per-TLD network requests.
491///
492/// This is safe to call multiple times — subsequent calls are no-ops if the
493/// cache is still fresh (within the 24-hour TTL).
494pub async fn initialize_bootstrap() -> Result<(), DomainCheckError> {
495    let needs_fetch = {
496        let cache = bootstrap_cache()
497            .lock()
498            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
499        !cache.rdap_loaded || cache.is_stale()
500    };
501
502    if needs_fetch {
503        fetch_full_bootstrap().await?;
504    }
505
506    Ok(())
507}
508
509/// Cache a discovered WHOIS server for a TLD.
510pub fn cache_whois_server(tld: &str, server: &str) -> Result<(), DomainCheckError> {
511    let mut cache = bootstrap_cache().lock().map_err(|_| {
512        DomainCheckError::internal("Failed to acquire bootstrap cache lock for writing")
513    })?;
514
515    cache
516        .whois_servers
517        .insert(tld.to_lowercase(), server.to_string());
518    Ok(())
519}
520
521/// Look up a cached WHOIS server for a TLD.
522///
523/// Checks the bootstrap cache for a previously discovered WHOIS server.
524/// If not cached, the caller should use `discover_whois_server()` from
525/// the whois module and cache the result.
526///
527/// # Arguments
528///
529/// * `tld` - The TLD to look up (e.g., "com", "co")
530///
531/// # Returns
532///
533/// The WHOIS server hostname if cached, or None.
534pub fn get_cached_whois_server(tld: &str) -> Option<String> {
535    let cache = bootstrap_cache().lock().ok()?;
536    let server = cache.whois_servers.get(&tld.to_lowercase())?;
537    if server.is_empty() {
538        None // Empty string means "no server found" (negative cache)
539    } else {
540        Some(server.clone())
541    }
542}
543
544/// Check if a TLD has been negatively cached for WHOIS (no server found).
545pub fn is_whois_negatively_cached(tld: &str) -> bool {
546    if let Ok(cache) = bootstrap_cache().lock() {
547        matches!(cache.whois_servers.get(&tld.to_lowercase()), Some(s) if s.is_empty())
548    } else {
549        false
550    }
551}
552
553/// Get the WHOIS server for a TLD, using cache with IANA referral discovery fallback.
554///
555/// Lookup flow:
556/// 1. Check cache for previously discovered server
557/// 2. If miss and not negatively cached, discover via IANA referral
558/// 3. Cache result (empty string for "no server found" to avoid re-querying)
559///
560/// # Arguments
561///
562/// * `tld` - The TLD to look up
563///
564/// # Returns
565///
566/// The WHOIS server hostname, or None if no server exists for this TLD.
567pub async fn get_whois_server(tld: &str) -> Option<String> {
568    let tld_lower = tld.to_lowercase();
569
570    // Check positive cache
571    if let Some(server) = get_cached_whois_server(&tld_lower) {
572        return Some(server);
573    }
574
575    // Check negative cache
576    if is_whois_negatively_cached(&tld_lower) {
577        return None;
578    }
579
580    // Discover via IANA referral
581    match crate::protocols::whois::discover_whois_server(&tld_lower).await {
582        Some(server) => {
583            let _ = cache_whois_server(&tld_lower, &server);
584            Some(server)
585        }
586        None => {
587            // Cache empty string as negative result
588            let _ = cache_whois_server(&tld_lower, "");
589            None
590        }
591    }
592}
593
594/// Extract TLD from a domain name.
595///
596/// Handles both simple TLDs (example.com -> "com") and multi-level TLDs
597/// (example.co.uk -> "co.uk", though this function will return "uk").
598///
599/// # Arguments
600///
601/// * `domain` - The domain name to extract TLD from
602///
603/// # Returns
604///
605/// The TLD string, or an error if the domain format is invalid.
606pub fn extract_tld(domain: &str) -> Result<String, DomainCheckError> {
607    let parts: Vec<&str> = domain.split('.').collect();
608
609    if parts.len() < 2 {
610        return Err(DomainCheckError::invalid_domain(
611            domain,
612            "Domain must contain at least one dot",
613        ));
614    }
615
616    // Return the last part as TLD
617    // Note: This is simplified and doesn't handle multi-level TLDs like .co.uk
618    // For production use, consider using a library like publicsuffix
619    Ok(parts.last().unwrap().to_lowercase())
620}
621
622/// Clear the bootstrap cache (useful for testing).
623#[allow(dead_code)]
624pub fn clear_bootstrap_cache() -> Result<(), DomainCheckError> {
625    let mut cache = bootstrap_cache().lock().map_err(|_| {
626        DomainCheckError::internal("Failed to acquire bootstrap cache lock for clearing")
627    })?;
628
629    cache.rdap_endpoints.clear();
630    cache.whois_servers.clear();
631    cache.no_rdap.clear();
632    cache.rdap_loaded = false;
633    cache.last_fetch = None;
634    Ok(())
635}
636
637/// Get bootstrap cache statistics (useful for debugging).
638#[allow(dead_code)]
639pub fn get_bootstrap_cache_stats() -> Result<(usize, bool), DomainCheckError> {
640    let cache = bootstrap_cache().lock().map_err(|_| {
641        DomainCheckError::internal("Failed to acquire bootstrap cache lock for stats")
642    })?;
643
644    Ok((cache.rdap_endpoints.len(), cache.is_stale()))
645}
646
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes the tests that mutate the process-wide bootstrap cache.
    ///
    /// The test harness runs tests on multiple threads by default, so without
    /// this one test's `clear_bootstrap_cache()` could land between another
    /// test's insert and its assertion. A poisoned guard is recovered so one
    /// failing test does not cascade into the others.
    fn cache_test_guard() -> std::sync::MutexGuard<'static, ()> {
        static GUARD: std::sync::Mutex<()> = std::sync::Mutex::new(());
        GUARD
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    // ── extract_tld ─────────────────────────────────────────────────────

    #[test]
    fn test_extract_tld_basic() {
        assert_eq!(extract_tld("example.com").unwrap(), "com");
        assert_eq!(extract_tld("test.org").unwrap(), "org");
        assert_eq!(extract_tld("sub.example.com").unwrap(), "com");
    }

    #[test]
    fn test_extract_tld_case_insensitive() {
        assert_eq!(extract_tld("EXAMPLE.COM").unwrap(), "com");
        assert_eq!(extract_tld("Test.ORG").unwrap(), "org");
    }

    #[test]
    fn test_extract_tld_no_dot() {
        assert!(extract_tld("invalid").is_err());
        let err = extract_tld("invalid").unwrap_err();
        assert!(err.to_string().contains("at least one dot"));
    }

    #[test]
    fn test_extract_tld_empty() {
        assert!(extract_tld("").is_err());
    }

    #[test]
    fn test_extract_tld_multi_level() {
        // Returns last part only (simplified — doesn't handle co.uk)
        assert_eq!(extract_tld("example.co.uk").unwrap(), "uk");
    }

    // ── get_rdap_registry_map ───────────────────────────────────────────

    #[test]
    fn test_registry_map_contains_common_tlds() {
        let registry = get_rdap_registry_map();
        assert!(registry.contains_key("com"));
        assert!(registry.contains_key("org"));
        assert!(registry.contains_key("net"));
        assert!(registry.contains_key("io"));
        assert!(registry.contains_key("ai"));
        assert!(registry.contains_key("dev"));
        assert!(registry.contains_key("app"));
    }

    #[test]
    fn test_registry_map_size() {
        let registry = get_rdap_registry_map();
        // We have 32 hardcoded TLDs
        assert!(
            registry.len() >= 30,
            "Expected at least 30 entries, got {}",
            registry.len()
        );
    }

    #[test]
    fn test_all_endpoints_are_valid_https_urls() {
        let registry = get_rdap_registry_map();
        for (tld, endpoint) in &registry {
            assert!(
                endpoint.starts_with("https://"),
                "Endpoint for '{}' must use HTTPS: {}",
                tld,
                endpoint
            );
            assert!(
                endpoint.ends_with("/domain/"),
                "Endpoint for '{}' must end with /domain/: {}",
                tld,
                endpoint
            );
        }
    }

    #[test]
    fn test_registry_does_not_contain_dead_cctlds() {
        let registry = get_rdap_registry_map();
        // These were removed because their RDAP endpoints are defunct
        assert!(!registry.contains_key("co"));
        assert!(!registry.contains_key("eu"));
        assert!(!registry.contains_key("it"));
        assert!(!registry.contains_key("jp"));
        assert!(!registry.contains_key("es"));
        assert!(!registry.contains_key("cn"));
    }

    // ── get_rdap_endpoint ───────────────────────────────────────────────

    #[tokio::test]
    async fn test_get_rdap_endpoint_builtin() {
        let endpoint = get_rdap_endpoint("com", false).await.unwrap();
        assert!(endpoint.contains("verisign.com"));
    }

    #[tokio::test]
    async fn test_get_rdap_endpoint_case_insensitive() {
        let endpoint = get_rdap_endpoint("COM", false).await.unwrap();
        assert!(endpoint.contains("verisign.com"));
    }

    #[tokio::test]
    async fn test_get_rdap_endpoint_unknown_no_bootstrap() {
        let result = get_rdap_endpoint("unknowntld123", false).await;
        assert!(result.is_err());
        // Should return a BootstrapError variant
        let err = result.unwrap_err();
        assert!(
            matches!(err, DomainCheckError::BootstrapError { .. }),
            "Expected BootstrapError, got: {:?}",
            err
        );
    }

    // ── BootstrapCache ──────────────────────────────────────────────────

    #[test]
    fn test_bootstrap_cache_new() {
        let cache = BootstrapCache::new();
        assert!(!cache.rdap_loaded);
        assert!(cache.last_fetch.is_none());
        assert!(cache.rdap_endpoints.is_empty());
        assert!(cache.whois_servers.is_empty());
        assert!(cache.no_rdap.is_empty());
        assert!(cache.is_stale());
    }

    #[test]
    fn test_bootstrap_cache_is_stale_no_fetch() {
        let cache = BootstrapCache::new();
        assert!(cache.is_stale()); // Never fetched = stale
    }

    #[test]
    fn test_bootstrap_cache_is_stale_fresh() {
        let mut cache = BootstrapCache::new();
        cache.last_fetch = Some(Instant::now());
        assert!(!cache.is_stale()); // Just fetched = not stale
    }

    // ── WHOIS server caching ────────────────────────────────────────────

    #[test]
    fn test_whois_server_caching() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();

        cache_whois_server("com", "whois.verisign-grs.com").unwrap();
        assert_eq!(
            get_cached_whois_server("com"),
            Some("whois.verisign-grs.com".to_string())
        );

        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_negative_caching() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();

        cache_whois_server("fake", "").unwrap();
        assert_eq!(get_cached_whois_server("fake"), None);
        assert!(is_whois_negatively_cached("fake"));

        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_cache_case_insensitive() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();

        cache_whois_server("COM", "whois.verisign-grs.com").unwrap();
        assert_eq!(
            get_cached_whois_server("com"),
            Some("whois.verisign-grs.com".to_string())
        );

        clear_bootstrap_cache().unwrap();
    }

    #[test]
    fn test_whois_not_negatively_cached_when_absent() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        assert!(!is_whois_negatively_cached("neverqueried"));
        clear_bootstrap_cache().unwrap();
    }

    // ── clear_bootstrap_cache & stats ───────────────────────────────────

    #[test]
    fn test_clear_bootstrap_cache() {
        let _guard = cache_test_guard();
        // Populate some data
        cache_whois_server("test", "whois.test.com").unwrap();
        clear_bootstrap_cache().unwrap();

        assert_eq!(get_cached_whois_server("test"), None);
        assert!(!is_whois_negatively_cached("test"));
    }

    #[test]
    fn test_get_bootstrap_cache_stats() {
        let _guard = cache_test_guard();
        clear_bootstrap_cache().unwrap();
        let (count, stale) = get_bootstrap_cache_stats().unwrap();
        assert_eq!(count, 0);
        assert!(stale); // No fetch yet = stale
        clear_bootstrap_cache().unwrap();
    }

    // ── validate_preset_tlds ────────────────────────────────────────────

    #[test]
    fn test_validate_preset_tlds_all_hardcoded() {
        let tlds = vec!["com".to_string(), "org".to_string(), "net".to_string()];
        assert!(validate_preset_tlds(&tlds));
    }

    #[test]
    fn test_validate_preset_tlds_with_unknown() {
        let tlds = vec!["com".to_string(), "unknowntld999".to_string()];
        assert!(!validate_preset_tlds(&tlds));
    }

    #[test]
    fn test_validate_preset_tlds_empty() {
        assert!(validate_preset_tlds(&[]));
    }

    // ── get_preset_tlds_with_custom ─────────────────────────────────────

    #[test]
    fn test_custom_preset_takes_precedence() {
        let mut custom = HashMap::new();
        custom.insert(
            "startup".to_string(),
            vec!["custom1".to_string(), "custom2".to_string()],
        );

        let result = get_preset_tlds_with_custom("startup", Some(&custom)).unwrap();
        assert_eq!(result, vec!["custom1", "custom2"]);
    }

    #[test]
    fn test_custom_preset_fallback_to_builtin() {
        let custom: HashMap<String, Vec<String>> = HashMap::new();
        let result = get_preset_tlds_with_custom("startup", Some(&custom)).unwrap();
        // Should fall back to built-in startup preset
        assert!(result.contains(&"com".to_string()));
    }

    #[test]
    fn test_custom_preset_exact_case_match() {
        let mut custom = HashMap::new();
        custom.insert("MyPreset".to_string(), vec!["com".to_string()]);

        // Exact case match works
        let result = get_preset_tlds_with_custom("MyPreset", Some(&custom)).unwrap();
        assert_eq!(result, vec!["com"]);
    }

    #[test]
    fn test_custom_preset_lowercase_key_matches_lowercase_query() {
        let mut custom = HashMap::new();
        custom.insert("mypreset".to_string(), vec!["org".to_string()]);

        // Uppercase query matches lowercase key via preset_lower fallback
        let result = get_preset_tlds_with_custom("MYPRESET", Some(&custom)).unwrap();
        assert_eq!(result, vec!["org"]);
    }

    #[test]
    fn test_custom_preset_none_map() {
        let result = get_preset_tlds_with_custom("startup", None).unwrap();
        assert!(result.contains(&"com".to_string()));
    }

    #[test]
    fn test_custom_preset_unknown_returns_none() {
        let result = get_preset_tlds_with_custom("nonexistent", None);
        assert!(result.is_none());
    }
}
933
#[cfg(test)]
mod preset_tests {
    use super::*;

    /// Presets checked against the hardcoded TLD data in these tests.
    /// Other presets are left out here because they may depend on bootstrap
    /// data that unit tests do not have.
    const CORE_PRESETS: [&str; 4] = ["startup", "enterprise", "country", "classic"];

    /// Returns `true` when `tlds` holds an entry equal to `name`.
    fn contains_tld(tlds: &[String], name: &str) -> bool {
        tlds.iter().any(|tld| tld == name)
    }

    /// The known-TLD list has at least 30 entries, includes a few
    /// representative TLDs, and comes back sorted.
    #[test]
    fn test_get_all_known_tlds() {
        let known = get_all_known_tlds();

        assert!(known.len() >= 30);
        assert!(contains_tld(&known, "com"));
        assert!(contains_tld(&known, "org"));
        assert!(contains_tld(&known, "io"));
        assert!(contains_tld(&known, "ai"));

        // Sorting a copy must not change the order: the list is already sorted.
        let expected_order = {
            let mut copy = known.clone();
            copy.sort();
            copy
        };
        assert_eq!(known, expected_order);
    }

    /// The `startup` preset has 8 TLDs and is looked up case-insensitively.
    #[test]
    fn test_startup_preset() {
        let startup = get_preset_tlds("startup").unwrap();

        assert_eq!(startup.len(), 8);
        assert!(contains_tld(&startup, "com"));
        assert!(contains_tld(&startup, "io"));
        assert!(contains_tld(&startup, "ai"));
        assert!(contains_tld(&startup, "tech"));

        // Upper-case and lower-case names resolve to the same preset.
        let upper = get_preset_tlds("STARTUP");
        let lower = get_preset_tlds("startup");
        assert_eq!(upper, lower);
    }

    /// The `enterprise` preset has 6 TLDs, including `com`, `org` and `biz`.
    #[test]
    fn test_enterprise_preset() {
        let enterprise = get_preset_tlds("enterprise").unwrap();

        assert_eq!(enterprise.len(), 6);
        assert!(contains_tld(&enterprise, "com"));
        assert!(contains_tld(&enterprise, "org"));
        assert!(contains_tld(&enterprise, "biz"));
    }

    /// The `country` preset has 9 TLDs, including `us`, `uk`, `de` and `nl`.
    #[test]
    fn test_country_preset() {
        let country = get_preset_tlds("country").unwrap();

        assert_eq!(country.len(), 9);
        assert!(contains_tld(&country, "us"));
        assert!(contains_tld(&country, "uk"));
        assert!(contains_tld(&country, "de"));
        assert!(contains_tld(&country, "nl"));
    }

    /// Unknown and empty preset names resolve to `None`.
    #[test]
    fn test_invalid_preset() {
        assert_eq!(get_preset_tlds("invalid"), None);
        assert_eq!(get_preset_tlds(""), None);
    }

    /// Exactly the eleven expected preset names are advertised.
    #[test]
    fn test_available_presets() {
        let expected: [&str; 11] = [
            "startup",
            "enterprise",
            "country",
            "popular",
            "classic",
            "tech",
            "creative",
            "ecommerce",
            "finance",
            "web",
            "trendy",
        ];

        let presets = get_available_presets();
        assert_eq!(presets.len(), expected.len());

        // Collect whatever is missing so a failure names the absent presets.
        let missing: Vec<&str> = expected
            .iter()
            .copied()
            .filter(|name| !presets.contains(name))
            .collect();
        assert_eq!(missing, Vec::<&str>::new());
    }

    /// Every core preset passes `validate_preset_tlds`.
    #[test]
    fn test_validate_preset_tlds() {
        for preset_name in CORE_PRESETS {
            let tlds = get_preset_tlds(preset_name).unwrap();
            let all_valid = validate_preset_tlds(&tlds);
            assert!(
                all_valid,
                "Core preset '{}' contains TLDs without hardcoded RDAP endpoints",
                preset_name
            );
        }
    }

    /// Every advertised preset resolves to at least one TLD.
    #[test]
    fn test_all_presets_non_empty() {
        for preset_name in get_available_presets() {
            let has_tlds = !get_preset_tlds(preset_name).unwrap().is_empty();
            assert!(has_tlds, "Preset '{}' should not be empty", preset_name);
        }
    }

    /// `shopping` resolves to the same result as `ecommerce`.
    #[test]
    fn test_ecommerce_alias() {
        let canonical = get_preset_tlds("ecommerce");
        let alias = get_preset_tlds("shopping");
        assert_eq!(canonical, alias);
    }

    /// Every TLD in a core preset also appears in the known-TLD list.
    #[test]
    fn test_preset_tlds_subset_of_known() {
        let known = get_all_known_tlds();

        for preset_name in CORE_PRESETS {
            for tld in get_preset_tlds(preset_name).unwrap() {
                assert!(
                    known.contains(&tld),
                    "Preset '{}' contains unknown TLD: {}",
                    preset_name,
                    tld
                );
            }
        }
    }
}