domain_check_lib/protocols/
registry.rs

1//! Domain registry mappings and IANA bootstrap functionality.
2//!
3//! This module provides mappings from TLDs to their corresponding RDAP endpoints,
4//! as well as dynamic discovery through the IANA bootstrap registry.
5
6use crate::error::DomainCheckError;
7use std::collections::HashMap;
8use std::sync::Mutex;
9use std::time::{Duration, Instant};
10
/// Bootstrap registry cache for RDAP endpoints discovered at runtime.
///
/// Freshness is tracked for the cache as a whole through `last_update`
/// rather than per entry: once more than `TTL` has elapsed since the last
/// insertion (or since construction), the cache reports itself as stale.
struct BootstrapCache {
    /// Discovered RDAP endpoint URLs keyed by TLD.
    endpoints: HashMap<String, String>,
    /// When the cache was created or last written to.
    last_update: Instant,
}

impl BootstrapCache {
    /// How long cached endpoints stay valid after the most recent update (1 hour).
    const TTL: Duration = Duration::from_secs(3600);

    /// Creates an empty cache whose freshness window starts now.
    fn new() -> Self {
        Self {
            endpoints: HashMap::new(),
            last_update: Instant::now(),
        }
    }

    /// Returns a copy of the endpoint stored for `tld`, if any.
    ///
    /// Staleness is not checked here; use `is_stale` for that.
    fn get(&self, tld: &str) -> Option<String> {
        self.endpoints.get(tld).cloned()
    }

    /// Stores `endpoint` for `tld` and restarts the freshness window.
    ///
    /// If the cache has already expired, the old entries are discarded first.
    /// Without this, resetting `last_update` would make every expired entry
    /// look fresh again.
    fn insert(&mut self, tld: String, endpoint: String) {
        if self.is_stale() {
            self.endpoints.clear();
        }
        self.endpoints.insert(tld, endpoint);
        self.last_update = Instant::now();
    }

    /// Returns `true` once more than `TTL` has elapsed since the last update.
    fn is_stale(&self) -> bool {
        self.last_update.elapsed() > Self::TTL
    }
}
39
40// Global bootstrap cache using lazy_static
41lazy_static::lazy_static! {
42    static ref BOOTSTRAP_CACHE: Mutex<BootstrapCache> = Mutex::new(BootstrapCache::new());
43}
44
/// Build the table of built-in RDAP registry endpoints.
///
/// Each entry maps a lowercase TLD (without the leading dot) to the base URL
/// of that registry's RDAP domain lookup; the queried domain name is meant to
/// be appended to the URL.
///
/// # Returns
///
/// A freshly constructed `HashMap` from TLD strings (such as "com" or "org")
/// to RDAP endpoint base URLs.
pub fn get_rdap_registry_map() -> HashMap<&'static str, &'static str> {
    const BUILT_IN_ENDPOINTS: &[(&str, &str)] = &[
        // Popular gTLDs (Generic Top-Level Domains)
        ("com", "https://rdap.verisign.com/com/v1/domain/"),
        ("net", "https://rdap.verisign.com/net/v1/domain/"),
        ("org", "https://rdap.publicinterestregistry.org/rdap/domain/"),
        ("info", "https://rdap.identitydigital.services/rdap/domain/"),
        ("biz", "https://rdap.nic.biz/domain/"),
        // Google TLDs
        ("app", "https://rdap.nic.google/domain/"),
        ("dev", "https://rdap.nic.google/domain/"),
        ("page", "https://rdap.nic.google/domain/"),
        // Other popular gTLDs
        ("blog", "https://rdap.nic.blog/domain/"),
        ("shop", "https://rdap.nic.shop/domain/"),
        ("xyz", "https://rdap.nic.xyz/domain/"),
        ("tech", "https://rdap.nic.tech/domain/"),
        ("online", "https://rdap.nic.online/domain/"),
        ("site", "https://rdap.nic.site/domain/"),
        ("website", "https://rdap.nic.website/domain/"),
        // Country Code TLDs (ccTLDs)
        ("io", "https://rdap.identitydigital.services/rdap/domain/"), // British Indian Ocean Territory
        ("ai", "https://rdap.nic.ai/domain/"),             // Anguilla
        ("co", "https://rdap.nic.co/domain/"),             // Colombia
        ("me", "https://rdap.nic.me/domain/"),             // Montenegro
        ("us", "https://rdap.nic.us/domain/"),             // United States
        ("uk", "https://rdap.nominet.uk/domain/"),         // United Kingdom
        ("eu", "https://rdap.eu.org/domain/"),             // European Union
        ("de", "https://rdap.denic.de/domain/"),           // Germany
        ("ca", "https://rdap.cira.ca/domain/"),            // Canada
        ("au", "https://rdap.auda.org.au/domain/"),        // Australia
        ("fr", "https://rdap.nic.fr/domain/"),             // France
        ("es", "https://rdap.nic.es/domain/"),             // Spain
        ("it", "https://rdap.nic.it/domain/"),             // Italy
        ("nl", "https://rdap.domain-registry.nl/domain/"), // Netherlands
        ("jp", "https://rdap.jprs.jp/domain/"),            // Japan
        ("br", "https://rdap.registro.br/domain/"),        // Brazil
        ("in", "https://rdap.registry.in/domain/"),        // India
        ("cn", "https://rdap.cnnic.cn/domain/"),           // China
        // Verisign managed ccTLDs
        ("tv", "https://rdap.verisign.com/tv/v1/domain/"), // Tuvalu
        ("cc", "https://rdap.verisign.com/cc/v1/domain/"), // Cocos Islands
        // Specialty TLDs
        ("zone", "https://rdap.nic.zone/domain/"),
        ("cloud", "https://rdap.nic.cloud/domain/"),
        ("digital", "https://rdap.nic.digital/domain/"),
    ];

    BUILT_IN_ENDPOINTS.iter().copied().collect()
}
104
// TLD enumeration and preset helpers.
106
107/// Get all TLDs that we have RDAP endpoints for.
108///
109/// This function extracts TLD knowledge from our built-in registry mappings,
110/// providing a comprehensive list for the --all flag functionality.
111///
112/// # Returns
113///
114/// Vector of TLD strings (e.g., ["com", "org", "net", ...]) sorted alphabetically.
115pub fn get_all_known_tlds() -> Vec<String> {
116    let registry = get_rdap_registry_map();
117    let mut tlds: Vec<String> = registry.keys().map(|k| k.to_string()).collect();
118    tlds.sort(); // Consistent ordering for user experience
119    tlds
120}
121
/// Look up one of the curated TLD presets by name.
///
/// Presets are short, hand-picked TLD lists for common domain checking
/// scenarios. The name is matched case-insensitively.
///
/// # Arguments
///
/// * `preset` - The preset name ("startup", "enterprise", "country")
///
/// # Returns
///
/// `Some` with the preset's TLDs in their defined order, or `None` when no
/// preset has that name.
///
/// # Examples
///
/// ```rust
/// use domain_check_lib::get_preset_tlds;
///
/// let startup_tlds = get_preset_tlds("startup").unwrap();
/// assert!(startup_tlds.contains(&"io".to_string()));
/// ```
pub fn get_preset_tlds(preset: &str) -> Option<Vec<String>> {
    const STARTUP: &[&str] = &["com", "org", "io", "ai", "tech", "app", "dev", "xyz"];
    const ENTERPRISE: &[&str] = &["com", "org", "net", "info", "biz", "us"];
    const COUNTRY: &[&str] = &["us", "uk", "de", "fr", "ca", "au", "jp", "br", "in"];

    let normalized = preset.to_lowercase();
    let selected = match normalized.as_str() {
        "startup" => STARTUP,
        "enterprise" => ENTERPRISE,
        "country" => COUNTRY,
        _ => return None,
    };

    Some(selected.iter().map(|name| name.to_string()).collect())
}
177
/// List the names of all TLD presets.
///
/// Handy for CLI help text and for validating user-supplied preset names.
///
/// # Returns
///
/// The preset names, in a fixed order.
pub fn get_available_presets() -> Vec<&'static str> {
    const PRESET_NAMES: [&str; 3] = ["startup", "enterprise", "country"];
    PRESET_NAMES.to_vec()
}
188
189/// Validate that all TLDs in a preset have RDAP endpoints.
190///
191/// This ensures preset TLDs can actually be checked via our registry.
192/// Used internally and for testing.
193///
194/// # Arguments
195///
196/// * `preset_tlds` - TLD list to validate
197///
198/// # Returns
199///
200/// True if all TLDs have known RDAP endpoints, false otherwise.
201#[allow(dead_code)]
202pub fn validate_preset_tlds(preset_tlds: &[String]) -> bool {
203    let registry = get_rdap_registry_map();
204    preset_tlds
205        .iter()
206        .all(|tld| registry.contains_key(tld.as_str()))
207}
208
209/// Look up RDAP endpoint for a given TLD.
210///
211/// First checks the built-in registry, then checks the bootstrap cache,
212/// and optionally discovers new endpoints via IANA bootstrap.
213///
214/// # Arguments
215///
216/// * `tld` - The top-level domain to look up (e.g., "com", "org")
217/// * `use_bootstrap` - Whether to use IANA bootstrap for unknown TLDs
218///
219/// # Returns
220///
221/// The RDAP endpoint URL if found, or an error if not available.
222pub async fn get_rdap_endpoint(tld: &str, use_bootstrap: bool) -> Result<String, DomainCheckError> {
223    let tld_lower = tld.to_lowercase();
224
225    // First, check built-in registry
226    let registry = get_rdap_registry_map();
227    if let Some(endpoint) = registry.get(tld_lower.as_str()) {
228        return Ok(endpoint.to_string());
229    }
230
231    // Check bootstrap cache
232    {
233        let cache = BOOTSTRAP_CACHE
234            .lock()
235            .map_err(|_| DomainCheckError::internal("Failed to acquire bootstrap cache lock"))?;
236
237        if !cache.is_stale() {
238            if let Some(endpoint) = cache.get(&tld_lower) {
239                return Ok(endpoint);
240            }
241        }
242    }
243
244    // If bootstrap is enabled, try to discover the endpoint
245    if use_bootstrap {
246        discover_rdap_endpoint(&tld_lower).await
247    } else {
248        Err(DomainCheckError::bootstrap(
249            &tld_lower,
250            "No known RDAP endpoint and bootstrap disabled",
251        ))
252    }
253}
254
255/// Discover RDAP endpoint for a TLD using IANA bootstrap registry.
256///
257/// This function queries the IANA bootstrap registry to find the RDAP endpoint
258/// for TLDs that are not in our built-in mappings.
259///
260/// # Arguments
261///
262/// * `tld` - The TLD to discover an endpoint for
263///
264/// # Returns
265///
266/// The discovered RDAP endpoint URL, or an error if discovery fails.
267async fn discover_rdap_endpoint(tld: &str) -> Result<String, DomainCheckError> {
268    const BOOTSTRAP_URL: &str = "https://data.iana.org/rdap/dns.json";
269
270    // Create HTTP client with timeout
271    let client = reqwest::Client::builder()
272        .timeout(Duration::from_secs(5))
273        .build()
274        .map_err(|e| {
275            DomainCheckError::network_with_source("Failed to create HTTP client", e.to_string())
276        })?;
277
278    // Fetch bootstrap registry
279    let response = client.get(BOOTSTRAP_URL).send().await.map_err(|e| {
280        DomainCheckError::bootstrap(tld, format!("Failed to fetch bootstrap registry: {}", e))
281    })?;
282
283    if !response.status().is_success() {
284        return Err(DomainCheckError::bootstrap(
285            tld,
286            format!("Bootstrap registry returned HTTP {}", response.status()),
287        ));
288    }
289
290    let json: serde_json::Value = response.json().await.map_err(|e| {
291        DomainCheckError::bootstrap(tld, format!("Failed to parse bootstrap JSON: {}", e))
292    })?;
293
294    // Parse the bootstrap registry format
295    if let Some(services) = json.get("services").and_then(|s| s.as_array()) {
296        for service in services {
297            if let Some(service_array) = service.as_array() {
298                if service_array.len() >= 2 {
299                    // Check if this service handles our TLD
300                    if let Some(tlds) = service_array[0].as_array() {
301                        for t in tlds {
302                            if let Some(t_str) = t.as_str() {
303                                if t_str.to_lowercase() == tld.to_lowercase() {
304                                    // Found our TLD, get the endpoint
305                                    if let Some(urls) = service_array[1].as_array() {
306                                        if let Some(url) = urls.first().and_then(|u| u.as_str()) {
307                                            let endpoint =
308                                                format!("{}/domain/", url.trim_end_matches('/'));
309
310                                            // Cache the discovered endpoint
311                                            cache_discovered_endpoint(tld, &endpoint)?;
312
313                                            return Ok(endpoint);
314                                        }
315                                    }
316                                }
317                            }
318                        }
319                    }
320                }
321            }
322        }
323    }
324
325    Err(DomainCheckError::bootstrap(
326        tld,
327        "TLD not found in IANA bootstrap registry",
328    ))
329}
330
331/// Cache a discovered RDAP endpoint for future use.
332fn cache_discovered_endpoint(tld: &str, endpoint: &str) -> Result<(), DomainCheckError> {
333    let mut cache = BOOTSTRAP_CACHE.lock().map_err(|_| {
334        DomainCheckError::internal("Failed to acquire bootstrap cache lock for writing")
335    })?;
336
337    cache.insert(tld.to_string(), endpoint.to_string());
338    Ok(())
339}
340
341/// Extract TLD from a domain name.
342///
343/// Handles both simple TLDs (example.com -> "com") and multi-level TLDs
344/// (example.co.uk -> "co.uk", though this function will return "uk").
345///
346/// # Arguments
347///
348/// * `domain` - The domain name to extract TLD from
349///
350/// # Returns
351///
352/// The TLD string, or an error if the domain format is invalid.
353pub fn extract_tld(domain: &str) -> Result<String, DomainCheckError> {
354    let parts: Vec<&str> = domain.split('.').collect();
355
356    if parts.len() < 2 {
357        return Err(DomainCheckError::invalid_domain(
358            domain,
359            "Domain must contain at least one dot",
360        ));
361    }
362
363    // Return the last part as TLD
364    // Note: This is simplified and doesn't handle multi-level TLDs like .co.uk
365    // For production use, consider using a library like publicsuffix
366    Ok(parts.last().unwrap().to_lowercase())
367}
368
369/// Clear the bootstrap cache (useful for testing).
370#[allow(dead_code)]
371pub fn clear_bootstrap_cache() -> Result<(), DomainCheckError> {
372    let mut cache = BOOTSTRAP_CACHE.lock().map_err(|_| {
373        DomainCheckError::internal("Failed to acquire bootstrap cache lock for clearing")
374    })?;
375
376    cache.endpoints.clear();
377    cache.last_update = Instant::now();
378    Ok(())
379}
380
381/// Get bootstrap cache statistics (useful for debugging).
382#[allow(dead_code)]
383pub fn get_bootstrap_cache_stats() -> Result<(usize, bool), DomainCheckError> {
384    let cache = BOOTSTRAP_CACHE.lock().map_err(|_| {
385        DomainCheckError::internal("Failed to acquire bootstrap cache lock for stats")
386    })?;
387
388    Ok((cache.endpoints.len(), cache.is_stale()))
389}
390
#[cfg(test)]
mod tests {
    use super::*;

    /// The last label is returned for valid names; names without a dot are rejected.
    #[test]
    fn test_extract_tld() {
        let valid = [
            ("example.com", "com"),
            ("test.org", "org"),
            ("sub.example.com", "com"),
        ];
        for (domain, expected) in valid {
            assert_eq!(extract_tld(domain).unwrap(), expected);
        }

        for domain in ["invalid", ""] {
            assert!(extract_tld(domain).is_err());
        }
    }

    /// The built-in registry must cover the most common TLDs.
    #[test]
    fn test_registry_map_contains_common_tlds() {
        let registry = get_rdap_registry_map();
        for tld in ["com", "org", "net", "io"] {
            assert!(registry.contains_key(tld));
        }
    }

    /// Built-in TLDs resolve without touching the bootstrap registry.
    #[tokio::test]
    async fn test_get_rdap_endpoint_builtin() {
        let lookup = get_rdap_endpoint("com", false).await;
        let endpoint = lookup.unwrap();
        assert!(endpoint.contains("verisign.com"));
    }

    /// Unknown TLDs fail when bootstrap discovery is disabled.
    #[tokio::test]
    async fn test_get_rdap_endpoint_unknown_no_bootstrap() {
        let lookup = get_rdap_endpoint("unknowntld123", false).await;
        assert!(lookup.is_err());
    }
}
425
#[cfg(test)]
mod preset_tests {
    use super::*;

    /// The full TLD list contains the core TLDs and comes back sorted.
    #[test]
    fn test_get_all_known_tlds() {
        let tlds = get_all_known_tlds();

        // Should have our expected core TLDs
        assert!(tlds.len() >= 30);
        for expected in ["com", "org", "io", "ai"] {
            assert!(tlds.contains(&expected.to_string()));
        }

        // Should be sorted for consistent UX: no element may precede a smaller one.
        assert!(tlds.windows(2).all(|pair| pair[0] <= pair[1]));
    }

    /// The startup preset has eight TLDs and is looked up case-insensitively.
    #[test]
    fn test_startup_preset() {
        let tlds = get_preset_tlds("startup").unwrap();

        assert_eq!(tlds.len(), 8);
        for expected in ["com", "io", "ai", "tech"] {
            assert!(tlds.contains(&expected.to_string()));
        }

        // Case insensitive
        let upper = get_preset_tlds("STARTUP");
        let lower = get_preset_tlds("startup");
        assert_eq!(upper, lower);
    }

    /// The enterprise preset has six TLDs including the classic gTLDs.
    #[test]
    fn test_enterprise_preset() {
        let tlds = get_preset_tlds("enterprise").unwrap();

        assert_eq!(tlds.len(), 6);
        for expected in ["com", "org", "biz"] {
            assert!(tlds.contains(&expected.to_string()));
        }
    }

    /// The country preset has nine ccTLDs.
    #[test]
    fn test_country_preset() {
        let tlds = get_preset_tlds("country").unwrap();

        assert_eq!(tlds.len(), 9);
        for expected in ["us", "uk", "de"] {
            assert!(tlds.contains(&expected.to_string()));
        }
    }

    /// Unknown or empty preset names yield `None`.
    #[test]
    fn test_invalid_preset() {
        for name in ["invalid", ""] {
            assert!(get_preset_tlds(name).is_none());
        }
    }

    /// Exactly the three documented presets are advertised.
    #[test]
    fn test_available_presets() {
        let presets = get_available_presets();
        assert_eq!(presets.len(), 3);
        for expected in ["startup", "enterprise", "country"] {
            assert!(presets.contains(&expected));
        }
    }

    /// Every advertised preset must be fully covered by the built-in registry.
    #[test]
    fn test_validate_preset_tlds() {
        // All preset TLDs should have RDAP endpoints
        for preset_name in get_available_presets() {
            let tlds = get_preset_tlds(preset_name).unwrap();
            let all_covered = validate_preset_tlds(&tlds);
            assert!(
                all_covered,
                "Preset '{}' contains TLDs without RDAP endpoints",
                preset_name
            );
        }
    }

    /// Every preset TLD must also appear in the list of all known TLDs.
    #[test]
    fn test_preset_tlds_subset_of_known() {
        let all_tlds = get_all_known_tlds();

        for preset_name in get_available_presets() {
            for tld in get_preset_tlds(preset_name).unwrap() {
                assert!(
                    all_tlds.contains(&tld),
                    "Preset '{}' contains unknown TLD: {}",
                    preset_name,
                    tld
                );
            }
        }
    }
}