Skip to main content

whois_service/
lib.rs

1//! # Whois Service Library
2//! 
3//! A high-performance, production-ready whois lookup library for Rust.
4//! 
5//! ## Features
6//! 
7//! - Hybrid TLD discovery: hardcoded mappings for popular TLDs + dynamic discovery
8//! - Intelligent whois server detection with fallback strategies
9//! - Structured data parsing with calculated fields (age, expiration)
10//! - Optional caching with smart domain normalization
11//! - Production-ready error handling with graceful degradation
12//! - High-performance async implementation with connection pooling
13//! 
14//! ## Quick Start
15//! 
16//! ```rust,no_run
17//! use whois_service::WhoisClient;
18//! 
19//! #[tokio::main]
20//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
21//!     let client = WhoisClient::new().await?;
22//!     let result = client.lookup("google.com").await?;
23//!     
24//!     println!("Domain: {}", result.domain);
25//!     println!("Registrar: {:?}", result.parsed_data.as_ref().and_then(|p| p.registrar.as_ref()));
26//!     
27//!     Ok(())
28//! }
29//! ```
30
31pub mod whois;
32pub mod rdap;
33pub mod cache;
34pub mod config;
35pub mod errors;
36pub mod tld_mappings;
37pub mod buffer_pool;
38pub mod parser;
39pub mod tld;
40pub mod dates;
41pub mod rate_limiter;
42pub mod ip;
43
44
45// Re-export main types for easy access
46pub use whois::{WhoisService, WhoisResult};
47pub use rdap::{RdapService, RdapResult};
48pub use cache::CacheService;
49pub use config::Config;
50pub use errors::WhoisError;
51pub use tld::extract_tld;
52pub use dates::{parse_date, calculate_date_fields};
53pub use ip::{ValidatedIpAddress, Rir, detect_rir};
54
55use std::sync::Arc;
56
/// Validated and normalized domain name.
///
/// Uses the `addr` crate with Mozilla's Public Suffix List for proper validation.
/// A value produced by [`ValidatedDomain::new`] holds the caller's input after
/// trimming and lowercasing, once the `addr` parser has accepted it as a DNS name.
///
/// Features (provided by the `addr` parser; the details are not verifiable from
/// this file alone):
/// - RFC 1035 / RFC 5891 compliance
/// - Automatic IDNA/punycode handling for internationalized domains
/// - PSL-aware validation (handles complex TLDs like .co.uk)
/// - Proper length and character validation per label
///
/// NOTE(review): the inner `String` is a public field, so a `ValidatedDomain`
/// can also be constructed directly without going through the validation in `new`.
#[derive(Debug, Clone)]
pub struct ValidatedDomain(pub String);
68
69impl ValidatedDomain {
70    /// Validate and normalize a domain name using addr crate with PSL
71    ///
72    /// This provides:
73    /// - Comprehensive RFC compliance
74    /// - IDNA support (converts unicode domains to punycode automatically)
75    /// - PSL validation (knows about .co.uk, .com.au, etc.)
76    pub fn new(domain: impl Into<String>) -> Result<Self, WhoisError> {
77        use addr::parser::DnsName;
78        use addr::psl::List;
79
80        let domain = domain.into().trim().to_lowercase();
81
82        // Check for empty domain
83        if domain.is_empty() {
84            return Err(WhoisError::InvalidDomain("Empty domain".to_string()));
85        }
86
87        // Must have at least one dot (TLD alone is not a valid lookup target)
88        if !domain.contains('.') {
89            return Err(WhoisError::InvalidDomain("Domain must contain at least one dot".to_string()));
90        }
91
92        // Use addr crate for comprehensive validation
93        // This handles RFC 1035/5891, IDNA, punycode, and PSL validation
94        List.parse_dns_name(&domain)
95            .map_err(|e| WhoisError::InvalidDomain(format!("Invalid domain: {}", e)))?;
96
97        Ok(ValidatedDomain(domain))
98    }
99
100    /// Get the validated domain string
101    pub fn as_str(&self) -> &str {
102        &self.0
103    }
104
105    /// Consume and return the inner string
106    pub fn into_inner(self) -> String {
107        self.0
108    }
109}
110
111impl AsRef<str> for ValidatedDomain {
112    fn as_ref(&self) -> &str {
113        &self.0
114    }
115}
116
117impl std::fmt::Display for ValidatedDomain {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        write!(f, "{}", self.0)
120    }
121}
122
/// Detected query type (domain or IP address)
///
/// This enum represents the result of auto-detecting whether a query
/// string is a domain name or an IP address. Each variant carries the
/// already-validated value, so matching on it gives access to the
/// normalized domain or IP without re-validating.
#[derive(Debug, Clone)]
pub enum DetectedQueryType {
    /// The query is a valid domain name
    Domain(ValidatedDomain),
    /// The query is a valid IP address (IPv4 or IPv6)
    IpAddress(ValidatedIpAddress),
}
134
/// Unified validated query that auto-detects domain vs IP address
///
/// This type automatically determines whether the input is a domain name
/// or an IP address and validates it accordingly.
///
/// # Examples
///
/// ```
/// use whois_service::ValidatedQuery;
///
/// // Domain detection
/// let query = ValidatedQuery::new("example.com").unwrap();
/// assert!(query.is_domain());
///
/// // IPv4 detection
/// let query = ValidatedQuery::new("8.8.8.8").unwrap();
/// assert!(query.is_ip());
///
/// // IPv6 detection
/// let query = ValidatedQuery::new("2001:4860:4860::8888").unwrap();
/// assert!(query.is_ip());
/// ```
#[derive(Debug, Clone)]
pub struct ValidatedQuery {
    // The validated value, tagged with the kind of query that was detected.
    query_type: DetectedQueryType,
    // The caller's input exactly as passed to `new` (before trimming).
    // NOTE(review): nothing in this file reads this field; confirm whether an
    // accessor is intended.
    original: String,
}
162
163impl ValidatedQuery {
164    /// Automatically detect whether input is domain or IP address and validate it
165    ///
166    /// This function tries to parse the input as an IP address first (faster validation),
167    /// then falls back to domain validation if IP parsing fails.
168    ///
169    /// # Errors
170    ///
171    /// Returns an error if the input is neither a valid IP address nor a valid domain name.
172    pub fn new(input: impl Into<String>) -> Result<Self, WhoisError> {
173        let input = input.into();
174        let trimmed = input.trim();
175        let original = input.clone();
176
177        // Try IP address first (faster to validate)
178        if let Ok(ip) = ValidatedIpAddress::new(trimmed) {
179            return Ok(Self {
180                query_type: DetectedQueryType::IpAddress(ip),
181                original,
182            });
183        }
184
185        // Fall back to domain validation
186        let domain = ValidatedDomain::new(trimmed)?;
187        Ok(Self {
188            query_type: DetectedQueryType::Domain(domain),
189            original,
190        })
191    }
192
193    /// Get the query type (domain or IP)
194    pub fn query_type(&self) -> &DetectedQueryType {
195        &self.query_type
196    }
197
198    /// Get the validated query as a string
199    pub fn as_str(&self) -> &str {
200        match &self.query_type {
201            DetectedQueryType::Domain(d) => d.as_str(),
202            DetectedQueryType::IpAddress(ip) => ip.as_str(),
203        }
204    }
205
206    /// Check if this query is a domain
207    pub fn is_domain(&self) -> bool {
208        matches!(self.query_type, DetectedQueryType::Domain(_))
209    }
210
211    /// Check if this query is an IP address
212    pub fn is_ip(&self) -> bool {
213        matches!(self.query_type, DetectedQueryType::IpAddress(_))
214    }
215
216    /// Consume and return the inner string
217    pub fn into_inner(self) -> String {
218        match self.query_type {
219            DetectedQueryType::Domain(d) => d.into_inner(),
220            DetectedQueryType::IpAddress(ip) => ip.into_inner(),
221        }
222    }
223}
224
225impl AsRef<str> for ValidatedQuery {
226    fn as_ref(&self) -> &str {
227        self.as_str()
228    }
229}
230
231impl std::fmt::Display for ValidatedQuery {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        write!(f, "{}", self.as_str())
234    }
235}
236
/// Parsed whois data structure with calculated fields
// Serializable via serde; with the `openapi` feature it also derives a utoipa
// schema. Extra notes below are plain `//` comments rather than `///` so the
// existing doc text stays unchanged (presumably it feeds the generated schema
// descriptions — TODO confirm).
//
// The three `*_ago` / `expires_in` fields are filled in by
// `calculate_age_fields`, which delegates to `dates::calculate_date_fields`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
pub struct ParsedWhoisData {
    /// Domain registrar name
    #[cfg_attr(feature = "openapi", schema(example = "MarkMonitor Inc."))]
    pub registrar: Option<String>,

    /// Domain creation date in ISO 8601 format
    #[cfg_attr(feature = "openapi", schema(example = "1997-09-15T04:00:00Z"))]
    pub creation_date: Option<String>,

    /// Domain expiration date in ISO 8601 format
    #[cfg_attr(feature = "openapi", schema(example = "2028-09-14T04:00:00Z"))]
    pub expiration_date: Option<String>,

    /// Last update date in ISO 8601 format
    #[cfg_attr(feature = "openapi", schema(example = "2019-09-09T15:39:04Z"))]
    pub updated_date: Option<String>,

    /// Domain name servers
    // Empty when none were parsed.
    #[cfg_attr(feature = "openapi", schema(example = json!(["NS1.GOOGLE.COM", "NS2.GOOGLE.COM"])))]
    pub name_servers: Vec<String>,

    /// Domain status codes (useful for security analysis)
    // Empty when none were parsed.
    #[cfg_attr(feature = "openapi", schema(example = json!(["clientDeleteProhibited", "clientTransferProhibited"])))]
    pub status: Vec<String>,

    /// Registrant name
    pub registrant_name: Option<String>,

    /// Registrant email
    pub registrant_email: Option<String>,

    /// Administrative contact email
    pub admin_email: Option<String>,

    /// Technical contact email
    pub tech_email: Option<String>,

    /// Days since domain creation (threat indicator - newly registered domains are suspicious)
    // Derived from `creation_date` by `calculate_age_fields`.
    #[cfg_attr(feature = "openapi", schema(example = 10117))]
    pub created_ago: Option<i64>,

    /// Days since last update (activity indicator)
    // Derived from `updated_date` by `calculate_age_fields`.
    #[cfg_attr(feature = "openapi", schema(example = 45))]
    pub updated_ago: Option<i64>,

    /// Days until expiration (domain monitoring - negative if expired)
    // Derived from `expiration_date` by `calculate_age_fields`.
    #[cfg_attr(feature = "openapi", schema(example = 1204))]
    pub expires_in: Option<i64>,
}
289
290impl ParsedWhoisData {
291    /// Create a new ParsedWhoisData with all fields set to None/empty
292    ///
293    /// This eliminates the boilerplate of manually initializing all 13 fields
294    /// in every parser function.
295    pub fn new() -> Self {
296        Self {
297            registrar: None,
298            creation_date: None,
299            expiration_date: None,
300            updated_date: None,
301            name_servers: Vec::new(),
302            status: Vec::new(),
303            registrant_name: None,
304            registrant_email: None,
305            admin_email: None,
306            tech_email: None,
307            created_ago: None,
308            updated_ago: None,
309            expires_in: None,
310        }
311    }
312
313    /// Calculate and update the age-based fields (created_ago, updated_ago, expires_in)
314    ///
315    /// This eliminates the duplicate pattern of calling dates::calculate_date_fields()
316    /// and manually assigning the three return values.
317    pub fn calculate_age_fields(&mut self) {
318        let (created_ago, updated_ago, expires_in) = dates::calculate_date_fields(
319            &self.creation_date,
320            &self.updated_date,
321            &self.expiration_date,
322        );
323        self.created_ago = created_ago;
324        self.updated_ago = updated_ago;
325        self.expires_in = expires_in;
326    }
327}
328
/// Unified result type for both WHOIS and RDAP lookups
///
/// This eliminates the duplication between WhoisResult and RdapResult,
/// which were structurally identical.
///
/// NOTE(review): this file re-exports `WhoisResult` and `RdapResult` from their
/// modules but does not show how they relate to this type (presumably as
/// aliases — confirm in `whois.rs` / `rdap.rs`).
#[derive(Debug, Clone)]
pub struct LookupResult {
    /// The server that was queried (WHOIS or RDAP)
    pub server: String,
    /// Raw response data from the server
    pub raw_data: String,
    /// Parsed and structured WHOIS data (if parsing succeeded)
    pub parsed_data: Option<ParsedWhoisData>,
    /// Parsing analysis and debug information
    pub parsing_analysis: Vec<String>,
}
344
/// High-level whois client with optional caching
///
/// Cloning is cheap: the lookup service and the cache are both held behind
/// `Arc`, so clones share the same underlying instances.
#[derive(Clone)]
pub struct WhoisClient {
    // Performs the actual domain / IP lookups.
    service: Arc<WhoisService>,
    // `None` when the client was built with `new_without_cache`.
    cache: Option<Arc<CacheService>>,
}
351
352impl WhoisClient {
353    // === Constructor Methods ===
354    
355    /// Create a new whois client with default configuration
356    pub async fn new() -> Result<Self, WhoisError> {
357        let config = Self::load_default_config()?;
358        Self::new_with_config(config).await
359    }
360
361    /// Create a new whois client with custom configuration
362    pub async fn new_with_config(config: Arc<Config>) -> Result<Self, WhoisError> {
363        let service = Arc::new(WhoisService::new(config.clone()).await?);
364        let cache = Self::initialize_cache(config);
365        
366        Ok(Self { service, cache })
367    }
368
369    /// Create a new whois client without caching
370    pub async fn new_without_cache() -> Result<Self, WhoisError> {
371        let config = Self::load_default_config()?;
372        let service = Arc::new(WhoisService::new(config).await?);
373        
374        Ok(Self { service, cache: None })
375    }
376
377    /// Initialize cache
378    fn initialize_cache(config: Arc<Config>) -> Option<Arc<CacheService>> {
379        Some(Arc::new(CacheService::new(config)))
380    }
381
382    // === Public API Methods ===
383
384    /// Perform a whois lookup for the given domain or IP address
385    ///
386    /// This method automatically detects whether the input is a domain or IP address
387    /// and routes the query accordingly. It will use cache if available, unless `fresh` is true.
388    ///
389    /// # Examples
390    ///
391    /// ```no_run
392    /// # use whois_service::WhoisClient;
393    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
394    /// let client = WhoisClient::new().await?;
395    ///
396    /// // Domain lookup
397    /// let result = client.lookup("example.com").await?;
398    ///
399    /// // IPv4 lookup
400    /// let result = client.lookup("8.8.8.8").await?;
401    ///
402    /// // IPv6 lookup
403    /// let result = client.lookup("2001:4860:4860::8888").await?;
404    /// # Ok(())
405    /// # }
406    /// ```
407    pub async fn lookup(&self, query: &str) -> Result<WhoisResponse, WhoisError> {
408        self.lookup_with_options(query, false).await
409    }
410
411    /// Perform a fresh whois lookup, bypassing cache
412    pub async fn lookup_fresh(&self, query: &str) -> Result<WhoisResponse, WhoisError> {
413        self.lookup_with_options(query, true).await
414    }
415
416    /// Perform a whois lookup with caching options
417    ///
418    /// Auto-detects whether the query is a domain or IP address.
419    pub async fn lookup_with_options(&self, query: &str, fresh: bool) -> Result<WhoisResponse, WhoisError> {
420        let start_time = std::time::Instant::now();
421
422        // Auto-detect query type (domain or IP)
423        let validated = ValidatedQuery::new(query)?;
424
425        // Check type before moving validated
426        let is_domain = validated.is_domain();
427        let query_str = validated.as_str().to_string();
428        let original = validated.into_inner();
429
430        // Dispatch based on query type
431        if is_domain {
432            self.lookup_domain_internal(&query_str, fresh, start_time, original).await
433        } else {
434            self.lookup_ip_internal(&query_str, fresh, start_time, original).await
435        }
436    }
437
438    /// Generic internal lookup implementation for both domains and IPs
439    ///
440    /// Consolidates the duplicate code between domain and IP lookups.
441    /// The only difference is which service method to call.
442    async fn lookup_internal(
443        &self,
444        query: &str,
445        is_ip: bool,
446        fresh: bool,
447        start_time: std::time::Instant,
448        original: String,
449    ) -> Result<WhoisResponse, WhoisError> {
450        // If fresh lookup requested, bypass cache
451        if fresh {
452            let result = if is_ip {
453                self.service.lookup_ip(query).await?
454            } else {
455                self.service.lookup(query).await?
456            };
457            let query_time = start_time.elapsed().as_millis() as u64;
458
459            return Ok(WhoisResponse {
460                domain: original,
461                whois_server: result.server,
462                raw_data: result.raw_data,
463                parsed_data: result.parsed_data,
464                cached: false,
465                query_time_ms: query_time,
466                parsing_analysis: None,
467            });
468        }
469
470        // Use cache with automatic query deduplication if available
471        if let Some(cache) = &self.cache {
472            let query_owned = query.to_string();
473            let service = self.service.clone();
474
475            let mut response = cache
476                .get_or_fetch(query, || async move {
477                    let result = if is_ip {
478                        service.lookup_ip(&query_owned).await?
479                    } else {
480                        service.lookup(&query_owned).await?
481                    };
482                    let query_time = start_time.elapsed().as_millis() as u64;
483
484                    Ok(WhoisResponse {
485                        domain: query_owned.clone(),
486                        whois_server: result.server,
487                        raw_data: result.raw_data,
488                        parsed_data: result.parsed_data,
489                        cached: false,
490                        query_time_ms: query_time,
491                        parsing_analysis: None,
492                    })
493                })
494                .await?;
495
496            // Restore original input format (preserves user's input case/whitespace)
497            response.domain = original;
498            Ok(response)
499        } else {
500            // No cache - perform direct lookup
501            let result = if is_ip {
502                self.service.lookup_ip(query).await?
503            } else {
504                self.service.lookup(query).await?
505            };
506            let query_time = start_time.elapsed().as_millis() as u64;
507
508            Ok(WhoisResponse {
509                domain: original,
510                whois_server: result.server,
511                raw_data: result.raw_data,
512                parsed_data: result.parsed_data,
513                cached: false,
514                query_time_ms: query_time,
515                parsing_analysis: None,
516            })
517        }
518    }
519
520    /// Internal domain lookup implementation
521    async fn lookup_domain_internal(
522        &self,
523        domain: &str,
524        fresh: bool,
525        start_time: std::time::Instant,
526        original: String,
527    ) -> Result<WhoisResponse, WhoisError> {
528        self.lookup_internal(domain, false, fresh, start_time, original).await
529    }
530
531    /// Internal IP lookup implementation
532    async fn lookup_ip_internal(
533        &self,
534        ip_addr: &str,
535        fresh: bool,
536        start_time: std::time::Instant,
537        original: String,
538    ) -> Result<WhoisResponse, WhoisError> {
539        self.lookup_internal(ip_addr, true, fresh, start_time, original).await
540    }
541
542
543    // === Utility Methods ===
544
545    /// Get cache statistics if caching is enabled
546    pub fn cache_enabled(&self) -> bool {
547        self.cache.is_some()
548    }
549
550    // === Private Helper Methods ===
551
552    /// Load default configuration - eliminates DRY violation
553    fn load_default_config() -> Result<Arc<Config>, WhoisError> {
554        let config = Arc::new(Config::load().map_err(WhoisError::ConfigError)?);
555        Ok(config)
556    }
557}
558
/// Response structure for whois lookups
// Field notes are plain `//` comments so the text that the optional utoipa
// schema derive may pick up stays unchanged.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))]
pub struct WhoisResponse {
    // The query this response is for (a domain or an IP address), as reported
    // back by `WhoisClient`.
    pub domain: String,
    // The server that answered the lookup.
    pub whois_server: String,
    // Raw response text from that server.
    pub raw_data: String,
    // Structured data extracted from `raw_data`, when available.
    pub parsed_data: Option<ParsedWhoisData>,
    // `WhoisClient` always builds responses with `false`; presumably the cache
    // layer sets this to `true` on a hit — TODO confirm in `cache.rs`.
    pub cached: bool,
    // Milliseconds elapsed between the start of the lookup call and the moment
    // the response was built.
    pub query_time_ms: u64,
    // Omitted from serialized output when `None`; `WhoisClient` always leaves
    // it `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parsing_analysis: Option<Vec<String>>,
}
572
#[cfg(test)]
mod tests {
    use super::*;

    /// A cache-less client can be built from the default configuration.
    #[tokio::test]
    async fn test_whois_client_creation() {
        assert!(WhoisClient::new_without_cache().await.is_ok());
    }

    /// Malformed queries are rejected by validation before any lookup happens.
    #[tokio::test]
    async fn test_domain_validation() {
        let client = WhoisClient::new_without_cache().await.unwrap();

        // An empty query, and a name without a dot.
        for &bad in &["", "invalid"] {
            let outcome = client.lookup(bad).await;
            assert!(
                matches!(outcome, Err(WhoisError::InvalidDomain(_))),
                "expected InvalidDomain for {:?}",
                bad
            );
        }
    }

    /// Well-formed names are accepted, including ones that need normalizing.
    #[test]
    fn test_validated_domain_valid() {
        let accepted = [
            // Standard domains
            "example.com",
            "sub.example.com",
            "deep.sub.example.com",
            // Uppercase / mixed case (normalized to lowercase)
            "EXAMPLE.COM",
            "Example.Com",
            // Surrounding whitespace (trimmed)
            "  example.com  ",
            // Multi-label public suffixes
            "example.co.uk",
            "example.com.au",
            // Hyphens inside labels
            "my-site.example.com",
            "a-b-c.example.com",
        ];

        for &input in &accepted {
            assert!(ValidatedDomain::new(input).is_ok(), "{:?} should be accepted", input);
        }
    }

    /// Empty, dot-less and structurally broken names are rejected.
    #[test]
    fn test_validated_domain_invalid() {
        let rejected = [
            // Empty after trimming
            "",
            "   ",
            // No dot (TLD or bare hostname only)
            "com",
            "localhost",
            // Empty label: passes the empty/dot pre-checks, so the rejection
            // comes from the addr parser
            "example..com",
        ];

        for &input in &rejected {
            assert!(ValidatedDomain::new(input).is_err(), "{:?} should be rejected", input);
        }

        // Other edge cases (label and total length limits, character rules) are
        // left to the addr library and are not pinned down here.
    }

    /// Accepted names are stored trimmed and lowercased.
    #[test]
    fn test_validated_domain_normalization() {
        let cases = [
            ("EXAMPLE.COM", "example.com"),
            ("  example.com  ", "example.com"),
            ("Example.Com", "example.com"),
        ];

        for &(input, expected) in &cases {
            let domain = ValidatedDomain::new(input).unwrap();
            assert_eq!(domain.as_str(), expected);
        }
    }

    /// Unusual but valid shapes: short labels, digits, and long labels.
    #[test]
    fn test_validated_domain_edge_cases() {
        let fixed = [
            // Single character labels
            "a.b.c",
            // Numeric labels under a real TLD
            "123.456.com",
            // All numeric (valid as a DNS name)
            "123.456",
        ];
        for &input in &fixed {
            assert!(ValidatedDomain::new(input).is_ok(), "{:?} should be accepted", input);
        }

        // A label at the 63-character maximum
        let longest_label = format!("{}.com", "a".repeat(63));
        assert!(ValidatedDomain::new(longest_label).is_ok());

        // Several long labels that together stay within the total limit
        let long_name = format!("{}.{}.{}.com", "a".repeat(50), "b".repeat(50), "c".repeat(50));
        assert!(ValidatedDomain::new(long_name).is_ok());
    }

    /// Every accessor exposes the same underlying string.
    #[test]
    fn test_validated_domain_methods() {
        let domain = ValidatedDomain::new("example.com").unwrap();

        // as_str()
        assert_eq!(domain.as_str(), "example.com");

        // AsRef<str>
        let borrowed: &str = domain.as_ref();
        assert_eq!(borrowed, "example.com");

        // Display
        assert_eq!(format!("{}", domain), "example.com");

        // into_inner()
        let owned = ValidatedDomain::new("test.com").unwrap().into_inner();
        assert_eq!(owned, "test.com");
    }
}
694}