Skip to main content

seer_core/
lookup.rs

1use std::collections::HashMap;
2use std::net::Ipv6Addr;
3use std::str::FromStr;
4use std::sync::{Arc, Mutex, Weak};
5use std::time::Duration;
6
7use chrono::{DateTime, Utc};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Notify;
12use tracing::{debug, instrument, warn};
13
14use tokio::time::timeout as tokio_timeout;
15
16use crate::availability::{AvailabilityChecker, AvailabilityResult};
17use crate::cache::TtlCache;
18use crate::error::{Result, SeerError};
19use crate::rdap::{RdapClient, RdapResponse};
20use crate::whois::{get_registry_url, get_tld, WhoisClient, WhoisResponse};
21
/// How long a lookup result stays valid in the cache: five minutes.
const LOOKUP_CACHE_TTL: Duration = Duration::from_secs(300);

/// How long the slower protocol may keep running once the faster one has
/// finished. For example, when WHOIS completes and RDAP has not answered
/// within this window, the WHOIS result is used instead of waiting out the
/// full RDAP timeout.
const PROTOCOL_GRACE_PERIOD: Duration = Duration::from_secs(5);

/// Upper bound, in characters, on error strings exposed to the public.
const MAX_PUBLIC_ERROR_LEN: usize = 256;
32
/// Global cache for lookup results to avoid redundant network calls.
/// Entries are keyed by the normalized domain and expire after
/// [`LOOKUP_CACHE_TTL`].
static LOOKUP_CACHE: Lazy<TtlCache<String, LookupResult>> =
    Lazy::new(|| TtlCache::new(LOOKUP_CACHE_TTL));

/// In-flight lookup coalescing map: normalized-domain -> Weak<Notify>.
/// Only one network race runs per unique domain at a time; concurrent callers
/// wait on the shared Notify and then read the result from LOOKUP_CACHE.
///
/// The map stores only a `Weak` handle; the owning task keeps the strong
/// `Arc<Notify>` inside its `InflightGuard`, whose `Drop` removes the entry.
/// This is a `std::sync::Mutex`, so the guard must never be held across an
/// `.await`.
static LOOKUP_INFLIGHT: Lazy<Mutex<HashMap<String, Weak<Notify>>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));
42
/// Pattern for IPv4 literals that are stripped from public error messages.
///
/// It matches the dotted-quad *shape* only (four groups of one to three
/// digits separated by dots, delimited by word boundaries); octet values are
/// not range-checked, so strings such as `999.1.2.3` are matched as well.
static IPV4_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").expect("IPV4_RE is a valid regex"));
46
47/// Candidate pattern for IPv6 literals: a hex/colon token containing either
48/// a `::` compression or at least three colons. This catches plausible IPv6
49/// addresses cheaply; each match is then validated by `Ipv6Addr::from_str`
50/// before redaction, so MAC fragments, hex hashes, and similar colon-laden
51/// tokens are left alone.
52static IPV6_CANDIDATE_RE: Lazy<Regex> = Lazy::new(|| {
53    Regex::new(r"\b[0-9a-fA-F:]*(?:::|(?:[0-9a-fA-F]{1,4}:){3,})[0-9a-fA-F:]*\b")
54        .expect("IPV6_CANDIDATE_RE is a valid regex")
55});
56
57/// Redact substrings that parse as valid IPv6 addresses, leaving non-IPv6
58/// tokens (e.g. `af:ba:12`) untouched.
59fn strip_ipv6(msg: &str) -> String {
60    IPV6_CANDIDATE_RE
61        .replace_all(msg, |caps: &regex::Captures| {
62            let candidate = &caps[0];
63            if Ipv6Addr::from_str(candidate).is_ok() {
64                "[ip-redacted]".to_string()
65            } else {
66                candidate.to_string()
67            }
68        })
69        .into_owned()
70}
71
/// Test-only hook: counts the number of times `lookup_concurrent` is actually
/// invoked (i.e., the underlying network race runs). Used to verify request
/// coalescing. Not exposed outside the crate.
///
/// Compiled only under `cfg(test)`; `lookup_concurrent` increments it once
/// per call with `SeqCst` ordering.
#[cfg(test)]
static LOOKUP_CONCURRENT_CALLS: Lazy<std::sync::atomic::AtomicUsize> =
    Lazy::new(|| std::sync::atomic::AtomicUsize::new(0));
78
79/// Sanitizes an error message for inclusion in a public-facing response.
80///
81/// Strips IPv4 and IPv6 literals (to avoid leaking internal addresses when
82/// an SSRF guard rejects a resolved URL) and caps the total length to
83/// [`MAX_PUBLIC_ERROR_LEN`] characters.
84fn sanitize_error_for_public(msg: &str) -> String {
85    let s = IPV4_RE.replace_all(msg, "[ip-redacted]");
86    let s = strip_ipv6(&s);
87    if s.chars().count() > MAX_PUBLIC_ERROR_LEN {
88        let mut trunc: String = s.chars().take(MAX_PUBLIC_ERROR_LEN).collect();
89        trunc.push('…');
90        trunc
91    } else {
92        s
93    }
94}
95
/// RAII guard for the in-flight-lookup slot. On drop, removes the entry
/// from `LOOKUP_INFLIGHT` and notifies any waiters so they can read the
/// freshly-populated cache.
///
/// NOTE on failed-owner retry semantics:
/// When the owning task's lookup fails, `InflightGuard::drop` runs, the
/// `HashMap` entry is removed, and `notify_waiters()` fires. Waiters wake,
/// observe an empty cache, and one of them becomes the new owner — triggering
/// a fresh network race. This means transient failures are automatically
/// retried by any concurrent waiter. Callers that observe a timeout error
/// should not assume no work is in flight; another concurrent caller may
/// already be retrying.
struct InflightGuard {
    /// Normalized domain under which this lookup is registered in
    /// `LOOKUP_INFLIGHT`.
    key: String,
    /// Strong handle to the `Notify` that waiters obtain by upgrading the
    /// `Weak` stored in the map.
    notify: Arc<Notify>,
}
112
113impl Drop for InflightGuard {
114    fn drop(&mut self) {
115        // Recover from mutex poisoning rather than leaking the HashMap entry.
116        // A poisoned mutex here would otherwise strand the key in the map
117        // until process exit, permanently blocking future lookups for this
118        // domain from acquiring ownership.
119        let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
120        inflight.remove(&self.key);
121        self.notify.notify_waiters();
122    }
123}
124
/// Internal classification of the RDAP leg of a concurrent lookup.
///
/// Distinguishing `NoData` (HTTP 200 but response was missing useful fields)
/// from `Error` lets the orchestrator prefer a thin WHOIS result over the
/// availability fallback when RDAP silently returned nothing.
enum RdapOutcome {
    /// RDAP answered and `is_rdap_response_useful` accepted the response.
    Useful(RdapResponse),
    /// RDAP answered, but `is_rdap_response_useful` rejected the response.
    /// The response is kept so it can be attached as `rdap_fallback`.
    NoData(RdapResponse),
    /// The RDAP client returned an error.
    Error(SeerError),
    /// RDAP future did not complete within the grace period after the other
    /// protocol finished.
    GraceTimeout,
}
138
/// Progress callback for smart lookup operations.
/// Called with a message describing the current phase of the lookup.
///
/// The `Send + Sync` bounds and the `Arc` wrapper allow the same callback to
/// be shared between tasks and threads.
pub type LookupProgressCallback = Arc<dyn Fn(&str) + Send + Sync>;
142
/// Result of a smart lookup.
///
/// Serialized as an internally tagged enum: a `source` field carries the
/// lowercase variant name (`rdap`, `whois`, or `available`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "source", rename_all = "lowercase")]
pub enum LookupResult {
    /// RDAP supplied the primary data.
    Rdap {
        /// The RDAP response (boxed).
        data: Box<RdapResponse>,
        /// WHOIS response obtained alongside RDAP, if any; omitted from the
        /// serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        whois_fallback: Option<WhoisResponse>,
    },
    /// WHOIS supplied the primary data because RDAP was not useful.
    Whois {
        /// The WHOIS response.
        data: WhoisResponse,
        /// Why RDAP was not used, when known. Unlike the other optional
        /// fields this one has no `skip_serializing_if` attribute.
        rdap_error: Option<String>,
        /// An RDAP response that was received but not used as primary data;
        /// omitted from the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        rdap_fallback: Option<Box<RdapResponse>>,
    },
    /// Neither protocol produced a usable answer; carries the result of the
    /// availability check instead.
    Available {
        /// Outcome of the availability check (boxed).
        data: Box<AvailabilityResult>,
        /// Description of the RDAP failure.
        rdap_error: String,
        /// Description of the WHOIS failure.
        whois_error: String,
    },
}
163
164impl LookupResult {
165    /// Returns the domain name from the lookup result.
166    pub fn domain_name(&self) -> Option<String> {
167        match self {
168            LookupResult::Rdap { data, .. } => data.domain_name().map(String::from),
169            LookupResult::Whois { data, .. } => Some(data.domain.clone()),
170            LookupResult::Available { data, .. } => Some(data.domain.clone()),
171        }
172    }
173
174    /// Returns the registrar name, preferring RDAP data with WHOIS fallback.
175    pub fn registrar(&self) -> Option<String> {
176        match self {
177            LookupResult::Rdap {
178                data,
179                whois_fallback,
180            } => data
181                .get_registrar()
182                .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone())),
183            LookupResult::Whois { data, .. } => data.registrar.clone(),
184            LookupResult::Available { .. } => None,
185        }
186    }
187
188    /// Returns the registrant organization, preferring RDAP data with WHOIS fallback.
189    pub fn organization(&self) -> Option<String> {
190        match self {
191            LookupResult::Rdap {
192                data,
193                whois_fallback,
194            } => data
195                .get_registrant_organization()
196                .or_else(|| whois_fallback.as_ref().and_then(|w| w.organization.clone())),
197            LookupResult::Whois { data, .. } => data.organization.clone(),
198            LookupResult::Available { .. } => None,
199        }
200    }
201
202    /// Returns true if the result came from RDAP.
203    pub fn is_rdap(&self) -> bool {
204        matches!(self, LookupResult::Rdap { .. })
205    }
206
207    /// Returns true if the result came from WHOIS.
208    pub fn is_whois(&self) -> bool {
209        matches!(self, LookupResult::Whois { .. })
210    }
211
212    /// Returns true if the result is an availability check fallback.
213    pub fn is_available(&self) -> bool {
214        matches!(self, LookupResult::Available { .. })
215    }
216
217    /// Returns the expiration date and registrar info from the lookup result.
218    pub fn expiration_info(&self) -> (Option<DateTime<Utc>>, Option<String>) {
219        match self {
220            LookupResult::Rdap {
221                data,
222                whois_fallback,
223            } => {
224                // Try to get expiration from RDAP events
225                let expiration_date = data
226                    .events
227                    .iter()
228                    .find(|e| e.event_action == "expiration")
229                    .and_then(|e| e.parsed_date())
230                    .or_else(|| {
231                        // Fallback to WHOIS if available
232                        whois_fallback.as_ref().and_then(|w| w.expiration_date)
233                    });
234
235                let registrar = data
236                    .get_registrar()
237                    .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone()));
238
239                (expiration_date, registrar)
240            }
241            LookupResult::Whois { data, .. } => (data.expiration_date, data.registrar.clone()),
242            LookupResult::Available { .. } => (None, None),
243        }
244    }
245}
246
247/// Before caching, trim raw WHOIS response to limit cache memory.
248/// A full WHOIS raw_response can be up to 1 MB; we cap it at 32 KB which is
249/// plenty for the parsed fields while preventing the cache from ballooning.
250fn trim_for_cache(mut result: LookupResult) -> LookupResult {
251    const MAX_RAW: usize = 32 * 1024;
252
253    match result {
254        LookupResult::Whois { ref mut data, .. } => {
255            if data.raw_response.len() > MAX_RAW {
256                data.raw_response.truncate(MAX_RAW);
257                data.raw_response.push_str("\n... [truncated for cache]");
258            }
259        }
260        LookupResult::Rdap {
261            ref mut whois_fallback,
262            ..
263        } => {
264            if let Some(ref mut w) = whois_fallback {
265                if w.raw_response.len() > MAX_RAW {
266                    w.raw_response.truncate(MAX_RAW);
267                    w.raw_response.push_str("\n... [truncated for cache]");
268                }
269            }
270        }
271        LookupResult::Available { .. } => {}
272    }
273
274    result
275}
276
/// Domain lookup front end that combines an RDAP client, a WHOIS client and
/// an availability checker.
#[derive(Debug, Clone)]
pub struct SmartLookup {
    /// Client used for the RDAP leg of a lookup.
    rdap_client: RdapClient,
    /// Client used for the WHOIS leg of a lookup.
    whois_client: WhoisClient,
    /// Used when neither RDAP nor WHOIS yields a usable result.
    availability_checker: AvailabilityChecker,
    /// Deprecated: both protocols are now always attempted concurrently.
    prefer_rdap: bool,
    /// Deprecated: WHOIS data is now always attached when available.
    include_fallback: bool,
}
287
impl Default for SmartLookup {
    /// Equivalent to [`SmartLookup::new`].
    fn default() -> Self {
        Self::new()
    }
}
293
294impl SmartLookup {
295    /// Creates a new SmartLookup that runs RDAP and WHOIS concurrently,
296    /// falling back to an availability check if both fail.
297    pub fn new() -> Self {
298        Self {
299            rdap_client: RdapClient::new(),
300            whois_client: WhoisClient::new(),
301            availability_checker: AvailabilityChecker::new(),
302            prefer_rdap: true,
303            include_fallback: false,
304        }
305    }
306
307    /// Deprecated: both protocols are now always attempted concurrently.
308    /// This method is kept for API compatibility but has no effect.
309    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
310    pub fn prefer_rdap(mut self, prefer: bool) -> Self {
311        self.prefer_rdap = prefer;
312        self
313    }
314
315    /// Deprecated: WHOIS data is now always attached when available.
316    /// This method is kept for API compatibility but has no effect.
317    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
318    pub fn include_fallback(mut self, include: bool) -> Self {
319        self.include_fallback = include;
320        self
321    }
322
    /// Performs a smart lookup for a domain, trying both RDAP and WHOIS concurrently.
    /// Falls back to an availability check if both fail.
    /// Results are cached for 5 minutes to avoid redundant network calls.
    ///
    /// This is `lookup_with_progress` without a progress callback; it returns
    /// exactly what that method returns, including its errors.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn lookup(&self, domain: &str) -> Result<LookupResult> {
        self.lookup_with_progress(domain, None).await
    }
330
331    /// Performs a lookup with an optional progress callback.
332    /// The callback is called with messages describing the current phase.
333    /// Results are cached for 5 minutes. Concurrent lookups for the same
334    /// domain are coalesced — only one network race runs per domain at a time.
335    #[instrument(skip(self, progress), fields(domain = %domain))]
336    pub async fn lookup_with_progress(
337        &self,
338        domain: &str,
339        progress: Option<LookupProgressCallback>,
340    ) -> Result<LookupResult> {
341        let normalized = crate::validation::normalize_domain(domain)?;
342
343        // Check cache first
344        if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
345            debug!(domain = %normalized, "Returning cached lookup result");
346            return Ok(cached);
347        }
348
349        // Coalesce in-flight lookups: if another task is already running a
350        // race for this domain, wait on its Notify rather than starting a
351        // second race. Two branches:
352        //   - Waiter: another task owns the slot; await its notify, then
353        //     read the cache. If the cache is still empty (owner failed),
354        //     loop and re-contend for ownership.
355        //   - Owner: no entry exists; insert a Weak handle, hold the Arc
356        //     for the duration of the work, then remove and notify on drop.
357        //
358        // A `loop` with a separate lock-scope per iteration keeps the
359        // `MutexGuard` from being held across any `.await`.
360        let _guard = loop {
361            enum Slot {
362                Waiter(Arc<Notify>),
363                Owner(InflightGuard),
364            }
365
366            let slot = {
367                let mut inflight = LOOKUP_INFLIGHT
368                    .lock()
369                    .expect("LOOKUP_INFLIGHT mutex poisoned");
370                match inflight.get(&normalized).and_then(|w| w.upgrade()) {
371                    Some(existing) => Slot::Waiter(existing),
372                    None => {
373                        let n = Arc::new(Notify::new());
374                        inflight.insert(normalized.clone(), Arc::downgrade(&n));
375                        Slot::Owner(InflightGuard {
376                            key: normalized.clone(),
377                            notify: n,
378                        })
379                    }
380                }
381            };
382
383            match slot {
384                Slot::Waiter(n) => {
385                    debug!(domain = %normalized, "Waiting for in-flight lookup to complete");
386                    n.notified().await;
387                    if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
388                        return Ok(cached);
389                    }
390                    // Owner finished without populating the cache (failed
391                    // or errored). Re-contend for ownership.
392                    continue;
393                }
394                Slot::Owner(guard) => break guard,
395            }
396        };
397
398        let result = self.lookup_concurrent(&normalized, progress).await?;
399
400        // Cache a trimmed copy to limit memory usage before releasing
401        // waiters (via guard drop) so they observe the cached value.
402        LOOKUP_CACHE.insert(normalized.clone(), trim_for_cache(result.clone()));
403
404        Ok(result)
405    }
406
    /// Clears the lookup result cache.
    ///
    /// The cache is a process-wide static, so this affects every
    /// `SmartLookup` instance. The in-flight coalescing map is not touched.
    pub fn clear_cache() {
        LOOKUP_CACHE.clear();
    }
411
    /// Runs the RDAP and WHOIS queries for `domain` concurrently and picks
    /// the result to return.
    ///
    /// Selection order:
    /// 1. RDAP response judged useful by `is_rdap_response_useful` →
    ///    `LookupResult::Rdap`, with the WHOIS response attached when WHOIS
    ///    also completed successfully.
    /// 2. Otherwise, a successfully completed WHOIS response →
    ///    `LookupResult::Whois`, carrying a description of why RDAP was not
    ///    used and, if RDAP answered without useful data, that response.
    /// 3. Otherwise → the outcome of `availability_fallback`.
    ///
    /// `domain` is expected to be already normalized (the visible caller
    /// passes the normalized name). `progress`, when present, is called with
    /// a status message at the start and when WHOIS is used in place of RDAP.
    #[instrument(skip(self, progress), fields(domain = %domain))]
    async fn lookup_concurrent(
        &self,
        domain: &str,
        progress: Option<LookupProgressCallback>,
    ) -> Result<LookupResult> {
        // Test-only counter used to verify request coalescing.
        #[cfg(test)]
        LOOKUP_CONCURRENT_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);

        debug!(domain = %domain, "Attempting RDAP and WHOIS concurrently");

        if let Some(ref cb) = progress {
            cb("Querying RDAP and WHOIS concurrently");
        }

        let rdap_fut = self.rdap_client.lookup_domain(domain);
        let whois_fut = self.whois_client.lookup(domain);

        // Pin both futures on the stack so `select!` can poll them by
        // `&mut` and the unfinished one can still be awaited afterwards.
        tokio::pin!(rdap_fut);
        tokio::pin!(whois_fut);

        // Race: whichever finishes first gets a grace period for the other.
        //
        // We track whether each side completed naturally or was truncated by
        // the grace period, so downstream error messages can distinguish a
        // true timeout from a loser-truncation.
        //
        // NOTE(review): the grace period is applied no matter whether the
        // first finisher succeeded or failed. A protocol that fails fast
        // therefore limits the other one to PROTOCOL_GRACE_PERIOD even though
        // there is no result in hand yet — confirm this is intended.
        enum LegOutcome<T> {
            Completed(T),
            GraceTruncated,
        }

        let (rdap_leg, whois_leg) = tokio::select! {
            rdap_res = &mut rdap_fut => {
                // RDAP finished first — give WHOIS a grace period
                let whois_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, whois_fut).await {
                    Ok(res) => LegOutcome::Completed(res),
                    Err(_) => {
                        debug!("WHOIS did not finish within grace period, proceeding with RDAP only");
                        LegOutcome::GraceTruncated
                    }
                };
                (LegOutcome::Completed(rdap_res), whois_leg)
            }
            whois_res = &mut whois_fut => {
                // WHOIS finished first — give RDAP a grace period
                let rdap_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, rdap_fut).await {
                    Ok(res) => LegOutcome::Completed(res),
                    Err(_) => {
                        debug!("RDAP did not finish within grace period, proceeding with WHOIS only");
                        LegOutcome::GraceTruncated
                    }
                };
                (rdap_leg, LegOutcome::Completed(whois_res))
            }
        };

        // Classify the RDAP leg.
        let rdap_outcome = match rdap_leg {
            LegOutcome::Completed(Ok(data)) => {
                if self.is_rdap_response_useful(&data) {
                    RdapOutcome::Useful(data)
                } else {
                    RdapOutcome::NoData(data)
                }
            }
            LegOutcome::Completed(Err(e)) => RdapOutcome::Error(e),
            LegOutcome::GraceTruncated => RdapOutcome::GraceTimeout,
        };

        // Phase 1: If RDAP returned useful data, use it as primary.
        if let RdapOutcome::Useful(rdap_data) = rdap_outcome {
            debug!("RDAP lookup successful");
            // Attach WHOIS only if it completed successfully; a WHOIS error
            // or grace-period truncation is dropped here.
            let whois_fallback = match whois_leg {
                LegOutcome::Completed(Ok(w)) => Some(w),
                _ => None,
            };
            return Ok(LookupResult::Rdap {
                data: Box::new(rdap_data),
                whois_fallback,
            });
        }

        // RDAP was not useful (NoData, Error, or GraceTimeout). Prefer WHOIS
        // if it returned any response, even a thin one — this is safer than
        // falling back to the availability heuristic when we have actual
        // registry data in hand.
        let (rdap_error_str, rdap_fallback_data) = match rdap_outcome {
            RdapOutcome::Useful(_) => {
                // Unreachable in this branch (we returned above), but handle
                // defensively rather than panicking across the FFI boundary.
                debug!("Unexpected RdapOutcome::Useful in fallback branch");
                (String::from("RDAP ok"), None)
            }
            RdapOutcome::NoData(data) => {
                ("RDAP response incomplete".to_string(), Some(Box::new(data)))
            }
            RdapOutcome::Error(e) => (e.to_string(), None),
            RdapOutcome::GraceTimeout => (
                format!(
                    "RDAP did not return within {}s grace period after WHOIS won",
                    PROTOCOL_GRACE_PERIOD.as_secs()
                ),
                None,
            ),
        };

        if let LegOutcome::Completed(Ok(whois_data)) = whois_leg {
            debug!("Using WHOIS result (RDAP not useful)");
            if let Some(ref cb) = progress {
                cb("RDAP not available (using WHOIS)");
            }
            return Ok(LookupResult::Whois {
                data: whois_data,
                rdap_error: Some(rdap_error_str),
                rdap_fallback: rdap_fallback_data,
            });
        }

        // Both sides failed to provide useful data. Craft a precise WHOIS
        // error string that distinguishes true errors from grace-period
        // truncation.
        let whois_error_str = match whois_leg {
            LegOutcome::Completed(Err(e)) => e.to_string(),
            LegOutcome::Completed(Ok(_)) => {
                // Already handled above; treat defensively.
                debug!("Unexpected completed-Ok WHOIS in availability fallback branch");
                "WHOIS returned but was not used".to_string()
            }
            LegOutcome::GraceTruncated => format!(
                "WHOIS did not return within {}s grace period after RDAP won",
                PROTOCOL_GRACE_PERIOD.as_secs()
            ),
        };

        // Note: an RDAP response classified as NoData is not forwarded to the
        // availability fallback; only its error description is.
        self.availability_fallback(domain, rdap_error_str, whois_error_str, progress)
            .await
    }
549
550    async fn availability_fallback(
551        &self,
552        domain: &str,
553        rdap_error: String,
554        whois_error: String,
555        progress: Option<LookupProgressCallback>,
556    ) -> Result<LookupResult> {
557        if let Some(ref cb) = progress {
558            cb("RDAP and WHOIS unavailable (checking availability)");
559        }
560        warn!(
561            domain = %domain,
562            rdap_error = %rdap_error,
563            whois_error = %whois_error,
564            "Both RDAP and WHOIS failed, falling back to availability check"
565        );
566
567        match self.availability_checker.check(domain).await {
568            Ok(avail) => Ok(LookupResult::Available {
569                data: Box::new(avail),
570                rdap_error: sanitize_error_for_public(&rdap_error),
571                whois_error: sanitize_error_for_public(&whois_error),
572            }),
573            Err(avail_err) => {
574                let tld = get_tld(domain).unwrap_or("unknown");
575                let registry_url = get_registry_url(tld).unwrap_or_else(|| {
576                    format!("https://www.iana.org/domains/root/db/{}.html", tld)
577                });
578                Err(SeerError::LookupFailed {
579                    domain: domain.to_string(),
580                    details: format!(
581                        "RDAP failed ({}), WHOIS failed ({}), availability check failed ({})",
582                        rdap_error, whois_error, avail_err
583                    ),
584                    registry_url,
585                })
586            }
587        }
588    }
589
590    fn is_rdap_response_useful(&self, response: &RdapResponse) -> bool {
591        // Check if we have at least some meaningful data
592        let has_name = response.ldh_name.is_some() || response.unicode_name.is_some();
593        let has_dates = response
594            .events
595            .iter()
596            .any(|e| e.event_action == "registration" || e.event_action == "expiration");
597        let has_entities = !response.entities.is_empty();
598        let has_nameservers = !response.nameservers.is_empty();
599        let has_status = !response.status.is_empty();
600
601        // Consider useful if we have the name plus at least one other piece of info
602        has_name && (has_dates || has_entities || has_nameservers || has_status)
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use super::*;
609
610    #[test]
611    fn test_lookup_result_domain_name_whois() {
612        let result = LookupResult::Whois {
613            data: WhoisResponse {
614                domain: "example.com".to_string(),
615                registrar: Some("Test Registrar".to_string()),
616                registrant: None,
617                organization: None,
618                registrant_email: None,
619                registrant_phone: None,
620                registrant_address: None,
621                registrant_country: None,
622                admin_name: None,
623                admin_organization: None,
624                admin_email: None,
625                admin_phone: None,
626                tech_name: None,
627                tech_organization: None,
628                tech_email: None,
629                tech_phone: None,
630                creation_date: None,
631                expiration_date: None,
632                updated_date: None,
633                status: vec![],
634                nameservers: vec![],
635                dnssec: None,
636                whois_server: "whois.example.com".to_string(),
637                raw_response: String::new(),
638            },
639            rdap_error: None,
640            rdap_fallback: None,
641        };
642
643        assert_eq!(result.domain_name(), Some("example.com".to_string()));
644        assert_eq!(result.registrar(), Some("Test Registrar".to_string()));
645        assert!(result.is_whois());
646        assert!(!result.is_rdap());
647        assert!(!result.is_available());
648    }
649
650    #[test]
651    fn test_lookup_result_serialization() {
652        let result = LookupResult::Whois {
653            data: WhoisResponse {
654                domain: "test.com".to_string(),
655                registrar: None,
656                registrant: None,
657                organization: None,
658                registrant_email: None,
659                registrant_phone: None,
660                registrant_address: None,
661                registrant_country: None,
662                admin_name: None,
663                admin_organization: None,
664                admin_email: None,
665                admin_phone: None,
666                tech_name: None,
667                tech_organization: None,
668                tech_email: None,
669                tech_phone: None,
670                creation_date: None,
671                expiration_date: None,
672                updated_date: None,
673                status: vec![],
674                nameservers: vec![],
675                dnssec: None,
676                whois_server: String::new(),
677                raw_response: String::new(),
678            },
679            rdap_error: Some("RDAP failed".to_string()),
680            rdap_fallback: None,
681        };
682
683        let json = serde_json::to_string(&result).unwrap();
684        assert!(json.contains("\"source\":\"whois\""));
685        assert!(json.contains("RDAP failed"));
686    }
687
688    #[test]
689    fn test_lookup_result_available_serialization() {
690        let result = LookupResult::Available {
691            data: Box::new(AvailabilityResult {
692                domain: "test123.xyz".to_string(),
693                available: true,
694                confidence: "medium".to_string(),
695                method: "whois_error".to_string(),
696                details: Some("WHOIS server indicates no matching records".to_string()),
697            }),
698            rdap_error: "RDAP failed".to_string(),
699            whois_error: "WHOIS failed".to_string(),
700        };
701
702        let json = serde_json::to_string(&result).unwrap();
703        assert!(json.contains("\"source\":\"available\""));
704        assert!(json.contains("\"available\":true"));
705        assert!(json.contains("test123.xyz"));
706
707        assert_eq!(result.domain_name(), Some("test123.xyz".to_string()));
708        assert!(result.is_available());
709        assert!(!result.is_rdap());
710        assert!(!result.is_whois());
711        assert!(result.registrar().is_none());
712        assert_eq!(result.expiration_info(), (None, None));
713    }
714
715    #[test]
716    #[allow(deprecated)]
717    fn test_smart_lookup_builder() {
718        let lookup = SmartLookup::new().prefer_rdap(false).include_fallback(true);
719        assert!(!lookup.prefer_rdap);
720        assert!(lookup.include_fallback);
721    }
722
723    #[test]
724    fn test_lookup_cache_clear() {
725        SmartLookup::clear_cache();
726        assert!(LOOKUP_CACHE.is_empty());
727    }
728
729    // ---------------- sanitize_error_for_public ----------------
730
731    #[test]
732    fn test_sanitize_strips_ipv4() {
733        let msg = "RDAP URL resolves to reserved IP 10.0.0.1 which is forbidden";
734        let sanitized = sanitize_error_for_public(msg);
735        assert!(
736            !sanitized.contains("10.0.0.1"),
737            "IPv4 should be stripped, got: {}",
738            sanitized
739        );
740        assert!(sanitized.contains("[ip-redacted]"));
741    }
742
743    #[test]
744    fn test_sanitize_strips_multiple_ipv4() {
745        let msg = "Could not connect to 192.168.1.1 after trying 127.0.0.1";
746        let sanitized = sanitize_error_for_public(msg);
747        assert!(!sanitized.contains("192.168.1.1"));
748        assert!(!sanitized.contains("127.0.0.1"));
749        // Two redactions expected.
750        assert_eq!(sanitized.matches("[ip-redacted]").count(), 2);
751    }
752
753    #[test]
754    fn test_sanitize_strips_ipv6() {
755        let msg = "RDAP URL resolves to reserved IP fe80::1 which is forbidden";
756        let sanitized = sanitize_error_for_public(msg);
757        assert!(!sanitized.contains("fe80::1"));
758        assert!(sanitized.contains("[ip-redacted]"));
759    }
760
761    #[test]
762    fn sanitize_leaves_mac_address_like_tokens_alone() {
763        let msg = "error code af:ba:12 at line 5";
764        let out = sanitize_error_for_public(msg);
765        assert!(
766            out.contains("af:ba:12"),
767            "MAC fragment should not be stripped: {}",
768            out
769        );
770    }
771
#[test]
fn sanitize_strips_real_ipv6() {
    // A compressed (`::`) IPv6 literal in the middle of a message.
    let raw = "cannot reach 2001:db8::1 — timeout";
    let sanitized = sanitize_error_for_public(raw);

    let leaked = sanitized.contains("2001:db8::1");
    assert!(!leaked);
    assert!(sanitized.contains("[ip-redacted]"));
}
779
#[test]
fn sanitize_strips_fe80_link_local() {
    // A link-local IPv6 literal must be replaced by the redaction marker.
    let msg = "peer at fe80::1 unreachable";
    let out = sanitize_error_for_public(msg);

    // Checking for the marker alone is not enough: the test would also pass
    // if the address were left in place next to a marker. Assert that the
    // literal itself is gone, mirroring the other IPv6 sanitizer tests.
    assert!(
        !out.contains("fe80::1"),
        "link-local address should be stripped, got: {}",
        out
    );
    assert!(out.contains("[ip-redacted]"));
}
786
#[test]
fn test_sanitize_truncates_long_message() {
    // 500 characters of filler containing no IP literal, well above the cap.
    let oversized = "a".repeat(500);
    let clipped = sanitize_error_for_public(&oversized);

    // Expected shape: MAX_PUBLIC_ERROR_LEN characters followed by exactly
    // one trailing ellipsis character. Count chars, not bytes, because the
    // ellipsis is multi-byte in UTF-8.
    let expected_chars = MAX_PUBLIC_ERROR_LEN + 1;
    assert_eq!(clipped.chars().count(), expected_chars);
    assert!(clipped.ends_with('…'));
}
797
#[test]
fn test_sanitize_preserves_short_messages() {
    // A short message with no IP literal should come back unchanged.
    let original = "RDAP timed out after 15s";
    let roundtripped = sanitize_error_for_public(original);
    assert_eq!(roundtripped, original);
}
804
805    // ---------------- RdapOutcome classification ----------------
806
#[test]
fn test_is_rdap_response_useful_detects_no_data() {
    use crate::rdap::RdapResponse;

    let lookup = SmartLookup::new();

    // Only `ldh_name` is populated; every other field keeps its `Default`
    // value. This models the "200 OK but no useful fields" case, which must
    // be treated as not useful (RdapOutcome::NoData rather than Useful or
    // Error).
    let name_only = RdapResponse {
        ldh_name: Some(String::from("example.com")),
        ..Default::default()
    };
    let name_only_is_useful = lookup.is_rdap_response_useful(&name_only);
    assert!(
        !name_only_is_useful,
        "Response with only a name should be classified as NoData"
    );

    // Sanity check in the other direction: adding a status entry is enough
    // for the response to count as useful.
    let with_status = RdapResponse {
        ldh_name: Some(String::from("example.com")),
        status: vec![String::from("active")],
        ..Default::default()
    };
    assert!(lookup.is_rdap_response_useful(&with_status));
}
831
832    // ---------------- Coalescing ----------------
833
834    // Verifies that when multiple concurrent lookups hit the in-flight map
835    // for the same domain, later arrivals observe the existing Weak<Notify>
836    // and become waiters rather than racing a second lookup. We test the
837    // map-level primitive here because the full SmartLookup pipeline
838    // requires network access to exercise.
839    #[tokio::test]
840    async fn test_inflight_coalescing_map() {
841        // Clear any prior state.
842        {
843            let mut m = LOOKUP_INFLIGHT.lock().unwrap();
844            m.clear();
845        }
846
847        let domain = "__test_coalesce.example.".to_string();
848
849        // First caller: no entry → becomes owner.
850        let owner_notify = Arc::new(Notify::new());
851        {
852            let mut m = LOOKUP_INFLIGHT.lock().unwrap();
853            assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
854            m.insert(domain.clone(), Arc::downgrade(&owner_notify));
855        }
856
857        // Second caller: sees the existing Weak and upgrades.
858        let waiter = {
859            let m = LOOKUP_INFLIGHT.lock().unwrap();
860            m.get(&domain)
861                .and_then(|w| w.upgrade())
862                .expect("Second caller must observe in-flight entry")
863        };
864
865        // Waiter listens in the background.
866        let waiter_clone = waiter.clone();
867        let handle = tokio::spawn(async move {
868            waiter_clone.notified().await;
869        });
870
871        // Simulate owner completing.
872        tokio::time::sleep(Duration::from_millis(20)).await;
873        {
874            let mut m = LOOKUP_INFLIGHT.lock().unwrap();
875            m.remove(&domain);
876        }
877        owner_notify.notify_waiters();
878
879        // Waiter should unblock quickly.
880        tokio::time::timeout(Duration::from_secs(1), handle)
881            .await
882            .expect("waiter must unblock after notify")
883            .expect("waiter task joined cleanly");
884
885        // After owner removes entry and drops its Arc, the Weak is dead.
886        drop(owner_notify);
887        drop(waiter);
888        let m = LOOKUP_INFLIGHT.lock().unwrap();
889        assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
890    }
891
// Checks that strings produced by `sanitize_error_for_public` stay redacted
// once stored in the `rdap_error` / `whois_error` fields of
// `LookupResult::Available`. The variant is built by hand here, so this does
// not exercise the production call site; doing that through real clients
// would require network access.
#[test]
fn test_sanitize_applied_to_available_fields() {
    // Raw error texts, each embedding an IPv4 literal, run through the sanitizer.
    let rdap_error = sanitize_error_for_public("RDAP URL resolves to reserved IP 10.0.0.1");
    let whois_error = sanitize_error_for_public("connection refused at 192.168.0.5");

    let availability = AvailabilityResult {
        domain: String::from("unreg.test"),
        available: true,
        confidence: String::from("low"),
        method: String::from("heuristic"),
        details: None,
    };
    let result = LookupResult::Available {
        data: Box::new(availability),
        rdap_error,
        whois_error,
    };

    // Read the fields back out of the variant and confirm both are redacted.
    match result {
        LookupResult::Available {
            rdap_error,
            whois_error,
            ..
        } => {
            assert!(!rdap_error.contains("10.0.0.1"));
            assert!(!whois_error.contains("192.168.0.5"));
            assert!(rdap_error.contains("[ip-redacted]"));
            assert!(whois_error.contains("[ip-redacted]"));
        }
        _ => panic!("expected Available variant"),
    }
}
928}