Skip to main content

seer_core/
lookup.rs

1use std::collections::HashMap;
2use std::net::Ipv6Addr;
3use std::str::FromStr;
4use std::sync::{Arc, Mutex, Weak};
5use std::time::Duration;
6
7use chrono::{DateTime, Utc};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Notify;
12use tracing::{debug, instrument, warn};
13
14use tokio::time::timeout as tokio_timeout;
15
16use crate::availability::{AvailabilityChecker, AvailabilityResult};
17use crate::cache::TtlCache;
18use crate::dns::{DnsPresence, DnsResolver};
19use crate::error::{Result, SeerError};
20use crate::rdap::{rdap_error_is_404, RdapClient, RdapResponse};
21use crate::whois::{get_registry_url, get_tld, WhoisClient, WhoisResponse};
22
23/// Cache TTL for lookup results (5 minutes).
24const LOOKUP_CACHE_TTL: Duration = Duration::from_secs(5 * 60);
25
26/// Grace period for the second protocol after the first one finishes.
27/// If WHOIS finishes and RDAP hasn't responded within this window, we
28/// use the WHOIS result rather than waiting the full RDAP timeout.
29const PROTOCOL_GRACE_PERIOD: Duration = Duration::from_secs(5);
30
31/// Maximum length for public-facing error strings.
32const MAX_PUBLIC_ERROR_LEN: usize = 256;
33
34/// Global cache for lookup results to avoid redundant network calls.
35static LOOKUP_CACHE: Lazy<TtlCache<String, LookupResult>> =
36    Lazy::new(|| TtlCache::new(LOOKUP_CACHE_TTL));
37
38/// In-flight lookup coalescing map: normalized-domain -> Weak<Notify>.
39/// Only one network race runs per unique domain at a time; concurrent callers
40/// wait on the shared Notify and then read the result from LOOKUP_CACHE.
41static LOOKUP_INFLIGHT: Lazy<Mutex<HashMap<String, Weak<Notify>>>> =
42    Lazy::new(|| Mutex::new(HashMap::new()));
43
/// Matches dotted-quad IPv4 literals so they can be stripped from public
/// error messages. The pattern only checks the shape (four groups of one to
/// three digits); it does not range-check the octets.
static IPV4_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").expect("IPV4_RE is a valid regex"));

/// Candidate pattern for IPv6 literals: a hex/colon token containing either
/// a `::` compression or at least three colons. This catches plausible IPv6
/// addresses cheaply; each match is then validated by `Ipv6Addr::from_str`
/// before redaction, so MAC fragments, hex hashes, and similar colon-laden
/// tokens are left alone.
///
/// NOTE(review): both ends are anchored with `\b`, which needs a word
/// character on one side. A literal that starts or ends with `:` (e.g. `::1`
/// or `fe80::`) therefore does not appear to be matched as a whole token —
/// confirm whether such addresses should also be redacted.
static IPV6_CANDIDATE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"\b[0-9a-fA-F:]*(?:::|(?:[0-9a-fA-F]{1,4}:){3,})[0-9a-fA-F:]*\b")
        .expect("IPV6_CANDIDATE_RE is a valid regex")
});
57
58/// Redact substrings that parse as valid IPv6 addresses, leaving non-IPv6
59/// tokens (e.g. `af:ba:12`) untouched.
60fn strip_ipv6(msg: &str) -> String {
61    IPV6_CANDIDATE_RE
62        .replace_all(msg, |caps: &regex::Captures| {
63            let candidate = &caps[0];
64            if Ipv6Addr::from_str(candidate).is_ok() {
65                "[ip-redacted]".to_string()
66            } else {
67                candidate.to_string()
68            }
69        })
70        .into_owned()
71}
72
/// Test-only hook: counts the number of times `lookup_concurrent` is actually
/// invoked (i.e., the underlying network race runs). Used to verify request
/// coalescing. Not exposed outside the crate.
///
/// `AtomicUsize::new` is a `const fn`, so the counter is a plain static; no
/// lazy initialization is needed.
#[cfg(test)]
static LOOKUP_CONCURRENT_CALLS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
79
80/// Returns true if the parsed WHOIS response lacks all key registration
81/// signals: no registrar, no creation date, and no expiration date.
82///
83/// This is a necessary-but-not-sufficient signal for domain availability;
84/// `lookup_concurrent` combines it with an RDAP 404 before routing to the
85/// availability path. Nameservers alone don't disqualify thinness — some
86/// registries return placeholder nameservers for unregistered domains.
87fn whois_response_is_thin(w: &WhoisResponse) -> bool {
88    w.registrar.is_none() && w.creation_date.is_none() && w.expiration_date.is_none()
89}
90
91/// Decides whether a WHOIS response + RDAP error combination should route
92/// to the availability path. Returns `(confidence, method)` when routing is
93/// warranted, `None` to keep the existing `LookupResult::Whois` behavior.
94///
95/// Case A: WHOIS explicitly indicates no registration (highest priority).
96/// Case B: WHOIS returned but lacks registration data AND RDAP returned 404.
97fn classify_whois_leg(
98    w: &WhoisResponse,
99    rdap_err: &SeerError,
100) -> Option<(&'static str, &'static str)> {
101    if w.is_available() {
102        return Some(("high", "whois"));
103    }
104    if whois_response_is_thin(w) && rdap_error_is_404(rdap_err) {
105        return Some(("medium", "whois_thin_response"));
106    }
107    None
108}
109
110/// Wraps `classify_whois_leg` with the "RDAP returned 200" veto: a successful
111/// RDAP response (HTTP 200, even if the body is thin) is positive evidence
112/// that the domain object exists, so we never let a WHOIS-only signal flip
113/// the verdict to "available" in that case. This guards against WHOIS
114/// propagation lag against freshly-provisioned domains the registry has
115/// already begun serving via RDAP. v0.26.6 regression fix.
116fn should_route_to_availability(
117    rdap_returned_200: bool,
118    rdap_seer_error: Option<&SeerError>,
119    whois_data: &WhoisResponse,
120) -> Option<(&'static str, &'static str)> {
121    if rdap_returned_200 {
122        return None;
123    }
124    // `is_available()` streams the raw response (~1 MB worst case) line by
125    // line. Compute it once and reuse — `classify_whois_leg` also calls it,
126    // so the original code paid the scan twice on every non-404 RDAP-error
127    // path. We pre-check Case A here; if it doesn't fire we drop into the
128    // 404+thin Case B branch via `classify_whois_leg`.
129    if whois_data.is_available() {
130        return Some(("high", "whois"));
131    }
132    rdap_seer_error.and_then(|e| {
133        // Case B only: WHOIS is not available, so the only remaining path
134        // is "thin WHOIS + RDAP 404". `classify_whois_leg` will re-check
135        // `is_available()` for free (it's false now), so this is a single
136        // additional thin-check call.
137        classify_whois_leg(whois_data, e)
138    })
139}
140
141/// Decides whether a thin WHOIS leg should be reclassified as "available" on
142/// the strength of a DNS NXDOMAIN. Pure so the veto rules are unit-tested
143/// without a resolver.
144///
145/// Routes to availability only when ALL hold:
146/// * the WHOIS body was thin — no registrar/dates (`is_thin`),
147/// * RDAP did NOT return an HTTP 200 (`rdap_returned_200` is false) — a 200,
148///   even with a thin body, proves the domain object exists, and
149/// * the apex has no DNS presence ([`DnsPresence::Absent`] / NXDOMAIN).
150fn nxdomain_confirms_available(is_thin: bool, rdap_returned_200: bool, dns: DnsPresence) -> bool {
151    is_thin && !rdap_returned_200 && matches!(dns, DnsPresence::Absent)
152}
153
154/// Symmetric counterpart to [`nxdomain_confirms_available`]: decides whether a
155/// thin / no-service WHOIS leg plus an RDAP failure should be reported as
156/// *registered* on the strength of a positive DNS delegation.
157///
158/// Routes to "registered" only when ALL hold:
159/// * the WHOIS body was thin — no registrar/dates (`is_thin`),
160/// * RDAP did NOT return an HTTP 200 (`rdap_returned_200` is false), and
161/// * the apex IS delegated in DNS ([`DnsPresence::Present`] — has NS records).
162///
163/// This prevents emitting an empty [`LookupResult::Whois`] for a domain that is
164/// provably registered when the registry offers no usable WHOIS (e.g. Identity
165/// Digital RDAP-only TLDs like `.email`) and RDAP was throttled or
166/// grace-truncated. `DnsPresence::Unknown` deliberately does not qualify — a
167/// failed DNS probe is not positive evidence of registration.
168fn dns_present_confirms_registered(
169    is_thin: bool,
170    rdap_returned_200: bool,
171    dns: DnsPresence,
172) -> bool {
173    is_thin && !rdap_returned_200 && matches!(dns, DnsPresence::Present)
174}
175
176/// Sanitizes an error message for inclusion in a public-facing response.
177///
178/// Strips IPv4 and IPv6 literals (to avoid leaking internal addresses when
179/// an SSRF guard rejects a resolved URL) and caps the total length to
180/// [`MAX_PUBLIC_ERROR_LEN`] characters.
181fn sanitize_error_for_public(msg: &str) -> String {
182    let s = IPV4_RE.replace_all(msg, "[ip-redacted]");
183    let s = strip_ipv6(&s);
184    if s.chars().count() > MAX_PUBLIC_ERROR_LEN {
185        let mut trunc: String = s.chars().take(MAX_PUBLIC_ERROR_LEN).collect();
186        trunc.push('…');
187        trunc
188    } else {
189        s
190    }
191}
192
/// RAII guard for the in-flight-lookup slot. On drop, removes the entry
/// from `LOOKUP_INFLIGHT` and notifies any waiters so they can read the
/// freshly-populated cache.
///
/// NOTE on failed-owner retry semantics:
/// When the owning task's lookup fails, `InflightGuard::drop` runs, the
/// `HashMap` entry is removed, and `notify_waiters()` fires. Waiters wake,
/// observe an empty cache, and one of them becomes the new owner — triggering
/// a fresh network race. This means transient failures are automatically
/// retried by any concurrent waiter. Callers that observe a timeout error
/// should not assume no work is in flight; another concurrent caller may
/// already be retrying.
struct InflightGuard {
    /// Normalized domain; the key under which this slot sits in `LOOKUP_INFLIGHT`.
    key: String,
    /// Strong handle to the `Notify` that waiters block on. The map itself
    /// only stores a `Weak` to it.
    notify: Arc<Notify>,
}
209
210impl Drop for InflightGuard {
211    fn drop(&mut self) {
212        // Always remove the entry before notifying. The earlier `try_lock`
213        // design skipped removal under contention, but that left a stale
214        // `Weak<Notify>` in the map: a caller arriving in the brief window
215        // between `notify_waiters()` firing and the owner's `Arc<Notify>`
216        // dropping could upgrade the Weak, register as a waiter on the
217        // already-fired Notify, and block forever (notify_waiters only
218        // wakes currently-registered waiters; it does not accumulate
219        // permits for later registrations).
220        //
221        // Contention windows on this `std::sync::Mutex<HashMap>` are
222        // microseconds — the brief block here is safer than the stale-entry
223        // hazard. Poisoned-mutex recovery is preserved.
224        let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
225        inflight.remove(&self.key);
226        drop(inflight);
227        self.notify.notify_waiters();
228    }
229}
230
/// Internal classification of the RDAP leg of a concurrent lookup.
///
/// Distinguishing `NoData` (HTTP 200 but response was missing useful fields)
/// from `Error` lets the orchestrator prefer a thin WHOIS result over the
/// availability fallback when RDAP silently returned nothing.
enum RdapOutcome {
    /// RDAP succeeded and the response passed the usefulness check.
    Useful(RdapResponse),
    /// RDAP succeeded but the response failed the usefulness check; the
    /// response is kept so it can be attached as a fallback.
    NoData(RdapResponse),
    /// The RDAP request completed with an error.
    Error(SeerError),
    /// RDAP future did not complete within the grace period after the other
    /// protocol finished.
    GraceTimeout,
}
244
/// Progress callback for smart lookup operations.
///
/// Invoked with a short human-readable message describing the current phase
/// of the lookup. It is an `Arc<dyn Fn(&str) + Send + Sync>`, so it can be
/// cloned cheaply and shared across threads.
pub type LookupProgressCallback = Arc<dyn Fn(&str) + Send + Sync>;
248
/// Outcome of a smart lookup.
///
/// Serialized as an internally tagged enum: the variant name, lowercased,
/// is written to a `source` field (`"rdap"`, `"whois"`, `"available"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "source", rename_all = "lowercase")]
pub enum LookupResult {
    /// RDAP produced the primary data.
    Rdap {
        /// The RDAP response.
        data: Box<RdapResponse>,
        /// WHOIS response, when one was also obtained; omitted from the
        /// serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        whois_fallback: Option<WhoisResponse>,
    },
    /// WHOIS produced the primary data.
    Whois {
        /// The WHOIS response.
        data: WhoisResponse,
        /// Why RDAP was not used, if known. Unlike the other optional
        /// fields, this one is serialized even when `None`.
        rdap_error: Option<String>,
        /// RDAP response that was received but not used as primary data;
        /// omitted from the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        rdap_fallback: Option<Box<RdapResponse>>,
    },
    /// Neither protocol yielded registration data; carries an availability
    /// verdict instead.
    Available {
        /// The availability verdict.
        data: Box<AvailabilityResult>,
        /// Description of the RDAP failure.
        rdap_error: String,
        /// Description of the WHOIS failure.
        whois_error: String,
        /// Raw WHOIS response, when one was available at routing time
        /// (Cases A and B in the design spec). `None` preserves the
        /// pre-existing "both protocols errored" semantics.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        whois_data: Option<WhoisResponse>,
    },
}
274
275impl LookupResult {
276    /// Returns the domain name from the lookup result.
277    pub fn domain_name(&self) -> Option<String> {
278        match self {
279            LookupResult::Rdap { data, .. } => data.domain_name().map(String::from),
280            LookupResult::Whois { data, .. } => Some(data.domain.clone()),
281            LookupResult::Available { data, .. } => Some(data.domain.clone()),
282        }
283    }
284
285    /// Returns the registrar name, preferring RDAP data with WHOIS fallback.
286    pub fn registrar(&self) -> Option<String> {
287        match self {
288            LookupResult::Rdap {
289                data,
290                whois_fallback,
291            } => data
292                .get_registrar()
293                .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone())),
294            LookupResult::Whois { data, .. } => data.registrar.clone(),
295            LookupResult::Available { .. } => None,
296        }
297    }
298
299    /// Returns the registrant organization, preferring RDAP data with WHOIS fallback.
300    pub fn organization(&self) -> Option<String> {
301        match self {
302            LookupResult::Rdap {
303                data,
304                whois_fallback,
305            } => data
306                .get_registrant_organization()
307                .or_else(|| whois_fallback.as_ref().and_then(|w| w.organization.clone())),
308            LookupResult::Whois { data, .. } => data.organization.clone(),
309            LookupResult::Available { .. } => None,
310        }
311    }
312
313    /// Returns true if the result came from RDAP.
314    pub fn is_rdap(&self) -> bool {
315        matches!(self, LookupResult::Rdap { .. })
316    }
317
318    /// Returns true if the result came from WHOIS.
319    pub fn is_whois(&self) -> bool {
320        matches!(self, LookupResult::Whois { .. })
321    }
322
323    /// Returns true if the result is an availability check fallback.
324    pub fn is_available(&self) -> bool {
325        matches!(self, LookupResult::Available { .. })
326    }
327
328    /// Returns the expiration date and registrar info from the lookup result.
329    pub fn expiration_info(&self) -> (Option<DateTime<Utc>>, Option<String>) {
330        match self {
331            LookupResult::Rdap {
332                data,
333                whois_fallback,
334            } => {
335                // Try to get expiration from RDAP events
336                let expiration_date = data
337                    .events
338                    .iter()
339                    .find(|e| e.event_action == "expiration")
340                    .and_then(|e| e.parsed_date())
341                    .or_else(|| {
342                        // Fallback to WHOIS if available
343                        whois_fallback.as_ref().and_then(|w| w.expiration_date)
344                    });
345
346                let registrar = data
347                    .get_registrar()
348                    .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone()));
349
350                (expiration_date, registrar)
351            }
352            LookupResult::Whois { data, .. } => (data.expiration_date, data.registrar.clone()),
353            LookupResult::Available { .. } => (None, None),
354        }
355    }
356}
357
358/// Before caching, trim raw WHOIS response to limit cache memory.
359/// A full WHOIS raw_response can be up to 1 MB; we cap it at 32 KB which is
360/// plenty for the parsed fields while preventing the cache from ballooning.
361fn trim_for_cache(mut result: LookupResult) -> LookupResult {
362    const MAX_RAW: usize = 32 * 1024;
363
364    match result {
365        LookupResult::Whois { ref mut data, .. } => {
366            if data.raw_response.len() > MAX_RAW {
367                data.raw_response.truncate(MAX_RAW);
368                data.raw_response.push_str("\n... [truncated for cache]");
369            }
370        }
371        LookupResult::Rdap {
372            ref mut whois_fallback,
373            ..
374        } => {
375            if let Some(ref mut w) = whois_fallback {
376                if w.raw_response.len() > MAX_RAW {
377                    w.raw_response.truncate(MAX_RAW);
378                    w.raw_response.push_str("\n... [truncated for cache]");
379                }
380            }
381        }
382        LookupResult::Available {
383            ref mut whois_data, ..
384        } => {
385            if let Some(ref mut w) = whois_data {
386                if w.raw_response.len() > MAX_RAW {
387                    w.raw_response.truncate(MAX_RAW);
388                    w.raw_response.push_str("\n... [truncated for cache]");
389                }
390            }
391        }
392    }
393
394    result
395}
396
/// Domain lookup front end that bundles an RDAP client, a WHOIS client, an
/// availability checker, and a DNS resolver.
#[derive(Debug, Clone)]
pub struct SmartLookup {
    /// Client for the RDAP leg of a lookup.
    rdap_client: RdapClient,
    /// Client for the WHOIS leg of a lookup.
    whois_client: WhoisClient,
    /// Availability checker used as a fallback.
    availability_checker: AvailabilityChecker,
    /// Resolver used to probe the DNS presence of a domain.
    dns_resolver: DnsResolver,
    /// Deprecated: both protocols are now always attempted concurrently.
    prefer_rdap: bool,
    /// Deprecated: WHOIS data is now always attached when available.
    include_fallback: bool,
}
408
409impl Default for SmartLookup {
410    fn default() -> Self {
411        Self::new()
412    }
413}
414
415impl SmartLookup {
416    /// Creates a new SmartLookup that runs RDAP and WHOIS concurrently,
417    /// falling back to an availability check if both fail.
418    pub fn new() -> Self {
419        Self {
420            rdap_client: RdapClient::new(),
421            whois_client: WhoisClient::new(),
422            availability_checker: AvailabilityChecker::new(),
423            dns_resolver: DnsResolver::new(),
424            prefer_rdap: true,
425            include_fallback: false,
426        }
427    }
428
429    /// Deprecated: both protocols are now always attempted concurrently.
430    /// This method is kept for API compatibility but has no effect.
431    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
432    pub fn prefer_rdap(mut self, prefer: bool) -> Self {
433        self.prefer_rdap = prefer;
434        self
435    }
436
437    /// Deprecated: WHOIS data is now always attached when available.
438    /// This method is kept for API compatibility but has no effect.
439    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
440    pub fn include_fallback(mut self, include: bool) -> Self {
441        self.include_fallback = include;
442        self
443    }
444
    /// Performs a smart lookup for a domain, trying both RDAP and WHOIS concurrently.
    /// Falls back to an availability check if both fail.
    /// Results are cached for 5 minutes to avoid redundant network calls.
    ///
    /// Delegates to [`SmartLookup::lookup_with_progress`] with no progress
    /// callback.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn lookup(&self, domain: &str) -> Result<LookupResult> {
        self.lookup_with_progress(domain, None).await
    }
452
453    /// Performs a lookup with an optional progress callback.
454    /// The callback is called with messages describing the current phase.
455    /// Results are cached for 5 minutes. Concurrent lookups for the same
456    /// domain are coalesced — only one network race runs per domain at a time.
457    #[instrument(skip(self, progress), fields(domain = %domain))]
458    pub async fn lookup_with_progress(
459        &self,
460        domain: &str,
461        progress: Option<LookupProgressCallback>,
462    ) -> Result<LookupResult> {
463        let normalized = crate::validation::normalize_domain(domain)?;
464
465        // Check cache first
466        if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
467            debug!(domain = %normalized, "Returning cached lookup result");
468            return Ok(cached);
469        }
470
471        // Coalesce in-flight lookups: if another task is already running a
472        // race for this domain, wait on its Notify rather than starting a
473        // second race. Two branches:
474        //   - Waiter: another task owns the slot; await its notify, then
475        //     read the cache. If the cache is still empty (owner failed),
476        //     loop and re-contend for ownership.
477        //   - Owner: no entry exists; insert a Weak handle, hold the Arc
478        //     for the duration of the work, then remove and notify on drop.
479        //
480        // A `loop` with a separate lock-scope per iteration keeps the
481        // `MutexGuard` from being held across any `.await`.
482        let _guard = loop {
483            enum Slot {
484                Waiter(Arc<Notify>),
485                Owner(InflightGuard),
486            }
487
488            let slot = {
489                // Recover from poisoning rather than panicking: a prior
490                // owner's panic should not permanently wedge the in-flight
491                // tracker for every future lookup.
492                let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
493                match inflight.get(&normalized).and_then(|w| w.upgrade()) {
494                    Some(existing) => Slot::Waiter(existing),
495                    None => {
496                        let n = Arc::new(Notify::new());
497                        inflight.insert(normalized.clone(), Arc::downgrade(&n));
498                        Slot::Owner(InflightGuard {
499                            key: normalized.clone(),
500                            notify: n,
501                        })
502                    }
503                }
504            };
505
506            match slot {
507                Slot::Waiter(n) => {
508                    debug!(domain = %normalized, "Waiting for in-flight lookup to complete");
509                    n.notified().await;
510                    if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
511                        return Ok(cached);
512                    }
513                    // Owner finished without populating the cache (failed
514                    // or errored). Re-contend for ownership.
515                    continue;
516                }
517                Slot::Owner(guard) => break guard,
518            }
519        };
520
521        let result = self.lookup_concurrent(&normalized, progress).await?;
522
523        // Cache a trimmed copy to limit memory usage before releasing
524        // waiters (via guard drop) so they observe the cached value.
525        LOOKUP_CACHE.insert(normalized.clone(), trim_for_cache(result.clone()));
526
527        Ok(result)
528    }
529
    /// Clears the lookup result cache.
    ///
    /// The cache is the process-wide `LOOKUP_CACHE` static, so this affects
    /// every `SmartLookup` instance, not just one.
    pub fn clear_cache() {
        LOOKUP_CACHE.clear();
    }
534
535    #[instrument(skip(self, progress), fields(domain = %domain))]
536    async fn lookup_concurrent(
537        &self,
538        domain: &str,
539        progress: Option<LookupProgressCallback>,
540    ) -> Result<LookupResult> {
541        #[cfg(test)]
542        LOOKUP_CONCURRENT_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
543
544        debug!(domain = %domain, "Attempting RDAP and WHOIS concurrently");
545
546        if let Some(ref cb) = progress {
547            cb("Querying RDAP and WHOIS concurrently");
548        }
549
550        let rdap_fut = self.rdap_client.lookup_domain(domain);
551        let whois_fut = self.whois_client.lookup(domain);
552
553        tokio::pin!(rdap_fut);
554        tokio::pin!(whois_fut);
555
556        // Race: whichever finishes first gets a grace period for the other.
557        //
558        // We track whether each side completed naturally or was truncated by
559        // the grace period, so downstream error messages can distinguish a
560        // true timeout from a loser-truncation.
561        enum LegOutcome<T> {
562            Completed(T),
563            GraceTruncated,
564        }
565
566        let (rdap_leg, whois_leg) = tokio::select! {
567            rdap_res = &mut rdap_fut => {
568                // RDAP finished first — give WHOIS a grace period
569                let whois_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, whois_fut).await {
570                    Ok(res) => LegOutcome::Completed(res),
571                    Err(_) => {
572                        debug!("WHOIS did not finish within grace period, proceeding with RDAP only");
573                        LegOutcome::GraceTruncated
574                    }
575                };
576                (LegOutcome::Completed(rdap_res), whois_leg)
577            }
578            whois_res = &mut whois_fut => {
579                // WHOIS finished first — give RDAP a grace period
580                let rdap_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, rdap_fut).await {
581                    Ok(res) => LegOutcome::Completed(res),
582                    Err(_) => {
583                        debug!("RDAP did not finish within grace period, proceeding with WHOIS only");
584                        LegOutcome::GraceTruncated
585                    }
586                };
587                (rdap_leg, LegOutcome::Completed(whois_res))
588            }
589        };
590
591        // Classify the RDAP leg.
592        let rdap_outcome = match rdap_leg {
593            LegOutcome::Completed(Ok(data)) => {
594                if self.is_rdap_response_useful(&data) {
595                    RdapOutcome::Useful(data)
596                } else {
597                    RdapOutcome::NoData(data)
598                }
599            }
600            LegOutcome::Completed(Err(e)) => RdapOutcome::Error(e),
601            LegOutcome::GraceTruncated => RdapOutcome::GraceTimeout,
602        };
603
604        // Phase 1: If RDAP returned useful data, use it as primary.
605        if let RdapOutcome::Useful(rdap_data) = rdap_outcome {
606            debug!("RDAP lookup successful");
607            let whois_fallback = match whois_leg {
608                LegOutcome::Completed(Ok(w)) => Some(w),
609                _ => None,
610            };
611            return Ok(LookupResult::Rdap {
612                data: Box::new(rdap_data),
613                whois_fallback,
614            });
615        }
616
617        // RDAP was not useful (NoData, Error, or GraceTimeout). Prefer WHOIS
618        // if it returned any response, even a thin one — this is safer than
619        // falling back to the availability heuristic when we have actual
620        // registry data in hand.
621        //
622        // We separately track whether RDAP returned an HTTP 200 (NoData):
623        // even a thin RDAP 200 is positive evidence the domain object
624        // exists. In that case we must NOT reclassify a WHOIS "no match"
625        // signal as availability — WHOIS lag against a freshly-provisioned
626        // domain would otherwise produce a false "available" verdict.
627        let rdap_returned_200 = matches!(rdap_outcome, RdapOutcome::NoData(_));
628        let (rdap_error_str, rdap_fallback_data, rdap_seer_error) = match rdap_outcome {
629            RdapOutcome::Useful(_) => {
630                // Unreachable in this branch (we returned above), but handle
631                // defensively rather than panicking across the FFI boundary.
632                debug!("Unexpected RdapOutcome::Useful in fallback branch");
633                (String::from("RDAP ok"), None, None)
634            }
635            RdapOutcome::NoData(data) => (
636                "RDAP response incomplete".to_string(),
637                Some(Box::new(data)),
638                None,
639            ),
640            RdapOutcome::Error(e) => (e.to_string(), None, Some(e)),
641            RdapOutcome::GraceTimeout => (
642                format!(
643                    "RDAP did not return within {}s grace period after WHOIS won",
644                    PROTOCOL_GRACE_PERIOD.as_secs()
645                ),
646                None,
647                None,
648            ),
649        };
650
651        if let LegOutcome::Completed(Ok(whois_data)) = whois_leg {
652            // Check Cases A and B: should we reclassify as Available? The
653            // `should_route_to_availability` helper also enforces the
654            // "RDAP returned 200 vetoes WHOIS availability claims" rule.
655            let availability_match = should_route_to_availability(
656                rdap_returned_200,
657                rdap_seer_error.as_ref(),
658                &whois_data,
659            );
660
661            if let Some((confidence, method)) = availability_match {
662                debug!(
663                    domain = %domain,
664                    confidence = %confidence,
665                    "Reclassifying WHOIS as availability signal"
666                );
667                if let Some(ref cb) = progress {
668                    cb("Domain appears unregistered");
669                }
670                let details = match confidence {
671                    "high" => Some("WHOIS indicates domain is not registered".to_string()),
672                    "medium" => Some(
673                        "WHOIS returned no registrar or registration dates; RDAP returned 404"
674                            .to_string(),
675                    ),
676                    _ => None,
677                };
678                let avail = AvailabilityResult {
679                    domain: domain.to_string(),
680                    available: true,
681                    confidence: confidence.to_string(),
682                    method: method.to_string(),
683                    details,
684                };
685                return Ok(LookupResult::Available {
686                    data: Box::new(avail),
687                    rdap_error: sanitize_error_for_public(&rdap_error_str),
688                    whois_error: String::new(),
689                    whois_data: Some(whois_data),
690                });
691            }
692
693            // Fix #2 safety net: a thin WHOIS body plus an RDAP failure that
694            // was not an authoritative 404 leaves us without registry data.
695            // If the apex also has no DNS presence (NXDOMAIN), reclassify as
696            // likely-available rather than emitting an empty WHOIS record. The
697            // cheap thin / not-200 preconditions gate the DNS probe so we
698            // don't pay for it on the common paths.
699            let whois_is_thin = whois_response_is_thin(&whois_data);
700            if whois_is_thin && !rdap_returned_200 {
701                let dns_presence = self.dns_resolver.presence(domain).await;
702                if nxdomain_confirms_available(whois_is_thin, rdap_returned_200, dns_presence) {
703                    debug!(domain = %domain, "Thin WHOIS + NXDOMAIN, reclassifying as available");
704                    if let Some(ref cb) = progress {
705                        cb("Domain appears unregistered (no DNS presence)");
706                    }
707                    let avail = AvailabilityResult {
708                        domain: domain.to_string(),
709                        available: true,
710                        confidence: "medium".to_string(),
711                        method: "dns_nxdomain".to_string(),
712                        details: Some(
713                            "No registry data available; domain has no DNS presence (NXDOMAIN)"
714                                .to_string(),
715                        ),
716                    };
717                    return Ok(LookupResult::Available {
718                        data: Box::new(avail),
719                        rdap_error: sanitize_error_for_public(&rdap_error_str),
720                        whois_error: String::new(),
721                        whois_data: Some(whois_data),
722                    });
723                }
724
725                // Symmetric safety net: thin / no-service WHOIS, RDAP not a
726                // 200, but the apex IS delegated in DNS — the domain is
727                // registered. Report that (with the DNS-derived reason) instead
728                // of emitting an empty WHOIS record. Fixes RDAP-only TLDs (e.g.
729                // Identity Digital's .email/.life/.ninja) whose `whois.nic.*`
730                // answers "TLD is not supported." and whose throttled RDAP can
731                // be grace-truncated by that fast non-answer.
732                if dns_present_confirms_registered(whois_is_thin, rdap_returned_200, dns_presence) {
733                    debug!(domain = %domain, "Thin/no-service WHOIS + DNS delegation, reporting registered");
734                    if let Some(ref cb) = progress {
735                        cb("Domain is registered (registry detail unavailable)");
736                    }
737                    let details = if whois_data.registry_unavailable() {
738                        "Domain is registered (the apex is delegated in DNS). This TLD's \
739                         registry provides no port-43 WHOIS data and RDAP was unavailable \
740                         (rate-limited or unreachable); retry shortly for full RDAP detail."
741                    } else {
742                        "Domain is registered (the apex is delegated in DNS). Registry detail \
743                         was unavailable (RDAP rate-limited or unreachable and WHOIS returned \
744                         no data); retry shortly for full detail."
745                    };
746                    let avail = AvailabilityResult {
747                        domain: domain.to_string(),
748                        available: false,
749                        confidence: "high".to_string(),
750                        method: "dns_present".to_string(),
751                        details: Some(details.to_string()),
752                    };
753                    return Ok(LookupResult::Available {
754                        data: Box::new(avail),
755                        rdap_error: sanitize_error_for_public(&rdap_error_str),
756                        whois_error: String::new(),
757                        whois_data: Some(whois_data),
758                    });
759                }
760            }
761            debug!("Using WHOIS result (RDAP not useful)");
762            if let Some(ref cb) = progress {
763                cb("RDAP not available (using WHOIS)");
764            }
765            return Ok(LookupResult::Whois {
766                data: whois_data,
767                rdap_error: Some(rdap_error_str),
768                rdap_fallback: rdap_fallback_data,
769            });
770        }
771
772        // Both sides failed to provide useful data. Craft a precise WHOIS
773        // error string that distinguishes true errors from grace-period
774        // truncation.
775        let whois_error_str = match whois_leg {
776            LegOutcome::Completed(Err(e)) => e.to_string(),
777            LegOutcome::Completed(Ok(_)) => {
778                // Already handled above; treat defensively.
779                debug!("Unexpected completed-Ok WHOIS in availability fallback branch");
780                "WHOIS returned but was not used".to_string()
781            }
782            LegOutcome::GraceTruncated => format!(
783                "WHOIS did not return within {}s grace period after RDAP won",
784                PROTOCOL_GRACE_PERIOD.as_secs()
785            ),
786        };
787
788        self.availability_fallback(domain, rdap_error_str, whois_error_str, progress)
789            .await
790    }
791
792    async fn availability_fallback(
793        &self,
794        domain: &str,
795        rdap_error: String,
796        whois_error: String,
797        progress: Option<LookupProgressCallback>,
798    ) -> Result<LookupResult> {
799        if let Some(ref cb) = progress {
800            cb("RDAP and WHOIS unavailable (checking availability)");
801        }
802        warn!(
803            domain = %domain,
804            rdap_error = %rdap_error,
805            whois_error = %whois_error,
806            "Both RDAP and WHOIS failed, falling back to availability check"
807        );
808
809        match self.availability_checker.check(domain).await {
810            Ok(avail) => Ok(LookupResult::Available {
811                data: Box::new(avail),
812                rdap_error: sanitize_error_for_public(&rdap_error),
813                whois_error: sanitize_error_for_public(&whois_error),
814                whois_data: None,
815            }),
816            Err(avail_err) => {
817                let tld = get_tld(domain).unwrap_or("unknown");
818                let registry_url = get_registry_url(tld).unwrap_or_else(|| {
819                    format!("https://www.iana.org/domains/root/db/{}.html", tld)
820                });
821                Err(SeerError::LookupFailed {
822                    domain: domain.to_string(),
823                    details: format!(
824                        "RDAP failed ({}), WHOIS failed ({}), availability check failed ({})",
825                        rdap_error, whois_error, avail_err
826                    ),
827                    registry_url,
828                })
829            }
830        }
831    }
832
833    fn is_rdap_response_useful(&self, response: &RdapResponse) -> bool {
834        // Check if we have at least some meaningful data
835        let has_name = response.ldh_name.is_some() || response.unicode_name.is_some();
836        let has_dates = response
837            .events
838            .iter()
839            .any(|e| e.event_action == "registration" || e.event_action == "expiration");
840        let has_entities = !response.entities.is_empty();
841        let has_nameservers = !response.nameservers.is_empty();
842        let has_status = !response.status.is_empty();
843
844        // Consider useful if we have the name plus at least one other piece of info
845        has_name && (has_dates || has_entities || has_nameservers || has_status)
846    }
847}
848
849#[cfg(test)]
850mod tests {
851    use super::*;
852
    /// Global serialization mutex for the three tests that share
    /// `LOOKUP_INFLIGHT` state (coalescing, poison recovery, drop recovery).
    ///
    /// Each of those tests takes this lock first and holds it for its whole
    /// body. Running them in parallel would create two races:
    ///   1. Guard drop uses `try_lock`; if another test holds the mutex, the
    ///      Drop path skips cleanup → stale entries fail later assertions.
    ///   2. Poisoning in one test leaves the mutex poisoned for the next
    ///      test, which is handled by `unwrap_or_else` but still disturbs
    ///      state.
    ///
    /// Per-test unique keys (see `unique_test_key`) prevent entry-level
    /// collisions; this mutex prevents lock-contention races on Drop.
    static INFLIGHT_TEST_SERIAL: Mutex<()> = Mutex::new(());
863
864    #[test]
865    fn test_lookup_result_domain_name_whois() {
866        let result = LookupResult::Whois {
867            data: WhoisResponse {
868                domain: "example.com".to_string(),
869                registrar: Some("Test Registrar".to_string()),
870                registrant: None,
871                organization: None,
872                registrant_email: None,
873                registrant_phone: None,
874                registrant_address: None,
875                registrant_country: None,
876                admin_name: None,
877                admin_organization: None,
878                admin_email: None,
879                admin_phone: None,
880                tech_name: None,
881                tech_organization: None,
882                tech_email: None,
883                tech_phone: None,
884                creation_date: None,
885                expiration_date: None,
886                updated_date: None,
887                status: vec![],
888                nameservers: vec![],
889                dnssec: None,
890                whois_server: "whois.example.com".to_string(),
891                raw_response: String::new(),
892            },
893            rdap_error: None,
894            rdap_fallback: None,
895        };
896
897        assert_eq!(result.domain_name(), Some("example.com".to_string()));
898        assert_eq!(result.registrar(), Some("Test Registrar".to_string()));
899        assert!(result.is_whois());
900        assert!(!result.is_rdap());
901        assert!(!result.is_available());
902    }
903
904    #[test]
905    fn test_lookup_result_serialization() {
906        let result = LookupResult::Whois {
907            data: WhoisResponse {
908                domain: "test.com".to_string(),
909                registrar: None,
910                registrant: None,
911                organization: None,
912                registrant_email: None,
913                registrant_phone: None,
914                registrant_address: None,
915                registrant_country: None,
916                admin_name: None,
917                admin_organization: None,
918                admin_email: None,
919                admin_phone: None,
920                tech_name: None,
921                tech_organization: None,
922                tech_email: None,
923                tech_phone: None,
924                creation_date: None,
925                expiration_date: None,
926                updated_date: None,
927                status: vec![],
928                nameservers: vec![],
929                dnssec: None,
930                whois_server: String::new(),
931                raw_response: String::new(),
932            },
933            rdap_error: Some("RDAP failed".to_string()),
934            rdap_fallback: None,
935        };
936
937        let json = serde_json::to_string(&result).unwrap();
938        assert!(json.contains("\"source\":\"whois\""));
939        assert!(json.contains("RDAP failed"));
940    }
941
942    #[test]
943    fn test_lookup_result_available_serialization() {
944        let result = LookupResult::Available {
945            data: Box::new(AvailabilityResult {
946                domain: "test123.xyz".to_string(),
947                available: true,
948                confidence: "medium".to_string(),
949                method: "whois_error".to_string(),
950                details: Some("WHOIS server indicates no matching records".to_string()),
951            }),
952            rdap_error: "RDAP failed".to_string(),
953            whois_error: "WHOIS failed".to_string(),
954            whois_data: None,
955        };
956
957        let json = serde_json::to_string(&result).unwrap();
958        assert!(json.contains("\"source\":\"available\""));
959        assert!(json.contains("\"available\":true"));
960        assert!(json.contains("test123.xyz"));
961
962        assert_eq!(result.domain_name(), Some("test123.xyz".to_string()));
963        assert!(result.is_available());
964        assert!(!result.is_rdap());
965        assert!(!result.is_whois());
966        assert!(result.registrar().is_none());
967        assert_eq!(result.expiration_info(), (None, None));
968    }
969
970    #[test]
971    #[allow(deprecated)]
972    fn test_smart_lookup_builder() {
973        let lookup = SmartLookup::new().prefer_rdap(false).include_fallback(true);
974        assert!(!lookup.prefer_rdap);
975        assert!(lookup.include_fallback);
976    }
977
978    #[test]
979    fn test_lookup_cache_clear() {
980        SmartLookup::clear_cache();
981        assert!(LOOKUP_CACHE.is_empty());
982    }
983
984    // ---------------- sanitize_error_for_public ----------------
985
986    #[test]
987    fn test_sanitize_strips_ipv4() {
988        let msg = "RDAP URL resolves to reserved IP 10.0.0.1 which is forbidden";
989        let sanitized = sanitize_error_for_public(msg);
990        assert!(
991            !sanitized.contains("10.0.0.1"),
992            "IPv4 should be stripped, got: {}",
993            sanitized
994        );
995        assert!(sanitized.contains("[ip-redacted]"));
996    }
997
998    #[test]
999    fn test_sanitize_strips_multiple_ipv4() {
1000        let msg = "Could not connect to 192.168.1.1 after trying 127.0.0.1";
1001        let sanitized = sanitize_error_for_public(msg);
1002        assert!(!sanitized.contains("192.168.1.1"));
1003        assert!(!sanitized.contains("127.0.0.1"));
1004        // Two redactions expected.
1005        assert_eq!(sanitized.matches("[ip-redacted]").count(), 2);
1006    }
1007
1008    #[test]
1009    fn test_sanitize_strips_ipv6() {
1010        let msg = "RDAP URL resolves to reserved IP fe80::1 which is forbidden";
1011        let sanitized = sanitize_error_for_public(msg);
1012        assert!(!sanitized.contains("fe80::1"));
1013        assert!(sanitized.contains("[ip-redacted]"));
1014    }
1015
1016    #[test]
1017    fn sanitize_leaves_mac_address_like_tokens_alone() {
1018        let msg = "error code af:ba:12 at line 5";
1019        let out = sanitize_error_for_public(msg);
1020        assert!(
1021            out.contains("af:ba:12"),
1022            "MAC fragment should not be stripped: {}",
1023            out
1024        );
1025    }
1026
1027    #[test]
1028    fn sanitize_strips_real_ipv6() {
1029        let msg = "cannot reach 2001:db8::1 — timeout";
1030        let out = sanitize_error_for_public(msg);
1031        assert!(!out.contains("2001:db8::1"));
1032        assert!(out.contains("[ip-redacted]"));
1033    }
1034
1035    #[test]
1036    fn sanitize_strips_fe80_link_local() {
1037        let msg = "peer at fe80::1 unreachable";
1038        let out = sanitize_error_for_public(msg);
1039        assert!(out.contains("[ip-redacted]"));
1040    }
1041
1042    #[test]
1043    fn test_sanitize_truncates_long_message() {
1044        // Build a 500-char message with no IPs.
1045        let long = "a".repeat(500);
1046        let sanitized = sanitize_error_for_public(&long);
1047        // Should cap at MAX_PUBLIC_ERROR_LEN chars + ellipsis.
1048        let char_count = sanitized.chars().count();
1049        assert_eq!(char_count, MAX_PUBLIC_ERROR_LEN + 1);
1050        assert!(sanitized.ends_with('…'));
1051    }
1052
1053    #[test]
1054    fn test_sanitize_preserves_short_messages() {
1055        let msg = "RDAP timed out after 15s";
1056        let sanitized = sanitize_error_for_public(msg);
1057        assert_eq!(sanitized, msg);
1058    }
1059
1060    // ---------------- RdapOutcome classification ----------------
1061
1062    #[test]
1063    fn test_is_rdap_response_useful_detects_no_data() {
1064        use crate::rdap::RdapResponse;
1065        // Construct a response with a name but no events, entities, NS, or status
1066        // — this is the "200 OK but no useful fields" case that should be
1067        // classified as RdapOutcome::NoData (not Useful, not Error).
1068        let resp = RdapResponse {
1069            ldh_name: Some("example.com".to_string()),
1070            ..Default::default()
1071        };
1072        let lookup = SmartLookup::new();
1073        assert!(
1074            !lookup.is_rdap_response_useful(&resp),
1075            "Response with only a name should be classified as NoData"
1076        );
1077
1078        // And one with a name + status IS useful (sanity check).
1079        let useful = RdapResponse {
1080            ldh_name: Some("example.com".to_string()),
1081            status: vec!["active".to_string()],
1082            ..Default::default()
1083        };
1084        assert!(lookup.is_rdap_response_useful(&useful));
1085    }
1086
1087    // ---------------- Coalescing ----------------
1088
1089    // Verifies that when multiple concurrent lookups hit the in-flight map
1090    // for the same domain, later arrivals observe the existing Weak<Notify>
1091    // and become waiters rather than racing a second lookup. We test the
1092    // map-level primitive here because the full SmartLookup pipeline
1093    // requires network access to exercise.
1094    #[tokio::test]
1095    async fn test_inflight_coalescing_map() {
1096        // Serialize with sibling poisoning tests: we share LOOKUP_INFLIGHT
1097        // state, and `InflightGuard::drop` uses `try_lock` — if a sibling
1098        // holds the mutex during drop, cleanup is skipped and assertions
1099        // fail.
1100        let _serial = INFLIGHT_TEST_SERIAL
1101            .lock()
1102            .unwrap_or_else(|p| p.into_inner());
1103        // Poison-tolerant: the sibling poisoning regression tests may run
1104        // earlier under `cargo test` parallelism and leave LOOKUP_INFLIGHT
1105        // poisoned. The production code recovers via `unwrap_or_else`,
1106        // so this test does the same.
1107        //
1108        // Use a per-run unique key so this test cannot race with the other
1109        // tests that touch LOOKUP_INFLIGHT. Previously we `clear()`ed the
1110        // whole map, which raced with peer tests' entries.
1111        let domain = unique_test_key("__coalesce");
1112
1113        // Defensive: ensure our specific key is not present.
1114        {
1115            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1116            m.remove(&domain);
1117        }
1118
1119        // First caller: no entry → becomes owner.
1120        let owner_notify = Arc::new(Notify::new());
1121        {
1122            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1123            assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1124            m.insert(domain.clone(), Arc::downgrade(&owner_notify));
1125        }
1126
1127        // Second caller: sees the existing Weak and upgrades.
1128        let waiter = {
1129            let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1130            m.get(&domain)
1131                .and_then(|w| w.upgrade())
1132                .expect("Second caller must observe in-flight entry")
1133        };
1134
1135        // Waiter listens in the background.
1136        let waiter_clone = waiter.clone();
1137        let handle = tokio::spawn(async move {
1138            waiter_clone.notified().await;
1139        });
1140
1141        // Simulate owner completing.
1142        tokio::time::sleep(Duration::from_millis(20)).await;
1143        {
1144            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1145            m.remove(&domain);
1146        }
1147        owner_notify.notify_waiters();
1148
1149        // Waiter should unblock quickly.
1150        tokio::time::timeout(Duration::from_secs(1), handle)
1151            .await
1152            .expect("waiter must unblock after notify")
1153            .expect("waiter task joined cleanly");
1154
1155        // After owner removes entry and drops its Arc, the Weak is dead.
1156        drop(owner_notify);
1157        drop(waiter);
1158        let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1159        assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1160    }
1161
1162    /// Builds a domain key guaranteed unique per test invocation, so that
1163    /// tests touching the shared LOOKUP_INFLIGHT static never collide when
1164    /// `cargo test` runs them in parallel. We include a nanosecond timestamp
1165    /// plus an atomic counter to defeat even hash-identical calls within the
1166    /// same nanosecond.
1167    fn unique_test_key(prefix: &str) -> String {
1168        use std::sync::atomic::{AtomicU64, Ordering};
1169        use std::time::{SystemTime, UNIX_EPOCH};
1170        static COUNTER: AtomicU64 = AtomicU64::new(0);
1171        let nanos = SystemTime::now()
1172            .duration_since(UNIX_EPOCH)
1173            .map(|d| d.as_nanos())
1174            .unwrap_or(0);
1175        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1176        format!("{}_{}_{}.example.", prefix, nanos, n)
1177    }
1178
1179    // Demonstrates that the `sanitize_error_for_public` helper is applied
1180    // to the rdap_error / whois_error fields written into the `Available`
1181    // variant. We check the call site indirectly: construct a Available
1182    // manually and then verify a raw error with an IP becomes redacted.
1183    // (Integration via real clients would require network.)
1184    #[test]
1185    fn test_sanitize_applied_to_available_fields() {
1186        let rdap_raw = "RDAP URL resolves to reserved IP 10.0.0.1";
1187        let whois_raw = "connection refused at 192.168.0.5";
1188        let sanitized_rdap = sanitize_error_for_public(rdap_raw);
1189        let sanitized_whois = sanitize_error_for_public(whois_raw);
1190        let result = LookupResult::Available {
1191            data: Box::new(AvailabilityResult {
1192                domain: "unreg.test".to_string(),
1193                available: true,
1194                confidence: "low".to_string(),
1195                method: "heuristic".to_string(),
1196                details: None,
1197            }),
1198            rdap_error: sanitized_rdap,
1199            whois_error: sanitized_whois,
1200            whois_data: None,
1201        };
1202        if let LookupResult::Available {
1203            rdap_error,
1204            whois_error,
1205            ..
1206        } = result
1207        {
1208            assert!(!rdap_error.contains("10.0.0.1"));
1209            assert!(!whois_error.contains("192.168.0.5"));
1210            assert!(rdap_error.contains("[ip-redacted]"));
1211            assert!(whois_error.contains("[ip-redacted]"));
1212        } else {
1213            panic!("expected Available variant");
1214        }
1215    }
1216
1217    #[test]
1218    fn rdap_error_is_404_matches_standard_404() {
1219        let e = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1220        assert!(rdap_error_is_404(&e));
1221    }
1222
1223    #[test]
1224    fn rdap_error_is_404_matches_without_reason_phrase() {
1225        let e = SeerError::RdapError("query failed with status 404".to_string());
1226        assert!(rdap_error_is_404(&e));
1227    }
1228
1229    #[test]
1230    fn rdap_error_is_404_rejects_other_statuses() {
1231        let e = SeerError::RdapError("query failed with status 500 Server Error".to_string());
1232        assert!(!rdap_error_is_404(&e));
1233        let e = SeerError::RdapError("query failed with status 400 Bad Request".to_string());
1234        assert!(!rdap_error_is_404(&e));
1235    }
1236
1237    #[test]
1238    fn rdap_error_is_404_rejects_non_http_errors() {
1239        let e = SeerError::RdapError("connection timeout".to_string());
1240        assert!(!rdap_error_is_404(&e));
1241        let e = SeerError::Timeout("rdap".to_string());
1242        assert!(!rdap_error_is_404(&e));
1243    }
1244
1245    #[test]
1246    fn rdap_error_is_404_rejects_incidental_404_in_message() {
1247        // A 404 substring inside a non-status context must not match.
1248        let e = SeerError::RdapError("error 40404: database corruption".to_string());
1249        assert!(!rdap_error_is_404(&e));
1250    }
1251
1252    // ---------------- whois_response_is_thin ----------------
1253
1254    fn empty_whois(domain: &str) -> WhoisResponse {
1255        WhoisResponse {
1256            domain: domain.to_string(),
1257            registrar: None,
1258            registrant: None,
1259            organization: None,
1260            registrant_email: None,
1261            registrant_phone: None,
1262            registrant_address: None,
1263            registrant_country: None,
1264            admin_name: None,
1265            admin_organization: None,
1266            admin_email: None,
1267            admin_phone: None,
1268            tech_name: None,
1269            tech_organization: None,
1270            tech_email: None,
1271            tech_phone: None,
1272            creation_date: None,
1273            expiration_date: None,
1274            updated_date: None,
1275            nameservers: vec![],
1276            status: vec![],
1277            dnssec: None,
1278            whois_server: String::new(),
1279            raw_response: String::new(),
1280        }
1281    }
1282
1283    #[test]
1284    fn whois_response_is_thin_when_all_key_fields_missing() {
1285        let w = empty_whois("example.com");
1286        assert!(whois_response_is_thin(&w));
1287    }
1288
1289    #[test]
1290    fn whois_response_is_not_thin_when_registrar_present() {
1291        let mut w = empty_whois("example.com");
1292        w.registrar = Some("Test Registrar".to_string());
1293        assert!(!whois_response_is_thin(&w));
1294    }
1295
1296    #[test]
1297    fn whois_response_is_not_thin_when_creation_date_present() {
1298        let mut w = empty_whois("example.com");
1299        w.creation_date = Some(Utc::now());
1300        assert!(!whois_response_is_thin(&w));
1301    }
1302
1303    #[test]
1304    fn whois_response_is_not_thin_when_expiration_date_present() {
1305        let mut w = empty_whois("example.com");
1306        w.expiration_date = Some(Utc::now());
1307        assert!(!whois_response_is_thin(&w));
1308    }
1309
1310    #[test]
1311    fn whois_response_is_thin_even_with_nameservers_alone() {
1312        let mut w = empty_whois("example.com");
1313        w.nameservers = vec!["ns1.example.net".to_string()];
1314        assert!(whois_response_is_thin(&w));
1315    }
1316
1317    // ---------------- classify_whois_leg ----------------
1318
1319    use crate::rdap::RdapResponse;
1320
1321    #[allow(dead_code)]
1322    fn make_empty_rdap_response() -> RdapResponse {
1323        serde_json::from_value(serde_json::json!({
1324            "objectClassName": "domain",
1325        }))
1326        .expect("valid minimal RDAP response")
1327    }
1328
1329    #[test]
1330    fn classify_whois_leg_case_a_high_confidence() {
1331        let mut w = empty_whois("zaccodes.com");
1332        w.raw_response = "No match for \"ZACCODES.COM\".".to_string();
1333        assert!(w.is_available());
1334        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1335        let (verdict, method) =
1336            classify_whois_leg(&w, &rdap_err).expect("expected a routing decision");
1337        assert_eq!(verdict, "high");
1338        assert_eq!(method, "whois");
1339    }
1340
1341    #[test]
1342    fn classify_whois_leg_case_b_medium_confidence() {
1343        let w = empty_whois("example.xyz");
1344        assert!(!w.is_available(), "this WHOIS body has no 'no match' text");
1345        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1346        let (verdict, method) =
1347            classify_whois_leg(&w, &rdap_err).expect("expected a routing decision");
1348        assert_eq!(verdict, "medium");
1349        assert_eq!(method, "whois_thin_response");
1350    }
1351
1352    #[test]
1353    fn classify_whois_leg_rejects_thin_whois_without_404() {
1354        let w = empty_whois("example.xyz");
1355        let rdap_err = SeerError::RdapError("connection timeout".to_string());
1356        assert!(classify_whois_leg(&w, &rdap_err).is_none());
1357    }
1358
1359    #[test]
1360    fn classify_whois_leg_rejects_whois_with_real_data() {
1361        let mut w = empty_whois("legacy.tld");
1362        w.registrar = Some("Legacy Registry".to_string());
1363        w.creation_date = Some(Utc::now());
1364        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1365        assert!(classify_whois_leg(&w, &rdap_err).is_none());
1366    }
1367
1368    #[test]
1369    fn classify_whois_leg_case_a_wins_over_case_b() {
1370        let mut w = empty_whois("example.com");
1371        w.raw_response = "No match for \"EXAMPLE.COM\".".to_string();
1372        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1373        let (verdict, _) = classify_whois_leg(&w, &rdap_err).unwrap();
1374        assert_eq!(verdict, "high");
1375    }
1376
1377    // ---------------- should_route_to_availability ----------------
1378    //
1379    // Regression coverage for the v0.26.6 fix: when RDAP returned an HTTP 200
1380    // (even with thin body), a WHOIS "no match" must NOT be treated as
1381    // evidence of availability — that would let propagation lag flip the
1382    // verdict for a domain the registry has already provisioned.
1383
1384    #[test]
1385    fn rdap_200_vetoes_whois_no_match() {
1386        let mut w = empty_whois("freshly-registered.com");
1387        w.raw_response = "No match for \"FRESHLY-REGISTERED.COM\".".to_string();
1388        // rdap_returned_200 = true, no rdap_seer_error (NoData has no error).
1389        assert!(
1390            should_route_to_availability(true, None, &w).is_none(),
1391            "RDAP 200 must veto WHOIS-only availability claim",
1392        );
1393    }
1394
1395    #[test]
1396    fn rdap_200_vetoes_even_with_thin_whois() {
1397        let w = empty_whois("freshly-registered.com");
1398        // Thin WHOIS without is_available() patterns.
1399        assert!(
1400            should_route_to_availability(true, None, &w).is_none(),
1401            "RDAP 200 must veto even when WHOIS is thin",
1402        );
1403    }
1404
1405    #[test]
1406    fn rdap_404_with_whois_no_match_routes_to_available() {
1407        let mut w = empty_whois("genuinely-free.com");
1408        w.raw_response = "No match for \"GENUINELY-FREE.COM\".".to_string();
1409        let rdap_err = SeerError::RdapError("query failed with status 404".to_string());
1410        let result = should_route_to_availability(false, Some(&rdap_err), &w);
1411        assert_eq!(result, Some(("high", "whois")));
1412    }
1413
1414    #[test]
1415    fn rdap_error_with_whois_is_available_still_routes_case_a() {
1416        let mut w = empty_whois("genuinely-free.com");
1417        w.raw_response = "Domain not found".to_string();
1418        // RDAP errored for a non-404 reason (e.g. bootstrap failure); WHOIS
1419        // signal alone should still route to availability.
1420        let rdap_err = SeerError::RdapBootstrapError("all registries failed".to_string());
1421        let result = should_route_to_availability(false, Some(&rdap_err), &w);
1422        assert_eq!(result, Some(("high", "whois")));
1423    }
1424
1425    #[test]
1426    fn rdap_grace_timeout_with_whois_is_available_routes_case_a() {
1427        // GraceTimeout path: rdap_returned_200 = false, rdap_seer_error = None.
1428        let mut w = empty_whois("genuinely-free.com");
1429        w.raw_response = "No match".to_string();
1430        let result = should_route_to_availability(false, None, &w);
1431        assert_eq!(result, Some(("high", "whois")));
1432    }
1433
1434    #[test]
1435    fn no_rdap_200_no_error_thick_whois_stays_in_whois_path() {
1436        let mut w = empty_whois("registered.com");
1437        w.registrar = Some("Example Registrar Ltd".to_string());
1438        // GraceTimeout-like: rdap_returned_200=false, no error, and WHOIS
1439        // does not look free. Must return None so the caller picks
1440        // `LookupResult::Whois`.
1441        assert!(should_route_to_availability(false, None, &w).is_none());
1442    }
1443
1444    // ---------------- nxdomain_confirms_available ----------------
1445
1446    #[test]
1447    fn nxdomain_confirms_available_thin_no200_absent() {
1448        assert!(nxdomain_confirms_available(
1449            true,
1450            false,
1451            DnsPresence::Absent
1452        ));
1453    }
1454
1455    #[test]
1456    fn nxdomain_confirms_available_vetoed_by_rdap_200() {
1457        // A 200 from RDAP (object exists) must veto the NXDOMAIN signal even
1458        // if the apex currently has no delegation.
1459        assert!(!nxdomain_confirms_available(
1460            true,
1461            true,
1462            DnsPresence::Absent
1463        ));
1464    }
1465
1466    #[test]
1467    fn nxdomain_confirms_available_requires_thin_whois() {
1468        // A WHOIS body with real data is never overridden by DNS.
1469        assert!(!nxdomain_confirms_available(
1470            false,
1471            false,
1472            DnsPresence::Absent
1473        ));
1474    }
1475
1476    #[test]
1477    fn nxdomain_confirms_available_requires_absent_dns() {
1478        assert!(!nxdomain_confirms_available(
1479            true,
1480            false,
1481            DnsPresence::Present
1482        ));
1483        assert!(!nxdomain_confirms_available(
1484            true,
1485            false,
1486            DnsPresence::Unknown
1487        ));
1488    }
1489
1490    // ---------------- dns_present_confirms_registered ----------------
1491
1492    #[test]
1493    fn dns_present_confirms_registered_thin_no200_present() {
1494        // The zac.email / Identity-Digital case: a no-service WHOIS leg, RDAP
1495        // unavailable (throttled / grace-truncated), but the apex IS delegated
1496        // in DNS — the domain is registered and must not render as blank.
1497        assert!(dns_present_confirms_registered(
1498            true,
1499            false,
1500            DnsPresence::Present
1501        ));
1502    }
1503
1504    #[test]
1505    fn dns_present_confirms_registered_requires_present_dns() {
1506        // NXDOMAIN is the "available" signal, not "registered"; Unknown is not
1507        // positive evidence of registration.
1508        assert!(!dns_present_confirms_registered(
1509            true,
1510            false,
1511            DnsPresence::Absent
1512        ));
1513        assert!(!dns_present_confirms_registered(
1514            true,
1515            false,
1516            DnsPresence::Unknown
1517        ));
1518    }
1519
1520    #[test]
1521    fn dns_present_confirms_registered_requires_thin_whois() {
1522        // A WHOIS body with real registration data uses the normal Whois path.
1523        assert!(!dns_present_confirms_registered(
1524            false,
1525            false,
1526            DnsPresence::Present
1527        ));
1528    }
1529
1530    #[test]
1531    fn dns_present_confirms_registered_vetoed_by_rdap_200() {
1532        // A thin RDAP 200 already proves the object exists; keep that path.
1533        assert!(!dns_present_confirms_registered(
1534            true,
1535            true,
1536            DnsPresence::Present
1537        ));
1538    }
1539
1540    // ---------------- Mutex poisoning recovery ----------------
1541
1542    /// Regression: a panic inside `LOOKUP_INFLIGHT.lock()` must not wedge
1543    /// the tracker forever. After the mutex is poisoned, subsequent
1544    /// acquisition attempts must still succeed via `unwrap_or_else`.
1545    ///
1546    /// This isolates the lookup_with_progress acquisition site (formerly a
1547    /// `.expect("LOOKUP_INFLIGHT mutex poisoned")`) by exercising the same
1548    /// `.lock().unwrap_or_else(|p| p.into_inner())` pattern directly.
1549    #[test]
1550    fn lookup_inflight_recovers_from_poisoned_mutex() {
1551        use std::panic::{catch_unwind, AssertUnwindSafe};
1552
1553        // Serialize with sibling tests that also touch LOOKUP_INFLIGHT.
1554        let _serial = INFLIGHT_TEST_SERIAL
1555            .lock()
1556            .unwrap_or_else(|p| p.into_inner());
1557
1558        // Poison the real static by panicking while holding the guard.
1559        let _ = catch_unwind(AssertUnwindSafe(|| {
1560            let _guard = LOOKUP_INFLIGHT.lock().unwrap();
1561            panic!("poisoning LOOKUP_INFLIGHT for test");
1562        }));
1563
1564        // At this point LOOKUP_INFLIGHT is poisoned. Plain .lock() would
1565        // return Err(PoisonError). The recovery pattern used in
1566        // lookup_with_progress must still yield a usable guard.
1567        let mut guard = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1568        // Use a per-run unique canary so parallel tests cannot collide.
1569        let canary = unique_test_key("__poison_recovery");
1570        guard.insert(canary.clone(), Weak::new());
1571        assert!(guard.contains_key(&canary));
1572        guard.remove(&canary);
1573    }
1574
1575    /// Regression: InflightGuard::drop must also tolerate mutex poisoning
1576    /// without panicking — the Poisoned arm should still remove the entry.
1577    #[test]
1578    fn inflight_guard_drop_recovers_from_poisoned_mutex() {
1579        use std::panic::{catch_unwind, AssertUnwindSafe};
1580
1581        // Serialize with sibling tests that also touch LOOKUP_INFLIGHT —
1582        // the critical race was `InflightGuard::drop` using `try_lock`
1583        // and silently skipping cleanup when a parallel test held the
1584        // mutex, leaving this test's entry in the map and failing the
1585        // final assertion.
1586        let _serial = INFLIGHT_TEST_SERIAL
1587            .lock()
1588            .unwrap_or_else(|p| p.into_inner());
1589
1590        // Seed an entry and arm a guard for it. Use a per-run unique key
1591        // so this test can never collide with siblings under parallel
1592        // `cargo test` — previously a hard-coded key raced with the peer
1593        // coalescing test's `m.clear()` call.
1594        let key = unique_test_key("__drop_poison");
1595        let notify = Arc::new(Notify::new());
1596        {
1597            let mut map = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1598            map.insert(key.clone(), Arc::downgrade(&notify));
1599        }
1600        let guard = InflightGuard {
1601            key: key.clone(),
1602            notify: notify.clone(),
1603        };
1604
1605        // Poison the mutex.
1606        let _ = catch_unwind(AssertUnwindSafe(|| {
1607            let _g = LOOKUP_INFLIGHT.lock().unwrap();
1608            panic!("poisoning LOOKUP_INFLIGHT for drop test");
1609        }));
1610
1611        // Dropping the guard must not panic and must remove the entry via
1612        // the Poisoned branch of the new try_lock match.
1613        drop(guard);
1614
1615        let map = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1616        assert!(
1617            !map.contains_key(&key),
1618            "poisoned-mutex drop path should still remove the in-flight entry"
1619        );
1620    }
1621}