Skip to main content

seer_core/
lookup.rs

1use std::collections::HashMap;
2use std::net::Ipv6Addr;
3use std::str::FromStr;
4use std::sync::{Arc, Mutex, Weak};
5use std::time::Duration;
6
7use chrono::{DateTime, Utc};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Notify;
12use tracing::{debug, instrument, warn};
13
14use tokio::time::timeout as tokio_timeout;
15
16use crate::availability::{AvailabilityChecker, AvailabilityResult};
17use crate::cache::TtlCache;
18use crate::dns::{DnsPresence, DnsResolver};
19use crate::error::{Result, SeerError};
20use crate::rdap::{rdap_error_is_404, RdapClient, RdapResponse};
21use crate::whois::{get_registry_url, get_tld, WhoisClient, WhoisResponse};
22
/// Time-to-live passed to [`LOOKUP_CACHE`] at construction (5 minutes).
const LOOKUP_CACHE_TTL: Duration = Duration::from_secs(5 * 60);

/// Grace period granted to the slower protocol once the faster one finishes.
///
/// The rule is symmetric: whichever of RDAP or WHOIS completes first, the
/// other leg gets at most this long to respond. If it does not, the lookup
/// proceeds with what it has rather than waiting out that protocol's own
/// (longer) timeout.
const PROTOCOL_GRACE_PERIOD: Duration = Duration::from_secs(5);

/// Length threshold, counted in `char`s (not bytes), beyond which
/// public-facing error strings are truncated.
const MAX_PUBLIC_ERROR_LEN: usize = 256;

/// Process-wide cache of lookup results, keyed by normalized domain, used to
/// avoid redundant network calls. Built with [`LOOKUP_CACHE_TTL`] and shared
/// by every `SmartLookup` instance.
static LOOKUP_CACHE: Lazy<TtlCache<String, LookupResult>> =
    Lazy::new(|| TtlCache::new(LOOKUP_CACHE_TTL));

/// In-flight lookup coalescing map: normalized-domain -> `Weak<Notify>`.
///
/// Only one network race runs per unique domain at a time; concurrent callers
/// wait on the shared `Notify` and then read the result from `LOOKUP_CACHE`.
/// The map stores only a `Weak` handle — the strong `Arc<Notify>` is held by
/// the task that owns the slot — so an entry never keeps a `Notify` alive on
/// its own.
static LOOKUP_INFLIGHT: Lazy<Mutex<HashMap<String, Weak<Notify>>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));
43
/// Pattern for stripping IPv4 literals from public error messages.
///
/// Matches four dot-separated groups of one to three digits. Octets are not
/// range-checked, so an out-of-range quad such as `999.1.1.1` is matched (and
/// therefore redacted) as well.
static IPV4_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").expect("IPV4_RE is a valid regex"));

/// Candidate pattern for IPv6 literals: a hex/colon token containing either
/// a `::` compression or at least three colons. This catches plausible IPv6
/// addresses cheaply; each match is then validated by `Ipv6Addr::from_str`
/// before redaction, so MAC fragments, hex hashes, and similar colon-laden
/// tokens are left alone.
///
/// NOTE(review): the pattern is anchored with `\b` on both sides and `:` is
/// not a word character, so a literal that *begins or ends* with `::` (for
/// example `::1` or `2001:db8::`) does not appear to be matched when it is
/// surrounded by whitespace or punctuation — confirm with a unit test.
static IPV6_CANDIDATE_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"\b[0-9a-fA-F:]*(?:::|(?:[0-9a-fA-F]{1,4}:){3,})[0-9a-fA-F:]*\b")
        .expect("IPV6_CANDIDATE_RE is a valid regex")
});
57
58/// Redact substrings that parse as valid IPv6 addresses, leaving non-IPv6
59/// tokens (e.g. `af:ba:12`) untouched.
60fn strip_ipv6(msg: &str) -> String {
61    IPV6_CANDIDATE_RE
62        .replace_all(msg, |caps: &regex::Captures| {
63            let candidate = &caps[0];
64            if Ipv6Addr::from_str(candidate).is_ok() {
65                "[ip-redacted]".to_string()
66            } else {
67                candidate.to_string()
68            }
69        })
70        .into_owned()
71}
72
73/// Test-only hook: counts the number of times `lookup_concurrent` is actually
74/// invoked (i.e., the underlying network race runs). Used to verify request
75/// coalescing. Not exposed outside the crate.
76#[cfg(test)]
77static LOOKUP_CONCURRENT_CALLS: Lazy<std::sync::atomic::AtomicUsize> =
78    Lazy::new(|| std::sync::atomic::AtomicUsize::new(0));
79
80/// Returns true if the parsed WHOIS response lacks all key registration
81/// signals: no registrar, no creation date, and no expiration date.
82///
83/// This is a necessary-but-not-sufficient signal for domain availability;
84/// `lookup_concurrent` combines it with an RDAP 404 before routing to the
85/// availability path. Nameservers alone don't disqualify thinness — some
86/// registries return placeholder nameservers for unregistered domains.
87fn whois_response_is_thin(w: &WhoisResponse) -> bool {
88    w.registrar.is_none() && w.creation_date.is_none() && w.expiration_date.is_none()
89}
90
91/// Decides whether a WHOIS response + RDAP error combination should route
92/// to the availability path. Returns `(confidence, method)` when routing is
93/// warranted, `None` to keep the existing `LookupResult::Whois` behavior.
94///
95/// Case A: WHOIS explicitly indicates no registration (highest priority).
96/// Case B: WHOIS returned but lacks registration data AND RDAP returned 404.
97fn classify_whois_leg(
98    w: &WhoisResponse,
99    rdap_err: &SeerError,
100) -> Option<(&'static str, &'static str)> {
101    if w.is_available() {
102        return Some(("high", "whois"));
103    }
104    if whois_response_is_thin(w) && rdap_error_is_404(rdap_err) {
105        return Some(("medium", "whois_thin_response"));
106    }
107    None
108}
109
110/// Wraps `classify_whois_leg` with the "RDAP returned 200" veto: a successful
111/// RDAP response (HTTP 200, even if the body is thin) is positive evidence
112/// that the domain object exists, so we never let a WHOIS-only signal flip
113/// the verdict to "available" in that case. This guards against WHOIS
114/// propagation lag against freshly-provisioned domains the registry has
115/// already begun serving via RDAP. v0.26.6 regression fix.
116fn should_route_to_availability(
117    rdap_returned_200: bool,
118    rdap_seer_error: Option<&SeerError>,
119    whois_data: &WhoisResponse,
120) -> Option<(&'static str, &'static str)> {
121    if rdap_returned_200 {
122        return None;
123    }
124    // `is_available()` streams the raw response (~1 MB worst case) line by
125    // line. Compute it once and reuse — `classify_whois_leg` also calls it,
126    // so the original code paid the scan twice on every non-404 RDAP-error
127    // path. We pre-check Case A here; if it doesn't fire we drop into the
128    // 404+thin Case B branch via `classify_whois_leg`.
129    if whois_data.is_available() {
130        return Some(("high", "whois"));
131    }
132    rdap_seer_error.and_then(|e| {
133        // Case B only: WHOIS is not available, so the only remaining path
134        // is "thin WHOIS + RDAP 404". `classify_whois_leg` will re-check
135        // `is_available()` for free (it's false now), so this is a single
136        // additional thin-check call.
137        classify_whois_leg(whois_data, e)
138    })
139}
140
141/// Decides whether a thin WHOIS leg should be reclassified as "available" on
142/// the strength of a DNS NXDOMAIN. Pure so the veto rules are unit-tested
143/// without a resolver.
144///
145/// Routes to availability only when ALL hold:
146/// * the WHOIS body was thin — no registrar/dates (`is_thin`),
147/// * RDAP did NOT return an HTTP 200 (`rdap_returned_200` is false) — a 200,
148///   even with a thin body, proves the domain object exists, and
149/// * the apex has no DNS presence ([`DnsPresence::Absent`] / NXDOMAIN).
150fn nxdomain_confirms_available(is_thin: bool, rdap_returned_200: bool, dns: DnsPresence) -> bool {
151    is_thin && !rdap_returned_200 && matches!(dns, DnsPresence::Absent)
152}
153
154/// Sanitizes an error message for inclusion in a public-facing response.
155///
156/// Strips IPv4 and IPv6 literals (to avoid leaking internal addresses when
157/// an SSRF guard rejects a resolved URL) and caps the total length to
158/// [`MAX_PUBLIC_ERROR_LEN`] characters.
159fn sanitize_error_for_public(msg: &str) -> String {
160    let s = IPV4_RE.replace_all(msg, "[ip-redacted]");
161    let s = strip_ipv6(&s);
162    if s.chars().count() > MAX_PUBLIC_ERROR_LEN {
163        let mut trunc: String = s.chars().take(MAX_PUBLIC_ERROR_LEN).collect();
164        trunc.push('…');
165        trunc
166    } else {
167        s
168    }
169}
170
/// Guard for the in-flight-lookup slot of one domain. On drop, removes the
/// entry from `LOOKUP_INFLIGHT` and notifies any waiters so they can read the
/// freshly-populated cache.
///
/// NOTE on failed-owner retry semantics:
/// When the owning task's lookup fails, `InflightGuard::drop` runs, the
/// `HashMap` entry is removed, and `notify_waiters()` fires. Waiters wake,
/// observe an empty cache, and one of them becomes the new owner — triggering
/// a fresh network race. This means transient failures are automatically
/// retried by any concurrent waiter. Callers that observe a timeout error
/// should not assume no work is in flight; another concurrent caller may
/// already be retrying.
struct InflightGuard {
    /// Normalized domain; the key of this guard's entry in `LOOKUP_INFLIGHT`.
    key: String,
    /// Strong handle to the `Notify` that waiters block on. The map itself
    /// only stores a `Weak` to it.
    notify: Arc<Notify>,
}
187
188impl Drop for InflightGuard {
189    fn drop(&mut self) {
190        // Always remove the entry before notifying. The earlier `try_lock`
191        // design skipped removal under contention, but that left a stale
192        // `Weak<Notify>` in the map: a caller arriving in the brief window
193        // between `notify_waiters()` firing and the owner's `Arc<Notify>`
194        // dropping could upgrade the Weak, register as a waiter on the
195        // already-fired Notify, and block forever (notify_waiters only
196        // wakes currently-registered waiters; it does not accumulate
197        // permits for later registrations).
198        //
199        // Contention windows on this `std::sync::Mutex<HashMap>` are
200        // microseconds — the brief block here is safer than the stale-entry
201        // hazard. Poisoned-mutex recovery is preserved.
202        let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
203        inflight.remove(&self.key);
204        drop(inflight);
205        self.notify.notify_waiters();
206    }
207}
208
/// Internal classification of the RDAP leg of a concurrent lookup.
///
/// Distinguishing `NoData` (HTTP 200 but response was missing useful fields)
/// from `Error` lets the orchestrator prefer a thin WHOIS result over the
/// availability fallback when RDAP silently returned nothing.
enum RdapOutcome {
    /// RDAP succeeded and the response passed `is_rdap_response_useful`.
    Useful(RdapResponse),
    /// RDAP succeeded but the response did not pass
    /// `is_rdap_response_useful`; the response is kept so it can be attached
    /// to a WHOIS result as a fallback.
    NoData(RdapResponse),
    /// The RDAP request completed with an error.
    Error(SeerError),
    /// RDAP future did not complete within the grace period after the other
    /// protocol finished.
    GraceTimeout,
}
222
223/// Progress callback for smart lookup operations.
224/// Called with a message describing the current phase of the lookup.
225pub type LookupProgressCallback = Arc<dyn Fn(&str) + Send + Sync>;
226
/// Outcome of a smart lookup.
///
/// Serialized as an internally tagged enum: the `source` field carries the
/// lower-cased variant name (`"rdap"`, `"whois"` or `"available"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "source", rename_all = "lowercase")]
pub enum LookupResult {
    /// RDAP returned a useful response and is the primary data source.
    Rdap {
        /// The RDAP response.
        data: Box<RdapResponse>,
        /// WHOIS response for the same domain, when the WHOIS leg also
        /// completed successfully. Omitted from serialized output when absent.
        #[serde(skip_serializing_if = "Option::is_none")]
        whois_fallback: Option<WhoisResponse>,
    },
    /// RDAP was not useful, but WHOIS returned a response.
    Whois {
        /// The WHOIS response.
        data: WhoisResponse,
        /// Human-readable reason the RDAP leg was not used.
        rdap_error: Option<String>,
        /// The RDAP response, when RDAP did answer but without useful data.
        /// Omitted from serialized output when absent.
        #[serde(skip_serializing_if = "Option::is_none")]
        rdap_fallback: Option<Box<RdapResponse>>,
    },
    /// Neither protocol produced usable registration data; carries the
    /// verdict of the availability heuristics instead.
    Available {
        /// The availability verdict.
        data: Box<AvailabilityResult>,
        /// Sanitized description of why RDAP yielded nothing.
        rdap_error: String,
        /// Sanitized description of why WHOIS yielded nothing; empty when a
        /// WHOIS response was received and is attached in `whois_data`.
        whois_error: String,
        /// Raw WHOIS response, when one was available at routing time
        /// (Cases A and B in the design spec). `None` preserves the
        /// pre-existing "both protocols errored" semantics.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        whois_data: Option<WhoisResponse>,
    },
}
252
253impl LookupResult {
254    /// Returns the domain name from the lookup result.
255    pub fn domain_name(&self) -> Option<String> {
256        match self {
257            LookupResult::Rdap { data, .. } => data.domain_name().map(String::from),
258            LookupResult::Whois { data, .. } => Some(data.domain.clone()),
259            LookupResult::Available { data, .. } => Some(data.domain.clone()),
260        }
261    }
262
263    /// Returns the registrar name, preferring RDAP data with WHOIS fallback.
264    pub fn registrar(&self) -> Option<String> {
265        match self {
266            LookupResult::Rdap {
267                data,
268                whois_fallback,
269            } => data
270                .get_registrar()
271                .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone())),
272            LookupResult::Whois { data, .. } => data.registrar.clone(),
273            LookupResult::Available { .. } => None,
274        }
275    }
276
277    /// Returns the registrant organization, preferring RDAP data with WHOIS fallback.
278    pub fn organization(&self) -> Option<String> {
279        match self {
280            LookupResult::Rdap {
281                data,
282                whois_fallback,
283            } => data
284                .get_registrant_organization()
285                .or_else(|| whois_fallback.as_ref().and_then(|w| w.organization.clone())),
286            LookupResult::Whois { data, .. } => data.organization.clone(),
287            LookupResult::Available { .. } => None,
288        }
289    }
290
291    /// Returns true if the result came from RDAP.
292    pub fn is_rdap(&self) -> bool {
293        matches!(self, LookupResult::Rdap { .. })
294    }
295
296    /// Returns true if the result came from WHOIS.
297    pub fn is_whois(&self) -> bool {
298        matches!(self, LookupResult::Whois { .. })
299    }
300
301    /// Returns true if the result is an availability check fallback.
302    pub fn is_available(&self) -> bool {
303        matches!(self, LookupResult::Available { .. })
304    }
305
306    /// Returns the expiration date and registrar info from the lookup result.
307    pub fn expiration_info(&self) -> (Option<DateTime<Utc>>, Option<String>) {
308        match self {
309            LookupResult::Rdap {
310                data,
311                whois_fallback,
312            } => {
313                // Try to get expiration from RDAP events
314                let expiration_date = data
315                    .events
316                    .iter()
317                    .find(|e| e.event_action == "expiration")
318                    .and_then(|e| e.parsed_date())
319                    .or_else(|| {
320                        // Fallback to WHOIS if available
321                        whois_fallback.as_ref().and_then(|w| w.expiration_date)
322                    });
323
324                let registrar = data
325                    .get_registrar()
326                    .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone()));
327
328                (expiration_date, registrar)
329            }
330            LookupResult::Whois { data, .. } => (data.expiration_date, data.registrar.clone()),
331            LookupResult::Available { .. } => (None, None),
332        }
333    }
334}
335
336/// Before caching, trim raw WHOIS response to limit cache memory.
337/// A full WHOIS raw_response can be up to 1 MB; we cap it at 32 KB which is
338/// plenty for the parsed fields while preventing the cache from ballooning.
339fn trim_for_cache(mut result: LookupResult) -> LookupResult {
340    const MAX_RAW: usize = 32 * 1024;
341
342    match result {
343        LookupResult::Whois { ref mut data, .. } => {
344            if data.raw_response.len() > MAX_RAW {
345                data.raw_response.truncate(MAX_RAW);
346                data.raw_response.push_str("\n... [truncated for cache]");
347            }
348        }
349        LookupResult::Rdap {
350            ref mut whois_fallback,
351            ..
352        } => {
353            if let Some(ref mut w) = whois_fallback {
354                if w.raw_response.len() > MAX_RAW {
355                    w.raw_response.truncate(MAX_RAW);
356                    w.raw_response.push_str("\n... [truncated for cache]");
357                }
358            }
359        }
360        LookupResult::Available {
361            ref mut whois_data, ..
362        } => {
363            if let Some(ref mut w) = whois_data {
364                if w.raw_response.len() > MAX_RAW {
365                    w.raw_response.truncate(MAX_RAW);
366                    w.raw_response.push_str("\n... [truncated for cache]");
367                }
368            }
369        }
370    }
371
372    result
373}
374
/// Domain lookup front-end that queries RDAP and WHOIS concurrently and
/// falls back to availability heuristics when neither yields data.
#[derive(Debug, Clone)]
pub struct SmartLookup {
    /// Client for the RDAP leg of the lookup.
    rdap_client: RdapClient,
    /// Client for the WHOIS leg of the lookup.
    whois_client: WhoisClient,
    /// Used when both protocols fail to produce usable data.
    availability_checker: AvailabilityChecker,
    /// Used to probe DNS presence when WHOIS is thin and RDAP gave no 200.
    dns_resolver: DnsResolver,
    /// Deprecated: both protocols are now always attempted concurrently.
    prefer_rdap: bool,
    /// Deprecated: WHOIS data is now always attached when available.
    include_fallback: bool,
}
386
387impl Default for SmartLookup {
388    fn default() -> Self {
389        Self::new()
390    }
391}
392
393impl SmartLookup {
394    /// Creates a new SmartLookup that runs RDAP and WHOIS concurrently,
395    /// falling back to an availability check if both fail.
396    pub fn new() -> Self {
397        Self {
398            rdap_client: RdapClient::new(),
399            whois_client: WhoisClient::new(),
400            availability_checker: AvailabilityChecker::new(),
401            dns_resolver: DnsResolver::new(),
402            prefer_rdap: true,
403            include_fallback: false,
404        }
405    }
406
407    /// Deprecated: both protocols are now always attempted concurrently.
408    /// This method is kept for API compatibility but has no effect.
409    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
410    pub fn prefer_rdap(mut self, prefer: bool) -> Self {
411        self.prefer_rdap = prefer;
412        self
413    }
414
415    /// Deprecated: WHOIS data is now always attached when available.
416    /// This method is kept for API compatibility but has no effect.
417    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
418    pub fn include_fallback(mut self, include: bool) -> Self {
419        self.include_fallback = include;
420        self
421    }
422
    /// Performs a smart lookup for a domain, trying both RDAP and WHOIS concurrently.
    /// Falls back to an availability check if both fail.
    /// Results are cached for 5 minutes to avoid redundant network calls.
    ///
    /// This is a convenience wrapper around
    /// [`SmartLookup::lookup_with_progress`] with no progress callback; it
    /// returns whatever that method returns, including its errors.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn lookup(&self, domain: &str) -> Result<LookupResult> {
        // `None`: the caller is not interested in phase-by-phase progress.
        self.lookup_with_progress(domain, None).await
    }
430
431    /// Performs a lookup with an optional progress callback.
432    /// The callback is called with messages describing the current phase.
433    /// Results are cached for 5 minutes. Concurrent lookups for the same
434    /// domain are coalesced — only one network race runs per domain at a time.
435    #[instrument(skip(self, progress), fields(domain = %domain))]
436    pub async fn lookup_with_progress(
437        &self,
438        domain: &str,
439        progress: Option<LookupProgressCallback>,
440    ) -> Result<LookupResult> {
441        let normalized = crate::validation::normalize_domain(domain)?;
442
443        // Check cache first
444        if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
445            debug!(domain = %normalized, "Returning cached lookup result");
446            return Ok(cached);
447        }
448
449        // Coalesce in-flight lookups: if another task is already running a
450        // race for this domain, wait on its Notify rather than starting a
451        // second race. Two branches:
452        //   - Waiter: another task owns the slot; await its notify, then
453        //     read the cache. If the cache is still empty (owner failed),
454        //     loop and re-contend for ownership.
455        //   - Owner: no entry exists; insert a Weak handle, hold the Arc
456        //     for the duration of the work, then remove and notify on drop.
457        //
458        // A `loop` with a separate lock-scope per iteration keeps the
459        // `MutexGuard` from being held across any `.await`.
460        let _guard = loop {
461            enum Slot {
462                Waiter(Arc<Notify>),
463                Owner(InflightGuard),
464            }
465
466            let slot = {
467                // Recover from poisoning rather than panicking: a prior
468                // owner's panic should not permanently wedge the in-flight
469                // tracker for every future lookup.
470                let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
471                match inflight.get(&normalized).and_then(|w| w.upgrade()) {
472                    Some(existing) => Slot::Waiter(existing),
473                    None => {
474                        let n = Arc::new(Notify::new());
475                        inflight.insert(normalized.clone(), Arc::downgrade(&n));
476                        Slot::Owner(InflightGuard {
477                            key: normalized.clone(),
478                            notify: n,
479                        })
480                    }
481                }
482            };
483
484            match slot {
485                Slot::Waiter(n) => {
486                    debug!(domain = %normalized, "Waiting for in-flight lookup to complete");
487                    n.notified().await;
488                    if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
489                        return Ok(cached);
490                    }
491                    // Owner finished without populating the cache (failed
492                    // or errored). Re-contend for ownership.
493                    continue;
494                }
495                Slot::Owner(guard) => break guard,
496            }
497        };
498
499        let result = self.lookup_concurrent(&normalized, progress).await?;
500
501        // Cache a trimmed copy to limit memory usage before releasing
502        // waiters (via guard drop) so they observe the cached value.
503        LOOKUP_CACHE.insert(normalized.clone(), trim_for_cache(result.clone()));
504
505        Ok(result)
506    }
507
    /// Clears the lookup result cache.
    ///
    /// This is an associated function (no `self`): the cache is a single
    /// process-wide static, so clearing it affects every `SmartLookup`
    /// instance. The in-flight coalescing map is not touched.
    pub fn clear_cache() {
        LOOKUP_CACHE.clear();
    }
512
513    #[instrument(skip(self, progress), fields(domain = %domain))]
514    async fn lookup_concurrent(
515        &self,
516        domain: &str,
517        progress: Option<LookupProgressCallback>,
518    ) -> Result<LookupResult> {
519        #[cfg(test)]
520        LOOKUP_CONCURRENT_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
521
522        debug!(domain = %domain, "Attempting RDAP and WHOIS concurrently");
523
524        if let Some(ref cb) = progress {
525            cb("Querying RDAP and WHOIS concurrently");
526        }
527
528        let rdap_fut = self.rdap_client.lookup_domain(domain);
529        let whois_fut = self.whois_client.lookup(domain);
530
531        tokio::pin!(rdap_fut);
532        tokio::pin!(whois_fut);
533
534        // Race: whichever finishes first gets a grace period for the other.
535        //
536        // We track whether each side completed naturally or was truncated by
537        // the grace period, so downstream error messages can distinguish a
538        // true timeout from a loser-truncation.
539        enum LegOutcome<T> {
540            Completed(T),
541            GraceTruncated,
542        }
543
544        let (rdap_leg, whois_leg) = tokio::select! {
545            rdap_res = &mut rdap_fut => {
546                // RDAP finished first — give WHOIS a grace period
547                let whois_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, whois_fut).await {
548                    Ok(res) => LegOutcome::Completed(res),
549                    Err(_) => {
550                        debug!("WHOIS did not finish within grace period, proceeding with RDAP only");
551                        LegOutcome::GraceTruncated
552                    }
553                };
554                (LegOutcome::Completed(rdap_res), whois_leg)
555            }
556            whois_res = &mut whois_fut => {
557                // WHOIS finished first — give RDAP a grace period
558                let rdap_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, rdap_fut).await {
559                    Ok(res) => LegOutcome::Completed(res),
560                    Err(_) => {
561                        debug!("RDAP did not finish within grace period, proceeding with WHOIS only");
562                        LegOutcome::GraceTruncated
563                    }
564                };
565                (rdap_leg, LegOutcome::Completed(whois_res))
566            }
567        };
568
569        // Classify the RDAP leg.
570        let rdap_outcome = match rdap_leg {
571            LegOutcome::Completed(Ok(data)) => {
572                if self.is_rdap_response_useful(&data) {
573                    RdapOutcome::Useful(data)
574                } else {
575                    RdapOutcome::NoData(data)
576                }
577            }
578            LegOutcome::Completed(Err(e)) => RdapOutcome::Error(e),
579            LegOutcome::GraceTruncated => RdapOutcome::GraceTimeout,
580        };
581
582        // Phase 1: If RDAP returned useful data, use it as primary.
583        if let RdapOutcome::Useful(rdap_data) = rdap_outcome {
584            debug!("RDAP lookup successful");
585            let whois_fallback = match whois_leg {
586                LegOutcome::Completed(Ok(w)) => Some(w),
587                _ => None,
588            };
589            return Ok(LookupResult::Rdap {
590                data: Box::new(rdap_data),
591                whois_fallback,
592            });
593        }
594
595        // RDAP was not useful (NoData, Error, or GraceTimeout). Prefer WHOIS
596        // if it returned any response, even a thin one — this is safer than
597        // falling back to the availability heuristic when we have actual
598        // registry data in hand.
599        //
600        // We separately track whether RDAP returned an HTTP 200 (NoData):
601        // even a thin RDAP 200 is positive evidence the domain object
602        // exists. In that case we must NOT reclassify a WHOIS "no match"
603        // signal as availability — WHOIS lag against a freshly-provisioned
604        // domain would otherwise produce a false "available" verdict.
605        let rdap_returned_200 = matches!(rdap_outcome, RdapOutcome::NoData(_));
606        let (rdap_error_str, rdap_fallback_data, rdap_seer_error) = match rdap_outcome {
607            RdapOutcome::Useful(_) => {
608                // Unreachable in this branch (we returned above), but handle
609                // defensively rather than panicking across the FFI boundary.
610                debug!("Unexpected RdapOutcome::Useful in fallback branch");
611                (String::from("RDAP ok"), None, None)
612            }
613            RdapOutcome::NoData(data) => (
614                "RDAP response incomplete".to_string(),
615                Some(Box::new(data)),
616                None,
617            ),
618            RdapOutcome::Error(e) => (e.to_string(), None, Some(e)),
619            RdapOutcome::GraceTimeout => (
620                format!(
621                    "RDAP did not return within {}s grace period after WHOIS won",
622                    PROTOCOL_GRACE_PERIOD.as_secs()
623                ),
624                None,
625                None,
626            ),
627        };
628
629        if let LegOutcome::Completed(Ok(whois_data)) = whois_leg {
630            // Check Cases A and B: should we reclassify as Available? The
631            // `should_route_to_availability` helper also enforces the
632            // "RDAP returned 200 vetoes WHOIS availability claims" rule.
633            let availability_match = should_route_to_availability(
634                rdap_returned_200,
635                rdap_seer_error.as_ref(),
636                &whois_data,
637            );
638
639            if let Some((confidence, method)) = availability_match {
640                debug!(
641                    domain = %domain,
642                    confidence = %confidence,
643                    "Reclassifying WHOIS as availability signal"
644                );
645                if let Some(ref cb) = progress {
646                    cb("Domain appears unregistered");
647                }
648                let details = match confidence {
649                    "high" => Some("WHOIS indicates domain is not registered".to_string()),
650                    "medium" => Some(
651                        "WHOIS returned no registrar or registration dates; RDAP returned 404"
652                            .to_string(),
653                    ),
654                    _ => None,
655                };
656                let avail = AvailabilityResult {
657                    domain: domain.to_string(),
658                    available: true,
659                    confidence: confidence.to_string(),
660                    method: method.to_string(),
661                    details,
662                };
663                return Ok(LookupResult::Available {
664                    data: Box::new(avail),
665                    rdap_error: sanitize_error_for_public(&rdap_error_str),
666                    whois_error: String::new(),
667                    whois_data: Some(whois_data),
668                });
669            }
670
671            // Fix #2 safety net: a thin WHOIS body plus an RDAP failure that
672            // was not an authoritative 404 leaves us without registry data.
673            // If the apex also has no DNS presence (NXDOMAIN), reclassify as
674            // likely-available rather than emitting an empty WHOIS record. The
675            // cheap thin / not-200 preconditions gate the DNS probe so we
676            // don't pay for it on the common paths.
677            let whois_is_thin = whois_response_is_thin(&whois_data);
678            if whois_is_thin && !rdap_returned_200 {
679                let dns_presence = self.dns_resolver.presence(domain).await;
680                if nxdomain_confirms_available(whois_is_thin, rdap_returned_200, dns_presence) {
681                    debug!(domain = %domain, "Thin WHOIS + NXDOMAIN, reclassifying as available");
682                    if let Some(ref cb) = progress {
683                        cb("Domain appears unregistered (no DNS presence)");
684                    }
685                    let avail = AvailabilityResult {
686                        domain: domain.to_string(),
687                        available: true,
688                        confidence: "medium".to_string(),
689                        method: "dns_nxdomain".to_string(),
690                        details: Some(
691                            "No registry data available; domain has no DNS presence (NXDOMAIN)"
692                                .to_string(),
693                        ),
694                    };
695                    return Ok(LookupResult::Available {
696                        data: Box::new(avail),
697                        rdap_error: sanitize_error_for_public(&rdap_error_str),
698                        whois_error: String::new(),
699                        whois_data: Some(whois_data),
700                    });
701                }
702            }
703            debug!("Using WHOIS result (RDAP not useful)");
704            if let Some(ref cb) = progress {
705                cb("RDAP not available (using WHOIS)");
706            }
707            return Ok(LookupResult::Whois {
708                data: whois_data,
709                rdap_error: Some(rdap_error_str),
710                rdap_fallback: rdap_fallback_data,
711            });
712        }
713
714        // Both sides failed to provide useful data. Craft a precise WHOIS
715        // error string that distinguishes true errors from grace-period
716        // truncation.
717        let whois_error_str = match whois_leg {
718            LegOutcome::Completed(Err(e)) => e.to_string(),
719            LegOutcome::Completed(Ok(_)) => {
720                // Already handled above; treat defensively.
721                debug!("Unexpected completed-Ok WHOIS in availability fallback branch");
722                "WHOIS returned but was not used".to_string()
723            }
724            LegOutcome::GraceTruncated => format!(
725                "WHOIS did not return within {}s grace period after RDAP won",
726                PROTOCOL_GRACE_PERIOD.as_secs()
727            ),
728        };
729
730        self.availability_fallback(domain, rdap_error_str, whois_error_str, progress)
731            .await
732    }
733
734    async fn availability_fallback(
735        &self,
736        domain: &str,
737        rdap_error: String,
738        whois_error: String,
739        progress: Option<LookupProgressCallback>,
740    ) -> Result<LookupResult> {
741        if let Some(ref cb) = progress {
742            cb("RDAP and WHOIS unavailable (checking availability)");
743        }
744        warn!(
745            domain = %domain,
746            rdap_error = %rdap_error,
747            whois_error = %whois_error,
748            "Both RDAP and WHOIS failed, falling back to availability check"
749        );
750
751        match self.availability_checker.check(domain).await {
752            Ok(avail) => Ok(LookupResult::Available {
753                data: Box::new(avail),
754                rdap_error: sanitize_error_for_public(&rdap_error),
755                whois_error: sanitize_error_for_public(&whois_error),
756                whois_data: None,
757            }),
758            Err(avail_err) => {
759                let tld = get_tld(domain).unwrap_or("unknown");
760                let registry_url = get_registry_url(tld).unwrap_or_else(|| {
761                    format!("https://www.iana.org/domains/root/db/{}.html", tld)
762                });
763                Err(SeerError::LookupFailed {
764                    domain: domain.to_string(),
765                    details: format!(
766                        "RDAP failed ({}), WHOIS failed ({}), availability check failed ({})",
767                        rdap_error, whois_error, avail_err
768                    ),
769                    registry_url,
770                })
771            }
772        }
773    }
774
775    fn is_rdap_response_useful(&self, response: &RdapResponse) -> bool {
776        // Check if we have at least some meaningful data
777        let has_name = response.ldh_name.is_some() || response.unicode_name.is_some();
778        let has_dates = response
779            .events
780            .iter()
781            .any(|e| e.event_action == "registration" || e.event_action == "expiration");
782        let has_entities = !response.entities.is_empty();
783        let has_nameservers = !response.nameservers.is_empty();
784        let has_status = !response.status.is_empty();
785
786        // Consider useful if we have the name plus at least one other piece of info
787        has_name && (has_dates || has_entities || has_nameservers || has_status)
788    }
789}
790
791#[cfg(test)]
792mod tests {
793    use super::*;
794
    /// Serialization lock for the tests that touch the shared
    /// `LOOKUP_INFLIGHT` static (coalescing, poison recovery, drop recovery).
    ///
    /// Letting those tests run in parallel creates two races:
    ///   1. The guard's `Drop` uses `try_lock`; if another test holds the
    ///      mutex at that moment, cleanup is skipped and the stale entry
    ///      fails later assertions.
    ///   2. A test that poisons the mutex leaves it poisoned for the next
    ///      test. Callers recover with `unwrap_or_else`, but the shared
    ///      state is still disturbed.
    ///
    /// Per-test unique keys (see `unique_test_key`) prevent entry-level
    /// collisions; this mutex prevents lock-contention races on `Drop`.
    static INFLIGHT_TEST_SERIAL: Mutex<()> = Mutex::new(());
805
806    #[test]
807    fn test_lookup_result_domain_name_whois() {
808        let result = LookupResult::Whois {
809            data: WhoisResponse {
810                domain: "example.com".to_string(),
811                registrar: Some("Test Registrar".to_string()),
812                registrant: None,
813                organization: None,
814                registrant_email: None,
815                registrant_phone: None,
816                registrant_address: None,
817                registrant_country: None,
818                admin_name: None,
819                admin_organization: None,
820                admin_email: None,
821                admin_phone: None,
822                tech_name: None,
823                tech_organization: None,
824                tech_email: None,
825                tech_phone: None,
826                creation_date: None,
827                expiration_date: None,
828                updated_date: None,
829                status: vec![],
830                nameservers: vec![],
831                dnssec: None,
832                whois_server: "whois.example.com".to_string(),
833                raw_response: String::new(),
834            },
835            rdap_error: None,
836            rdap_fallback: None,
837        };
838
839        assert_eq!(result.domain_name(), Some("example.com".to_string()));
840        assert_eq!(result.registrar(), Some("Test Registrar".to_string()));
841        assert!(result.is_whois());
842        assert!(!result.is_rdap());
843        assert!(!result.is_available());
844    }
845
846    #[test]
847    fn test_lookup_result_serialization() {
848        let result = LookupResult::Whois {
849            data: WhoisResponse {
850                domain: "test.com".to_string(),
851                registrar: None,
852                registrant: None,
853                organization: None,
854                registrant_email: None,
855                registrant_phone: None,
856                registrant_address: None,
857                registrant_country: None,
858                admin_name: None,
859                admin_organization: None,
860                admin_email: None,
861                admin_phone: None,
862                tech_name: None,
863                tech_organization: None,
864                tech_email: None,
865                tech_phone: None,
866                creation_date: None,
867                expiration_date: None,
868                updated_date: None,
869                status: vec![],
870                nameservers: vec![],
871                dnssec: None,
872                whois_server: String::new(),
873                raw_response: String::new(),
874            },
875            rdap_error: Some("RDAP failed".to_string()),
876            rdap_fallback: None,
877        };
878
879        let json = serde_json::to_string(&result).unwrap();
880        assert!(json.contains("\"source\":\"whois\""));
881        assert!(json.contains("RDAP failed"));
882    }
883
884    #[test]
885    fn test_lookup_result_available_serialization() {
886        let result = LookupResult::Available {
887            data: Box::new(AvailabilityResult {
888                domain: "test123.xyz".to_string(),
889                available: true,
890                confidence: "medium".to_string(),
891                method: "whois_error".to_string(),
892                details: Some("WHOIS server indicates no matching records".to_string()),
893            }),
894            rdap_error: "RDAP failed".to_string(),
895            whois_error: "WHOIS failed".to_string(),
896            whois_data: None,
897        };
898
899        let json = serde_json::to_string(&result).unwrap();
900        assert!(json.contains("\"source\":\"available\""));
901        assert!(json.contains("\"available\":true"));
902        assert!(json.contains("test123.xyz"));
903
904        assert_eq!(result.domain_name(), Some("test123.xyz".to_string()));
905        assert!(result.is_available());
906        assert!(!result.is_rdap());
907        assert!(!result.is_whois());
908        assert!(result.registrar().is_none());
909        assert_eq!(result.expiration_info(), (None, None));
910    }
911
912    #[test]
913    #[allow(deprecated)]
914    fn test_smart_lookup_builder() {
915        let lookup = SmartLookup::new().prefer_rdap(false).include_fallback(true);
916        assert!(!lookup.prefer_rdap);
917        assert!(lookup.include_fallback);
918    }
919
920    #[test]
921    fn test_lookup_cache_clear() {
922        SmartLookup::clear_cache();
923        assert!(LOOKUP_CACHE.is_empty());
924    }
925
926    // ---------------- sanitize_error_for_public ----------------
927
928    #[test]
929    fn test_sanitize_strips_ipv4() {
930        let msg = "RDAP URL resolves to reserved IP 10.0.0.1 which is forbidden";
931        let sanitized = sanitize_error_for_public(msg);
932        assert!(
933            !sanitized.contains("10.0.0.1"),
934            "IPv4 should be stripped, got: {}",
935            sanitized
936        );
937        assert!(sanitized.contains("[ip-redacted]"));
938    }
939
940    #[test]
941    fn test_sanitize_strips_multiple_ipv4() {
942        let msg = "Could not connect to 192.168.1.1 after trying 127.0.0.1";
943        let sanitized = sanitize_error_for_public(msg);
944        assert!(!sanitized.contains("192.168.1.1"));
945        assert!(!sanitized.contains("127.0.0.1"));
946        // Two redactions expected.
947        assert_eq!(sanitized.matches("[ip-redacted]").count(), 2);
948    }
949
950    #[test]
951    fn test_sanitize_strips_ipv6() {
952        let msg = "RDAP URL resolves to reserved IP fe80::1 which is forbidden";
953        let sanitized = sanitize_error_for_public(msg);
954        assert!(!sanitized.contains("fe80::1"));
955        assert!(sanitized.contains("[ip-redacted]"));
956    }
957
958    #[test]
959    fn sanitize_leaves_mac_address_like_tokens_alone() {
960        let msg = "error code af:ba:12 at line 5";
961        let out = sanitize_error_for_public(msg);
962        assert!(
963            out.contains("af:ba:12"),
964            "MAC fragment should not be stripped: {}",
965            out
966        );
967    }
968
969    #[test]
970    fn sanitize_strips_real_ipv6() {
971        let msg = "cannot reach 2001:db8::1 — timeout";
972        let out = sanitize_error_for_public(msg);
973        assert!(!out.contains("2001:db8::1"));
974        assert!(out.contains("[ip-redacted]"));
975    }
976
977    #[test]
978    fn sanitize_strips_fe80_link_local() {
979        let msg = "peer at fe80::1 unreachable";
980        let out = sanitize_error_for_public(msg);
981        assert!(out.contains("[ip-redacted]"));
982    }
983
984    #[test]
985    fn test_sanitize_truncates_long_message() {
986        // Build a 500-char message with no IPs.
987        let long = "a".repeat(500);
988        let sanitized = sanitize_error_for_public(&long);
989        // Should cap at MAX_PUBLIC_ERROR_LEN chars + ellipsis.
990        let char_count = sanitized.chars().count();
991        assert_eq!(char_count, MAX_PUBLIC_ERROR_LEN + 1);
992        assert!(sanitized.ends_with('…'));
993    }
994
995    #[test]
996    fn test_sanitize_preserves_short_messages() {
997        let msg = "RDAP timed out after 15s";
998        let sanitized = sanitize_error_for_public(msg);
999        assert_eq!(sanitized, msg);
1000    }
1001
1002    // ---------------- RdapOutcome classification ----------------
1003
1004    #[test]
1005    fn test_is_rdap_response_useful_detects_no_data() {
1006        use crate::rdap::RdapResponse;
1007        // Construct a response with a name but no events, entities, NS, or status
1008        // — this is the "200 OK but no useful fields" case that should be
1009        // classified as RdapOutcome::NoData (not Useful, not Error).
1010        let resp = RdapResponse {
1011            ldh_name: Some("example.com".to_string()),
1012            ..Default::default()
1013        };
1014        let lookup = SmartLookup::new();
1015        assert!(
1016            !lookup.is_rdap_response_useful(&resp),
1017            "Response with only a name should be classified as NoData"
1018        );
1019
1020        // And one with a name + status IS useful (sanity check).
1021        let useful = RdapResponse {
1022            ldh_name: Some("example.com".to_string()),
1023            status: vec!["active".to_string()],
1024            ..Default::default()
1025        };
1026        assert!(lookup.is_rdap_response_useful(&useful));
1027    }
1028
1029    // ---------------- Coalescing ----------------
1030
1031    // Verifies that when multiple concurrent lookups hit the in-flight map
1032    // for the same domain, later arrivals observe the existing Weak<Notify>
1033    // and become waiters rather than racing a second lookup. We test the
1034    // map-level primitive here because the full SmartLookup pipeline
1035    // requires network access to exercise.
1036    #[tokio::test]
1037    async fn test_inflight_coalescing_map() {
1038        // Serialize with sibling poisoning tests: we share LOOKUP_INFLIGHT
1039        // state, and `InflightGuard::drop` uses `try_lock` — if a sibling
1040        // holds the mutex during drop, cleanup is skipped and assertions
1041        // fail.
1042        let _serial = INFLIGHT_TEST_SERIAL
1043            .lock()
1044            .unwrap_or_else(|p| p.into_inner());
1045        // Poison-tolerant: the sibling poisoning regression tests may run
1046        // earlier under `cargo test` parallelism and leave LOOKUP_INFLIGHT
1047        // poisoned. The production code recovers via `unwrap_or_else`,
1048        // so this test does the same.
1049        //
1050        // Use a per-run unique key so this test cannot race with the other
1051        // tests that touch LOOKUP_INFLIGHT. Previously we `clear()`ed the
1052        // whole map, which raced with peer tests' entries.
1053        let domain = unique_test_key("__coalesce");
1054
1055        // Defensive: ensure our specific key is not present.
1056        {
1057            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1058            m.remove(&domain);
1059        }
1060
1061        // First caller: no entry → becomes owner.
1062        let owner_notify = Arc::new(Notify::new());
1063        {
1064            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1065            assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1066            m.insert(domain.clone(), Arc::downgrade(&owner_notify));
1067        }
1068
1069        // Second caller: sees the existing Weak and upgrades.
1070        let waiter = {
1071            let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1072            m.get(&domain)
1073                .and_then(|w| w.upgrade())
1074                .expect("Second caller must observe in-flight entry")
1075        };
1076
1077        // Waiter listens in the background.
1078        let waiter_clone = waiter.clone();
1079        let handle = tokio::spawn(async move {
1080            waiter_clone.notified().await;
1081        });
1082
1083        // Simulate owner completing.
1084        tokio::time::sleep(Duration::from_millis(20)).await;
1085        {
1086            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1087            m.remove(&domain);
1088        }
1089        owner_notify.notify_waiters();
1090
1091        // Waiter should unblock quickly.
1092        tokio::time::timeout(Duration::from_secs(1), handle)
1093            .await
1094            .expect("waiter must unblock after notify")
1095            .expect("waiter task joined cleanly");
1096
1097        // After owner removes entry and drops its Arc, the Weak is dead.
1098        drop(owner_notify);
1099        drop(waiter);
1100        let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1101        assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1102    }
1103
1104    /// Builds a domain key guaranteed unique per test invocation, so that
1105    /// tests touching the shared LOOKUP_INFLIGHT static never collide when
1106    /// `cargo test` runs them in parallel. We include a nanosecond timestamp
1107    /// plus an atomic counter to defeat even hash-identical calls within the
1108    /// same nanosecond.
1109    fn unique_test_key(prefix: &str) -> String {
1110        use std::sync::atomic::{AtomicU64, Ordering};
1111        use std::time::{SystemTime, UNIX_EPOCH};
1112        static COUNTER: AtomicU64 = AtomicU64::new(0);
1113        let nanos = SystemTime::now()
1114            .duration_since(UNIX_EPOCH)
1115            .map(|d| d.as_nanos())
1116            .unwrap_or(0);
1117        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1118        format!("{}_{}_{}.example.", prefix, nanos, n)
1119    }
1120
1121    // Demonstrates that the `sanitize_error_for_public` helper is applied
1122    // to the rdap_error / whois_error fields written into the `Available`
1123    // variant. We check the call site indirectly: construct a Available
1124    // manually and then verify a raw error with an IP becomes redacted.
1125    // (Integration via real clients would require network.)
1126    #[test]
1127    fn test_sanitize_applied_to_available_fields() {
1128        let rdap_raw = "RDAP URL resolves to reserved IP 10.0.0.1";
1129        let whois_raw = "connection refused at 192.168.0.5";
1130        let sanitized_rdap = sanitize_error_for_public(rdap_raw);
1131        let sanitized_whois = sanitize_error_for_public(whois_raw);
1132        let result = LookupResult::Available {
1133            data: Box::new(AvailabilityResult {
1134                domain: "unreg.test".to_string(),
1135                available: true,
1136                confidence: "low".to_string(),
1137                method: "heuristic".to_string(),
1138                details: None,
1139            }),
1140            rdap_error: sanitized_rdap,
1141            whois_error: sanitized_whois,
1142            whois_data: None,
1143        };
1144        if let LookupResult::Available {
1145            rdap_error,
1146            whois_error,
1147            ..
1148        } = result
1149        {
1150            assert!(!rdap_error.contains("10.0.0.1"));
1151            assert!(!whois_error.contains("192.168.0.5"));
1152            assert!(rdap_error.contains("[ip-redacted]"));
1153            assert!(whois_error.contains("[ip-redacted]"));
1154        } else {
1155            panic!("expected Available variant");
1156        }
1157    }
1158
1159    #[test]
1160    fn rdap_error_is_404_matches_standard_404() {
1161        let e = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1162        assert!(rdap_error_is_404(&e));
1163    }
1164
1165    #[test]
1166    fn rdap_error_is_404_matches_without_reason_phrase() {
1167        let e = SeerError::RdapError("query failed with status 404".to_string());
1168        assert!(rdap_error_is_404(&e));
1169    }
1170
1171    #[test]
1172    fn rdap_error_is_404_rejects_other_statuses() {
1173        let e = SeerError::RdapError("query failed with status 500 Server Error".to_string());
1174        assert!(!rdap_error_is_404(&e));
1175        let e = SeerError::RdapError("query failed with status 400 Bad Request".to_string());
1176        assert!(!rdap_error_is_404(&e));
1177    }
1178
1179    #[test]
1180    fn rdap_error_is_404_rejects_non_http_errors() {
1181        let e = SeerError::RdapError("connection timeout".to_string());
1182        assert!(!rdap_error_is_404(&e));
1183        let e = SeerError::Timeout("rdap".to_string());
1184        assert!(!rdap_error_is_404(&e));
1185    }
1186
1187    #[test]
1188    fn rdap_error_is_404_rejects_incidental_404_in_message() {
1189        // A 404 substring inside a non-status context must not match.
1190        let e = SeerError::RdapError("error 40404: database corruption".to_string());
1191        assert!(!rdap_error_is_404(&e));
1192    }
1193
1194    // ---------------- whois_response_is_thin ----------------
1195
1196    fn empty_whois(domain: &str) -> WhoisResponse {
1197        WhoisResponse {
1198            domain: domain.to_string(),
1199            registrar: None,
1200            registrant: None,
1201            organization: None,
1202            registrant_email: None,
1203            registrant_phone: None,
1204            registrant_address: None,
1205            registrant_country: None,
1206            admin_name: None,
1207            admin_organization: None,
1208            admin_email: None,
1209            admin_phone: None,
1210            tech_name: None,
1211            tech_organization: None,
1212            tech_email: None,
1213            tech_phone: None,
1214            creation_date: None,
1215            expiration_date: None,
1216            updated_date: None,
1217            nameservers: vec![],
1218            status: vec![],
1219            dnssec: None,
1220            whois_server: String::new(),
1221            raw_response: String::new(),
1222        }
1223    }
1224
1225    #[test]
1226    fn whois_response_is_thin_when_all_key_fields_missing() {
1227        let w = empty_whois("example.com");
1228        assert!(whois_response_is_thin(&w));
1229    }
1230
1231    #[test]
1232    fn whois_response_is_not_thin_when_registrar_present() {
1233        let mut w = empty_whois("example.com");
1234        w.registrar = Some("Test Registrar".to_string());
1235        assert!(!whois_response_is_thin(&w));
1236    }
1237
1238    #[test]
1239    fn whois_response_is_not_thin_when_creation_date_present() {
1240        let mut w = empty_whois("example.com");
1241        w.creation_date = Some(Utc::now());
1242        assert!(!whois_response_is_thin(&w));
1243    }
1244
1245    #[test]
1246    fn whois_response_is_not_thin_when_expiration_date_present() {
1247        let mut w = empty_whois("example.com");
1248        w.expiration_date = Some(Utc::now());
1249        assert!(!whois_response_is_thin(&w));
1250    }
1251
1252    #[test]
1253    fn whois_response_is_thin_even_with_nameservers_alone() {
1254        let mut w = empty_whois("example.com");
1255        w.nameservers = vec!["ns1.example.net".to_string()];
1256        assert!(whois_response_is_thin(&w));
1257    }
1258
1259    // ---------------- classify_whois_leg ----------------
1260
1261    use crate::rdap::RdapResponse;
1262
1263    #[allow(dead_code)]
1264    fn make_empty_rdap_response() -> RdapResponse {
1265        serde_json::from_value(serde_json::json!({
1266            "objectClassName": "domain",
1267        }))
1268        .expect("valid minimal RDAP response")
1269    }
1270
1271    #[test]
1272    fn classify_whois_leg_case_a_high_confidence() {
1273        let mut w = empty_whois("zaccodes.com");
1274        w.raw_response = "No match for \"ZACCODES.COM\".".to_string();
1275        assert!(w.is_available());
1276        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1277        let (verdict, method) =
1278            classify_whois_leg(&w, &rdap_err).expect("expected a routing decision");
1279        assert_eq!(verdict, "high");
1280        assert_eq!(method, "whois");
1281    }
1282
1283    #[test]
1284    fn classify_whois_leg_case_b_medium_confidence() {
1285        let w = empty_whois("example.xyz");
1286        assert!(!w.is_available(), "this WHOIS body has no 'no match' text");
1287        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1288        let (verdict, method) =
1289            classify_whois_leg(&w, &rdap_err).expect("expected a routing decision");
1290        assert_eq!(verdict, "medium");
1291        assert_eq!(method, "whois_thin_response");
1292    }
1293
1294    #[test]
1295    fn classify_whois_leg_rejects_thin_whois_without_404() {
1296        let w = empty_whois("example.xyz");
1297        let rdap_err = SeerError::RdapError("connection timeout".to_string());
1298        assert!(classify_whois_leg(&w, &rdap_err).is_none());
1299    }
1300
1301    #[test]
1302    fn classify_whois_leg_rejects_whois_with_real_data() {
1303        let mut w = empty_whois("legacy.tld");
1304        w.registrar = Some("Legacy Registry".to_string());
1305        w.creation_date = Some(Utc::now());
1306        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1307        assert!(classify_whois_leg(&w, &rdap_err).is_none());
1308    }
1309
1310    #[test]
1311    fn classify_whois_leg_case_a_wins_over_case_b() {
1312        let mut w = empty_whois("example.com");
1313        w.raw_response = "No match for \"EXAMPLE.COM\".".to_string();
1314        let rdap_err = SeerError::RdapError("query failed with status 404 Not Found".to_string());
1315        let (verdict, _) = classify_whois_leg(&w, &rdap_err).unwrap();
1316        assert_eq!(verdict, "high");
1317    }
1318
1319    // ---------------- should_route_to_availability ----------------
1320    //
1321    // Regression coverage for the v0.26.6 fix: when RDAP returned an HTTP 200
1322    // (even with thin body), a WHOIS "no match" must NOT be treated as
1323    // evidence of availability — that would let propagation lag flip the
1324    // verdict for a domain the registry has already provisioned.
1325
1326    #[test]
1327    fn rdap_200_vetoes_whois_no_match() {
1328        let mut w = empty_whois("freshly-registered.com");
1329        w.raw_response = "No match for \"FRESHLY-REGISTERED.COM\".".to_string();
1330        // rdap_returned_200 = true, no rdap_seer_error (NoData has no error).
1331        assert!(
1332            should_route_to_availability(true, None, &w).is_none(),
1333            "RDAP 200 must veto WHOIS-only availability claim",
1334        );
1335    }
1336
1337    #[test]
1338    fn rdap_200_vetoes_even_with_thin_whois() {
1339        let w = empty_whois("freshly-registered.com");
1340        // Thin WHOIS without is_available() patterns.
1341        assert!(
1342            should_route_to_availability(true, None, &w).is_none(),
1343            "RDAP 200 must veto even when WHOIS is thin",
1344        );
1345    }
1346
1347    #[test]
1348    fn rdap_404_with_whois_no_match_routes_to_available() {
1349        let mut w = empty_whois("genuinely-free.com");
1350        w.raw_response = "No match for \"GENUINELY-FREE.COM\".".to_string();
1351        let rdap_err = SeerError::RdapError("query failed with status 404".to_string());
1352        let result = should_route_to_availability(false, Some(&rdap_err), &w);
1353        assert_eq!(result, Some(("high", "whois")));
1354    }
1355
1356    #[test]
1357    fn rdap_error_with_whois_is_available_still_routes_case_a() {
1358        let mut w = empty_whois("genuinely-free.com");
1359        w.raw_response = "Domain not found".to_string();
1360        // RDAP errored for a non-404 reason (e.g. bootstrap failure); WHOIS
1361        // signal alone should still route to availability.
1362        let rdap_err = SeerError::RdapBootstrapError("all registries failed".to_string());
1363        let result = should_route_to_availability(false, Some(&rdap_err), &w);
1364        assert_eq!(result, Some(("high", "whois")));
1365    }
1366
1367    #[test]
1368    fn rdap_grace_timeout_with_whois_is_available_routes_case_a() {
1369        // GraceTimeout path: rdap_returned_200 = false, rdap_seer_error = None.
1370        let mut w = empty_whois("genuinely-free.com");
1371        w.raw_response = "No match".to_string();
1372        let result = should_route_to_availability(false, None, &w);
1373        assert_eq!(result, Some(("high", "whois")));
1374    }
1375
1376    #[test]
1377    fn no_rdap_200_no_error_thick_whois_stays_in_whois_path() {
1378        let mut w = empty_whois("registered.com");
1379        w.registrar = Some("Example Registrar Ltd".to_string());
1380        // GraceTimeout-like: rdap_returned_200=false, no error, and WHOIS
1381        // does not look free. Must return None so the caller picks
1382        // `LookupResult::Whois`.
1383        assert!(should_route_to_availability(false, None, &w).is_none());
1384    }
1385
1386    // ---------------- nxdomain_confirms_available ----------------
1387
1388    #[test]
1389    fn nxdomain_confirms_available_thin_no200_absent() {
1390        assert!(nxdomain_confirms_available(
1391            true,
1392            false,
1393            DnsPresence::Absent
1394        ));
1395    }
1396
1397    #[test]
1398    fn nxdomain_confirms_available_vetoed_by_rdap_200() {
1399        // A 200 from RDAP (object exists) must veto the NXDOMAIN signal even
1400        // if the apex currently has no delegation.
1401        assert!(!nxdomain_confirms_available(
1402            true,
1403            true,
1404            DnsPresence::Absent
1405        ));
1406    }
1407
1408    #[test]
1409    fn nxdomain_confirms_available_requires_thin_whois() {
1410        // A WHOIS body with real data is never overridden by DNS.
1411        assert!(!nxdomain_confirms_available(
1412            false,
1413            false,
1414            DnsPresence::Absent
1415        ));
1416    }
1417
1418    #[test]
1419    fn nxdomain_confirms_available_requires_absent_dns() {
1420        assert!(!nxdomain_confirms_available(
1421            true,
1422            false,
1423            DnsPresence::Present
1424        ));
1425        assert!(!nxdomain_confirms_available(
1426            true,
1427            false,
1428            DnsPresence::Unknown
1429        ));
1430    }
1431
1432    // ---------------- Mutex poisoning recovery ----------------
1433
1434    /// Regression: a panic inside `LOOKUP_INFLIGHT.lock()` must not wedge
1435    /// the tracker forever. After the mutex is poisoned, subsequent
1436    /// acquisition attempts must still succeed via `unwrap_or_else`.
1437    ///
1438    /// This isolates the lookup_with_progress acquisition site (formerly a
1439    /// `.expect("LOOKUP_INFLIGHT mutex poisoned")`) by exercising the same
1440    /// `.lock().unwrap_or_else(|p| p.into_inner())` pattern directly.
1441    #[test]
1442    fn lookup_inflight_recovers_from_poisoned_mutex() {
1443        use std::panic::{catch_unwind, AssertUnwindSafe};
1444
1445        // Serialize with sibling tests that also touch LOOKUP_INFLIGHT.
1446        let _serial = INFLIGHT_TEST_SERIAL
1447            .lock()
1448            .unwrap_or_else(|p| p.into_inner());
1449
1450        // Poison the real static by panicking while holding the guard.
1451        let _ = catch_unwind(AssertUnwindSafe(|| {
1452            let _guard = LOOKUP_INFLIGHT.lock().unwrap();
1453            panic!("poisoning LOOKUP_INFLIGHT for test");
1454        }));
1455
1456        // At this point LOOKUP_INFLIGHT is poisoned. Plain .lock() would
1457        // return Err(PoisonError). The recovery pattern used in
1458        // lookup_with_progress must still yield a usable guard.
1459        let mut guard = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1460        // Use a per-run unique canary so parallel tests cannot collide.
1461        let canary = unique_test_key("__poison_recovery");
1462        guard.insert(canary.clone(), Weak::new());
1463        assert!(guard.contains_key(&canary));
1464        guard.remove(&canary);
1465    }
1466
1467    /// Regression: InflightGuard::drop must also tolerate mutex poisoning
1468    /// without panicking — the Poisoned arm should still remove the entry.
1469    #[test]
1470    fn inflight_guard_drop_recovers_from_poisoned_mutex() {
1471        use std::panic::{catch_unwind, AssertUnwindSafe};
1472
1473        // Serialize with sibling tests that also touch LOOKUP_INFLIGHT —
1474        // the critical race was `InflightGuard::drop` using `try_lock`
1475        // and silently skipping cleanup when a parallel test held the
1476        // mutex, leaving this test's entry in the map and failing the
1477        // final assertion.
1478        let _serial = INFLIGHT_TEST_SERIAL
1479            .lock()
1480            .unwrap_or_else(|p| p.into_inner());
1481
1482        // Seed an entry and arm a guard for it. Use a per-run unique key
1483        // so this test can never collide with siblings under parallel
1484        // `cargo test` — previously a hard-coded key raced with the peer
1485        // coalescing test's `m.clear()` call.
1486        let key = unique_test_key("__drop_poison");
1487        let notify = Arc::new(Notify::new());
1488        {
1489            let mut map = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1490            map.insert(key.clone(), Arc::downgrade(&notify));
1491        }
1492        let guard = InflightGuard {
1493            key: key.clone(),
1494            notify: notify.clone(),
1495        };
1496
1497        // Poison the mutex.
1498        let _ = catch_unwind(AssertUnwindSafe(|| {
1499            let _g = LOOKUP_INFLIGHT.lock().unwrap();
1500            panic!("poisoning LOOKUP_INFLIGHT for drop test");
1501        }));
1502
1503        // Dropping the guard must not panic and must remove the entry via
1504        // the Poisoned branch of the new try_lock match.
1505        drop(guard);
1506
1507        let map = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1508        assert!(
1509            !map.contains_key(&key),
1510            "poisoned-mutex drop path should still remove the in-flight entry"
1511        );
1512    }
1513}