Skip to main content

seer_core/
lookup.rs

1use std::collections::HashMap;
2use std::net::Ipv6Addr;
3use std::str::FromStr;
4use std::sync::{Arc, Mutex, Weak};
5use std::time::Duration;
6
7use chrono::{DateTime, Utc};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Notify;
12use tracing::{debug, instrument, warn};
13
14use tokio::time::timeout as tokio_timeout;
15
16use crate::availability::{AvailabilityChecker, AvailabilityResult};
17use crate::cache::TtlCache;
18use crate::error::{Result, SeerError};
19use crate::rdap::{RdapClient, RdapResponse};
20use crate::whois::{get_registry_url, get_tld, WhoisClient, WhoisResponse};
21
/// Cache TTL for lookup results (5 minutes).
const LOOKUP_CACHE_TTL: Duration = Duration::from_secs(5 * 60);

/// Grace period for the second protocol after the first one finishes.
/// Whichever of RDAP/WHOIS completes first, the other leg gets at most
/// this long to respond; after that it is abandoned and the lookup
/// proceeds with what it has rather than waiting out the slower
/// protocol's own timeout.
const PROTOCOL_GRACE_PERIOD: Duration = Duration::from_secs(5);

/// Maximum length (in characters) for public-facing error strings.
const MAX_PUBLIC_ERROR_LEN: usize = 256;

/// Global cache for lookup results to avoid redundant network calls.
/// Keyed by the normalized domain; entries live for [`LOOKUP_CACHE_TTL`].
static LOOKUP_CACHE: Lazy<TtlCache<String, LookupResult>> =
    Lazy::new(|| TtlCache::new(LOOKUP_CACHE_TTL));

/// In-flight lookup coalescing map: normalized-domain -> Weak<Notify>.
/// Only one network race runs per unique domain at a time; concurrent callers
/// wait on the shared Notify and then read the result from LOOKUP_CACHE.
/// The map stores only a `Weak` handle; the owning task keeps the strong
/// `Arc` alive inside its `InflightGuard`.
static LOOKUP_INFLIGHT: Lazy<Mutex<HashMap<String, Weak<Notify>>>> =
    Lazy::new(|| Mutex::new(HashMap::new()));

/// Regex matching dotted-quad IPv4 literals, used to strip them from public
/// error messages. Octets are matched as 1-3 digits without range checking,
/// so out-of-range strings such as `999.1.1.1` match as well.
static IPV4_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").expect("IPV4_RE is a valid regex"));
46
/// Redact substrings that parse as valid IPv6 addresses, leaving non-IPv6
/// tokens (e.g. `af:ba:12`, MAC addresses, `std::io::Error`) untouched.
///
/// The message is scanned for maximal runs of ASCII hex digits and colons.
/// A run is replaced with `[ip-redacted]` only when
/// [`Ipv6Addr::from_str`] accepts it, so colon-laden tokens that are not
/// addresses are preserved byte-for-byte.
///
/// Boundary rules:
/// * A run glued to a preceding word character (letter, digit, `_`) is
///   part of an identifier and is never redacted. This keeps the `::` in
///   Rust paths such as `std::io::Error` from being parsed as the
///   unspecified address.
/// * Addresses that begin or end with `::` (`::1`, `2001:db8::`) are
///   recognised; a word-boundary anchor cannot match next to a colon, so
///   boundaries are checked explicitly instead.
/// * A single trailing `:` (as in `connect to fe80::1: timed out`) is left
///   in place and the address before it is redacted.
fn strip_ipv6(msg: &str) -> String {
    const REDACTED: &str = "[ip-redacted]";

    /// Bytes that may appear inside a textual IPv6 address (no zone id,
    /// no embedded dotted-quad).
    fn is_addr_byte(b: u8) -> bool {
        b.is_ascii_hexdigit() || b == b':'
    }

    /// Characters that make an adjacent run part of a larger identifier.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    let bytes = msg.as_bytes();
    let mut out = String::with_capacity(msg.len());
    // Everything before this byte offset has already been copied to `out`.
    let mut copied = 0;
    let mut pos = 0;

    while pos < bytes.len() {
        if !is_addr_byte(bytes[pos]) {
            pos += 1;
            continue;
        }

        // Address bytes are ASCII, so `start` and `pos` always fall on
        // UTF-8 character boundaries and slicing below cannot panic.
        let start = pos;
        while pos < bytes.len() && is_addr_byte(bytes[pos]) {
            pos += 1;
        }
        let run = &msg[start..pos];

        let glued_to_prev = msg[..start]
            .chars()
            .next_back()
            .map_or(false, is_word_char);
        if glued_to_prev || !run.contains(':') {
            continue;
        }
        let glued_to_next = msg[pos..].chars().next().map_or(false, is_word_char);

        let address = if !glued_to_next && Ipv6Addr::from_str(run).is_ok() {
            Some(run)
        } else {
            // Tolerate one trailing separator colon; the colon itself is
            // a non-word boundary, so the following character is irrelevant.
            run.strip_suffix(':')
                .filter(|trimmed| Ipv6Addr::from_str(trimmed).is_ok())
        };

        if let Some(address) = address {
            out.push_str(&msg[copied..start]);
            out.push_str(REDACTED);
            copied = start + address.len();
        }
    }

    out.push_str(&msg[copied..]);
    out
}
71
/// Test-only hook: counts the number of times `lookup_concurrent` is actually
/// invoked (i.e., the underlying network race runs). Used to verify request
/// coalescing. Not exposed outside the crate.
///
/// Incremented with `SeqCst` ordering as the first statement of
/// `lookup_concurrent`; compiled only under `cfg(test)`.
#[cfg(test)]
static LOOKUP_CONCURRENT_CALLS: Lazy<std::sync::atomic::AtomicUsize> =
    Lazy::new(|| std::sync::atomic::AtomicUsize::new(0));
78
79/// Returns true if the error is an RDAP HTTP 404 response, indicating the
80/// registry's RDAP server has no entry for this domain. Other RDAP errors
81/// (timeouts, 5xx, connection failures, etc.) do NOT match — they mean "we
82/// don't know", not "not registered".
83///
84/// Matches the format produced by `seer-core/src/rdap/client.rs:603`:
85/// `"query failed with status 404 ..."`.
86fn rdap_error_is_404(err: &SeerError) -> bool {
87    if let SeerError::RdapError(msg) = err {
88        msg.contains("query failed with status 404")
89    } else {
90        false
91    }
92}
93
94/// Returns true if the parsed WHOIS response lacks all key registration
95/// signals: no registrar, no creation date, and no expiration date.
96///
97/// This is a necessary-but-not-sufficient signal for domain availability;
98/// `lookup_concurrent` combines it with an RDAP 404 before routing to the
99/// availability path. Nameservers alone don't disqualify thinness — some
100/// registries return placeholder nameservers for unregistered domains.
101fn whois_response_is_thin(w: &WhoisResponse) -> bool {
102    w.registrar.is_none() && w.creation_date.is_none() && w.expiration_date.is_none()
103}
104
105/// Decides whether a WHOIS response + RDAP error combination should route
106/// to the availability path. Returns `(confidence, method)` when routing is
107/// warranted, `None` to keep the existing `LookupResult::Whois` behavior.
108///
109/// Case A: WHOIS explicitly indicates no registration (highest priority).
110/// Case B: WHOIS returned but lacks registration data AND RDAP returned 404.
111fn classify_whois_leg(
112    w: &WhoisResponse,
113    rdap_err: &SeerError,
114) -> Option<(&'static str, &'static str)> {
115    if w.is_available() {
116        return Some(("high", "whois"));
117    }
118    if whois_response_is_thin(w) && rdap_error_is_404(rdap_err) {
119        return Some(("medium", "whois_thin_response"));
120    }
121    None
122}
123
124/// Sanitizes an error message for inclusion in a public-facing response.
125///
126/// Strips IPv4 and IPv6 literals (to avoid leaking internal addresses when
127/// an SSRF guard rejects a resolved URL) and caps the total length to
128/// [`MAX_PUBLIC_ERROR_LEN`] characters.
129fn sanitize_error_for_public(msg: &str) -> String {
130    let s = IPV4_RE.replace_all(msg, "[ip-redacted]");
131    let s = strip_ipv6(&s);
132    if s.chars().count() > MAX_PUBLIC_ERROR_LEN {
133        let mut trunc: String = s.chars().take(MAX_PUBLIC_ERROR_LEN).collect();
134        trunc.push('…');
135        trunc
136    } else {
137        s
138    }
139}
140
141/// RAII guard for the in-flight-lookup slot. On drop, removes the entry
142/// from `LOOKUP_INFLIGHT` and notifies any waiters so they can read the
143/// freshly-populated cache.
144///
145/// NOTE on failed-owner retry semantics:
146/// When the owning task's lookup fails, `InflightGuard::drop` runs, the
147/// `HashMap` entry is removed, and `notify_waiters()` fires. Waiters wake,
148/// observe an empty cache, and one of them becomes the new owner — triggering
149/// a fresh network race. This means transient failures are automatically
150/// retried by any concurrent waiter. Callers that observe a timeout error
151/// should not assume no work is in flight; another concurrent caller may
152/// already be retrying.
153struct InflightGuard {
154    key: String,
155    notify: Arc<Notify>,
156}
157
158impl Drop for InflightGuard {
159    fn drop(&mut self) {
160        // Avoid blocking on the mutex inside Drop. A cancelled future that
161        // drops this guard could otherwise starve the Tokio executor while
162        // it waits for contention to clear. `try_lock` lets us take the
163        // fast path when the map is uncontended, recover from poisoning
164        // explicitly, and simply skip cleanup when another task holds the
165        // mutex — waiters re-contend for ownership on their next wakeup,
166        // so a missed cleanup is self-healing.
167        match LOOKUP_INFLIGHT.try_lock() {
168            Ok(mut inflight) => {
169                inflight.remove(&self.key);
170            }
171            Err(std::sync::TryLockError::Poisoned(p)) => {
172                let mut inflight = p.into_inner();
173                inflight.remove(&self.key);
174            }
175            Err(std::sync::TryLockError::WouldBlock) => {
176                tracing::debug!(
177                    key = %self.key,
178                    "InflightGuard drop: skipping cleanup under contention"
179                );
180            }
181        }
182        self.notify.notify_waiters();
183    }
184}
185
/// Internal classification of the RDAP leg of a concurrent lookup.
///
/// Distinguishing `NoData` (HTTP 200 but response was missing useful fields)
/// from `Error` lets the orchestrator prefer a thin WHOIS result over the
/// availability fallback when RDAP silently returned nothing.
enum RdapOutcome {
    /// RDAP completed and the response passed `is_rdap_response_useful`.
    Useful(RdapResponse),
    /// RDAP completed without error but the response failed the usefulness
    /// check. The payload is kept so it can be attached to a WHOIS result
    /// as `rdap_fallback`.
    NoData(RdapResponse),
    /// RDAP completed with an error.
    Error(SeerError),
    /// RDAP future did not complete within the grace period after the other
    /// protocol finished.
    GraceTimeout,
}
199
/// Progress callback for smart lookup operations.
/// Called with a message describing the current phase of the lookup.
pub type LookupProgressCallback = Arc<dyn Fn(&str) + Send + Sync>;

/// Outcome of a smart lookup. Serialized with an internal `source` tag
/// whose value is the lowercase variant name.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "source", rename_all = "lowercase")]
pub enum LookupResult {
    /// RDAP returned a useful response and is the primary data source.
    Rdap {
        data: Box<RdapResponse>,
        /// WHOIS response, when the WHOIS leg also completed successfully.
        #[serde(skip_serializing_if = "Option::is_none")]
        whois_fallback: Option<WhoisResponse>,
    },
    /// RDAP was not useful (incomplete, errored, or cut off by the grace
    /// period) and WHOIS supplied the data.
    Whois {
        data: WhoisResponse,
        /// Description of why the RDAP leg was not used.
        rdap_error: Option<String>,
        /// The incomplete RDAP response, when RDAP answered but the
        /// response was not considered useful.
        #[serde(skip_serializing_if = "Option::is_none")]
        rdap_fallback: Option<Box<RdapResponse>>,
    },
    /// Result of the availability path: either both protocols failed and
    /// the availability checker ran, or a WHOIS response was reclassified
    /// as "not registered".
    Available {
        data: Box<AvailabilityResult>,
        rdap_error: String,
        whois_error: String,
        /// Raw WHOIS response, when one was available at routing time
        /// (Cases A and B in the design spec). `None` preserves the
        /// pre-existing "both protocols errored" semantics.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        whois_data: Option<WhoisResponse>,
    },
}
229
230impl LookupResult {
231    /// Returns the domain name from the lookup result.
232    pub fn domain_name(&self) -> Option<String> {
233        match self {
234            LookupResult::Rdap { data, .. } => data.domain_name().map(String::from),
235            LookupResult::Whois { data, .. } => Some(data.domain.clone()),
236            LookupResult::Available { data, .. } => Some(data.domain.clone()),
237        }
238    }
239
240    /// Returns the registrar name, preferring RDAP data with WHOIS fallback.
241    pub fn registrar(&self) -> Option<String> {
242        match self {
243            LookupResult::Rdap {
244                data,
245                whois_fallback,
246            } => data
247                .get_registrar()
248                .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone())),
249            LookupResult::Whois { data, .. } => data.registrar.clone(),
250            LookupResult::Available { .. } => None,
251        }
252    }
253
254    /// Returns the registrant organization, preferring RDAP data with WHOIS fallback.
255    pub fn organization(&self) -> Option<String> {
256        match self {
257            LookupResult::Rdap {
258                data,
259                whois_fallback,
260            } => data
261                .get_registrant_organization()
262                .or_else(|| whois_fallback.as_ref().and_then(|w| w.organization.clone())),
263            LookupResult::Whois { data, .. } => data.organization.clone(),
264            LookupResult::Available { .. } => None,
265        }
266    }
267
268    /// Returns true if the result came from RDAP.
269    pub fn is_rdap(&self) -> bool {
270        matches!(self, LookupResult::Rdap { .. })
271    }
272
273    /// Returns true if the result came from WHOIS.
274    pub fn is_whois(&self) -> bool {
275        matches!(self, LookupResult::Whois { .. })
276    }
277
278    /// Returns true if the result is an availability check fallback.
279    pub fn is_available(&self) -> bool {
280        matches!(self, LookupResult::Available { .. })
281    }
282
283    /// Returns the expiration date and registrar info from the lookup result.
284    pub fn expiration_info(&self) -> (Option<DateTime<Utc>>, Option<String>) {
285        match self {
286            LookupResult::Rdap {
287                data,
288                whois_fallback,
289            } => {
290                // Try to get expiration from RDAP events
291                let expiration_date = data
292                    .events
293                    .iter()
294                    .find(|e| e.event_action == "expiration")
295                    .and_then(|e| e.parsed_date())
296                    .or_else(|| {
297                        // Fallback to WHOIS if available
298                        whois_fallback.as_ref().and_then(|w| w.expiration_date)
299                    });
300
301                let registrar = data
302                    .get_registrar()
303                    .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone()));
304
305                (expiration_date, registrar)
306            }
307            LookupResult::Whois { data, .. } => (data.expiration_date, data.registrar.clone()),
308            LookupResult::Available { .. } => (None, None),
309        }
310    }
311}
312
313/// Before caching, trim raw WHOIS response to limit cache memory.
314/// A full WHOIS raw_response can be up to 1 MB; we cap it at 32 KB which is
315/// plenty for the parsed fields while preventing the cache from ballooning.
316fn trim_for_cache(mut result: LookupResult) -> LookupResult {
317    const MAX_RAW: usize = 32 * 1024;
318
319    match result {
320        LookupResult::Whois { ref mut data, .. } => {
321            if data.raw_response.len() > MAX_RAW {
322                data.raw_response.truncate(MAX_RAW);
323                data.raw_response.push_str("\n... [truncated for cache]");
324            }
325        }
326        LookupResult::Rdap {
327            ref mut whois_fallback,
328            ..
329        } => {
330            if let Some(ref mut w) = whois_fallback {
331                if w.raw_response.len() > MAX_RAW {
332                    w.raw_response.truncate(MAX_RAW);
333                    w.raw_response.push_str("\n... [truncated for cache]");
334                }
335            }
336        }
337        LookupResult::Available {
338            ref mut whois_data, ..
339        } => {
340            if let Some(ref mut w) = whois_data {
341                if w.raw_response.len() > MAX_RAW {
342                    w.raw_response.truncate(MAX_RAW);
343                    w.raw_response.push_str("\n... [truncated for cache]");
344                }
345            }
346        }
347    }
348
349    result
350}
351
/// Orchestrates domain lookups across RDAP, WHOIS, and the availability
/// checker. Holds one client per protocol.
#[derive(Debug, Clone)]
pub struct SmartLookup {
    rdap_client: RdapClient,
    whois_client: WhoisClient,
    availability_checker: AvailabilityChecker,
    /// Deprecated: both protocols are now always attempted concurrently.
    prefer_rdap: bool,
    /// Deprecated: WHOIS data is now always attached when available.
    include_fallback: bool,
}

impl Default for SmartLookup {
    /// Equivalent to [`SmartLookup::new`].
    fn default() -> Self {
        Self::new()
    }
}
368
369impl SmartLookup {
    /// Creates a new SmartLookup that runs RDAP and WHOIS concurrently,
    /// falling back to an availability check if both fail.
    ///
    /// Each client is built with its own `new()` constructor. The
    /// deprecated `prefer_rdap` / `include_fallback` flags are initialized
    /// to `true` / `false` respectively.
    pub fn new() -> Self {
        Self {
            rdap_client: RdapClient::new(),
            whois_client: WhoisClient::new(),
            availability_checker: AvailabilityChecker::new(),
            prefer_rdap: true,
            include_fallback: false,
        }
    }
381
382    /// Deprecated: both protocols are now always attempted concurrently.
383    /// This method is kept for API compatibility but has no effect.
384    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
385    pub fn prefer_rdap(mut self, prefer: bool) -> Self {
386        self.prefer_rdap = prefer;
387        self
388    }
389
390    /// Deprecated: WHOIS data is now always attached when available.
391    /// This method is kept for API compatibility but has no effect.
392    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
393    pub fn include_fallback(mut self, include: bool) -> Self {
394        self.include_fallback = include;
395        self
396    }
397
    /// Performs a smart lookup for a domain, trying both RDAP and WHOIS concurrently.
    /// Falls back to an availability check if both fail.
    /// Results are cached for 5 minutes to avoid redundant network calls.
    ///
    /// Convenience wrapper around [`SmartLookup::lookup_with_progress`]
    /// with no progress callback.
    #[instrument(skip(self), fields(domain = %domain))]
    pub async fn lookup(&self, domain: &str) -> Result<LookupResult> {
        self.lookup_with_progress(domain, None).await
    }
405
406    /// Performs a lookup with an optional progress callback.
407    /// The callback is called with messages describing the current phase.
408    /// Results are cached for 5 minutes. Concurrent lookups for the same
409    /// domain are coalesced — only one network race runs per domain at a time.
410    #[instrument(skip(self, progress), fields(domain = %domain))]
411    pub async fn lookup_with_progress(
412        &self,
413        domain: &str,
414        progress: Option<LookupProgressCallback>,
415    ) -> Result<LookupResult> {
416        let normalized = crate::validation::normalize_domain(domain)?;
417
418        // Check cache first
419        if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
420            debug!(domain = %normalized, "Returning cached lookup result");
421            return Ok(cached);
422        }
423
424        // Coalesce in-flight lookups: if another task is already running a
425        // race for this domain, wait on its Notify rather than starting a
426        // second race. Two branches:
427        //   - Waiter: another task owns the slot; await its notify, then
428        //     read the cache. If the cache is still empty (owner failed),
429        //     loop and re-contend for ownership.
430        //   - Owner: no entry exists; insert a Weak handle, hold the Arc
431        //     for the duration of the work, then remove and notify on drop.
432        //
433        // A `loop` with a separate lock-scope per iteration keeps the
434        // `MutexGuard` from being held across any `.await`.
435        let _guard = loop {
436            enum Slot {
437                Waiter(Arc<Notify>),
438                Owner(InflightGuard),
439            }
440
441            let slot = {
442                // Recover from poisoning rather than panicking: a prior
443                // owner's panic should not permanently wedge the in-flight
444                // tracker for every future lookup.
445                let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
446                match inflight.get(&normalized).and_then(|w| w.upgrade()) {
447                    Some(existing) => Slot::Waiter(existing),
448                    None => {
449                        let n = Arc::new(Notify::new());
450                        inflight.insert(normalized.clone(), Arc::downgrade(&n));
451                        Slot::Owner(InflightGuard {
452                            key: normalized.clone(),
453                            notify: n,
454                        })
455                    }
456                }
457            };
458
459            match slot {
460                Slot::Waiter(n) => {
461                    debug!(domain = %normalized, "Waiting for in-flight lookup to complete");
462                    n.notified().await;
463                    if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
464                        return Ok(cached);
465                    }
466                    // Owner finished without populating the cache (failed
467                    // or errored). Re-contend for ownership.
468                    continue;
469                }
470                Slot::Owner(guard) => break guard,
471            }
472        };
473
474        let result = self.lookup_concurrent(&normalized, progress).await?;
475
476        // Cache a trimmed copy to limit memory usage before releasing
477        // waiters (via guard drop) so they observe the cached value.
478        LOOKUP_CACHE.insert(normalized.clone(), trim_for_cache(result.clone()));
479
480        Ok(result)
481    }
482
    /// Clears the lookup result cache.
    ///
    /// This is an associated function acting on the process-wide
    /// `LOOKUP_CACHE`; it affects every `SmartLookup` instance. The
    /// in-flight coalescing map is not touched.
    pub fn clear_cache() {
        LOOKUP_CACHE.clear();
    }
487
488    #[instrument(skip(self, progress), fields(domain = %domain))]
489    async fn lookup_concurrent(
490        &self,
491        domain: &str,
492        progress: Option<LookupProgressCallback>,
493    ) -> Result<LookupResult> {
494        #[cfg(test)]
495        LOOKUP_CONCURRENT_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
496
497        debug!(domain = %domain, "Attempting RDAP and WHOIS concurrently");
498
499        if let Some(ref cb) = progress {
500            cb("Querying RDAP and WHOIS concurrently");
501        }
502
503        let rdap_fut = self.rdap_client.lookup_domain(domain);
504        let whois_fut = self.whois_client.lookup(domain);
505
506        tokio::pin!(rdap_fut);
507        tokio::pin!(whois_fut);
508
509        // Race: whichever finishes first gets a grace period for the other.
510        //
511        // We track whether each side completed naturally or was truncated by
512        // the grace period, so downstream error messages can distinguish a
513        // true timeout from a loser-truncation.
514        enum LegOutcome<T> {
515            Completed(T),
516            GraceTruncated,
517        }
518
519        let (rdap_leg, whois_leg) = tokio::select! {
520            rdap_res = &mut rdap_fut => {
521                // RDAP finished first — give WHOIS a grace period
522                let whois_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, whois_fut).await {
523                    Ok(res) => LegOutcome::Completed(res),
524                    Err(_) => {
525                        debug!("WHOIS did not finish within grace period, proceeding with RDAP only");
526                        LegOutcome::GraceTruncated
527                    }
528                };
529                (LegOutcome::Completed(rdap_res), whois_leg)
530            }
531            whois_res = &mut whois_fut => {
532                // WHOIS finished first — give RDAP a grace period
533                let rdap_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, rdap_fut).await {
534                    Ok(res) => LegOutcome::Completed(res),
535                    Err(_) => {
536                        debug!("RDAP did not finish within grace period, proceeding with WHOIS only");
537                        LegOutcome::GraceTruncated
538                    }
539                };
540                (rdap_leg, LegOutcome::Completed(whois_res))
541            }
542        };
543
544        // Classify the RDAP leg.
545        let rdap_outcome = match rdap_leg {
546            LegOutcome::Completed(Ok(data)) => {
547                if self.is_rdap_response_useful(&data) {
548                    RdapOutcome::Useful(data)
549                } else {
550                    RdapOutcome::NoData(data)
551                }
552            }
553            LegOutcome::Completed(Err(e)) => RdapOutcome::Error(e),
554            LegOutcome::GraceTruncated => RdapOutcome::GraceTimeout,
555        };
556
557        // Phase 1: If RDAP returned useful data, use it as primary.
558        if let RdapOutcome::Useful(rdap_data) = rdap_outcome {
559            debug!("RDAP lookup successful");
560            let whois_fallback = match whois_leg {
561                LegOutcome::Completed(Ok(w)) => Some(w),
562                _ => None,
563            };
564            return Ok(LookupResult::Rdap {
565                data: Box::new(rdap_data),
566                whois_fallback,
567            });
568        }
569
570        // RDAP was not useful (NoData, Error, or GraceTimeout). Prefer WHOIS
571        // if it returned any response, even a thin one — this is safer than
572        // falling back to the availability heuristic when we have actual
573        // registry data in hand.
574        let (rdap_error_str, rdap_fallback_data, rdap_seer_error) = match rdap_outcome {
575            RdapOutcome::Useful(_) => {
576                // Unreachable in this branch (we returned above), but handle
577                // defensively rather than panicking across the FFI boundary.
578                debug!("Unexpected RdapOutcome::Useful in fallback branch");
579                (String::from("RDAP ok"), None, None)
580            }
581            RdapOutcome::NoData(data) => (
582                "RDAP response incomplete".to_string(),
583                Some(Box::new(data)),
584                None,
585            ),
586            RdapOutcome::Error(e) => (e.to_string(), None, Some(e)),
587            RdapOutcome::GraceTimeout => (
588                format!(
589                    "RDAP did not return within {}s grace period after WHOIS won",
590                    PROTOCOL_GRACE_PERIOD.as_secs()
591                ),
592                None,
593                None,
594            ),
595        };
596
597        if let LegOutcome::Completed(Ok(whois_data)) = whois_leg {
598            // Check Cases A and B: should we reclassify as Available?
599            let availability_match = rdap_seer_error
600                .as_ref()
601                .and_then(|e| classify_whois_leg(&whois_data, e))
602                .or_else(|| {
603                    // Case A can still fire even when RDAP errored for a
604                    // non-404 reason — the WHOIS signal alone is sufficient.
605                    if whois_data.is_available() {
606                        Some(("high", "whois"))
607                    } else {
608                        None
609                    }
610                });
611
612            if let Some((confidence, method)) = availability_match {
613                debug!(
614                    domain = %domain,
615                    confidence = %confidence,
616                    "Reclassifying WHOIS as availability signal"
617                );
618                if let Some(ref cb) = progress {
619                    cb("Domain appears unregistered");
620                }
621                let details = match confidence {
622                    "high" => Some("WHOIS indicates domain is not registered".to_string()),
623                    "medium" => Some(
624                        "WHOIS returned no registrar or registration dates; RDAP returned 404"
625                            .to_string(),
626                    ),
627                    _ => None,
628                };
629                let avail = AvailabilityResult {
630                    domain: domain.to_string(),
631                    available: true,
632                    confidence: confidence.to_string(),
633                    method: method.to_string(),
634                    details,
635                };
636                return Ok(LookupResult::Available {
637                    data: Box::new(avail),
638                    rdap_error: sanitize_error_for_public(&rdap_error_str),
639                    whois_error: String::new(),
640                    whois_data: Some(whois_data),
641                });
642            }
643
644            debug!("Using WHOIS result (RDAP not useful)");
645            if let Some(ref cb) = progress {
646                cb("RDAP not available (using WHOIS)");
647            }
648            return Ok(LookupResult::Whois {
649                data: whois_data,
650                rdap_error: Some(rdap_error_str),
651                rdap_fallback: rdap_fallback_data,
652            });
653        }
654
655        // Both sides failed to provide useful data. Craft a precise WHOIS
656        // error string that distinguishes true errors from grace-period
657        // truncation.
658        let whois_error_str = match whois_leg {
659            LegOutcome::Completed(Err(e)) => e.to_string(),
660            LegOutcome::Completed(Ok(_)) => {
661                // Already handled above; treat defensively.
662                debug!("Unexpected completed-Ok WHOIS in availability fallback branch");
663                "WHOIS returned but was not used".to_string()
664            }
665            LegOutcome::GraceTruncated => format!(
666                "WHOIS did not return within {}s grace period after RDAP won",
667                PROTOCOL_GRACE_PERIOD.as_secs()
668            ),
669        };
670
671        self.availability_fallback(domain, rdap_error_str, whois_error_str, progress)
672            .await
673    }
674
675    async fn availability_fallback(
676        &self,
677        domain: &str,
678        rdap_error: String,
679        whois_error: String,
680        progress: Option<LookupProgressCallback>,
681    ) -> Result<LookupResult> {
682        if let Some(ref cb) = progress {
683            cb("RDAP and WHOIS unavailable (checking availability)");
684        }
685        warn!(
686            domain = %domain,
687            rdap_error = %rdap_error,
688            whois_error = %whois_error,
689            "Both RDAP and WHOIS failed, falling back to availability check"
690        );
691
692        match self.availability_checker.check(domain).await {
693            Ok(avail) => Ok(LookupResult::Available {
694                data: Box::new(avail),
695                rdap_error: sanitize_error_for_public(&rdap_error),
696                whois_error: sanitize_error_for_public(&whois_error),
697                whois_data: None,
698            }),
699            Err(avail_err) => {
700                let tld = get_tld(domain).unwrap_or("unknown");
701                let registry_url = get_registry_url(tld).unwrap_or_else(|| {
702                    format!("https://www.iana.org/domains/root/db/{}.html", tld)
703                });
704                Err(SeerError::LookupFailed {
705                    domain: domain.to_string(),
706                    details: format!(
707                        "RDAP failed ({}), WHOIS failed ({}), availability check failed ({})",
708                        rdap_error, whois_error, avail_err
709                    ),
710                    registry_url,
711                })
712            }
713        }
714    }
715
716    fn is_rdap_response_useful(&self, response: &RdapResponse) -> bool {
717        // Check if we have at least some meaningful data
718        let has_name = response.ldh_name.is_some() || response.unicode_name.is_some();
719        let has_dates = response
720            .events
721            .iter()
722            .any(|e| e.event_action == "registration" || e.event_action == "expiration");
723        let has_entities = !response.entities.is_empty();
724        let has_nameservers = !response.nameservers.is_empty();
725        let has_status = !response.status.is_empty();
726
727        // Consider useful if we have the name plus at least one other piece of info
728        has_name && (has_dates || has_entities || has_nameservers || has_status)
729    }
730}
731
732#[cfg(test)]
733mod tests {
734    use super::*;
735
736    /// Global serialization mutex for the three tests that share
737    /// `LOOKUP_INFLIGHT` state (coalescing, poison recovery, drop recovery).
738    /// Running them in parallel creates two races:
739    ///   1. Guard drop uses `try_lock`; if another test holds the mutex, the
740    ///      Drop path skips cleanup → stale entries fail later assertions.
741    ///   2. Poisoning one test leaves the mutex poisoned for the next test,
742    ///      which is handled by `unwrap_or_else` but still disturbs state.
743    /// Per-test unique keys (see `unique_test_key`) prevent entry-level
744    /// collisions; this mutex prevents lock-contention races on Drop.
745    static INFLIGHT_TEST_SERIAL: Mutex<()> = Mutex::new(());
746
/// A WHOIS-sourced result exposes the parsed domain and registrar and
/// identifies itself as WHOIS (neither RDAP nor "available").
#[test]
fn test_lookup_result_domain_name_whois() {
    // Build on the shared `empty_whois` fixture instead of restating every
    // `WhoisResponse` field; only the fields this test cares about are set.
    let result = LookupResult::Whois {
        data: WhoisResponse {
            registrar: Some("Test Registrar".to_string()),
            whois_server: "whois.example.com".to_string(),
            ..empty_whois("example.com")
        },
        rdap_error: None,
        rdap_fallback: None,
    };

    assert_eq!(result.domain_name(), Some("example.com".to_string()));
    assert_eq!(result.registrar(), Some("Test Registrar".to_string()));
    assert!(result.is_whois());
    assert!(!result.is_rdap());
    assert!(!result.is_available());
}
786
/// Serializing a WHOIS result tags it with `"source":"whois"` and carries
/// the recorded RDAP error text through to the JSON output.
#[test]
fn test_lookup_result_serialization() {
    // The payload is the all-empty fixture; reuse `empty_whois` rather than
    // duplicating the full field list here.
    let result = LookupResult::Whois {
        data: empty_whois("test.com"),
        rdap_error: Some("RDAP failed".to_string()),
        rdap_fallback: None,
    };

    let json = serde_json::to_string(&result).unwrap();
    assert!(json.contains("\"source\":\"whois\""));
    assert!(json.contains("RDAP failed"));
}
824
/// An `Available` result serializes with the `available` source tag and
/// answers the accessor methods accordingly.
#[test]
fn test_lookup_result_available_serialization() {
    let availability = AvailabilityResult {
        domain: "test123.xyz".to_string(),
        available: true,
        confidence: "medium".to_string(),
        method: "whois_error".to_string(),
        details: Some("WHOIS server indicates no matching records".to_string()),
    };
    let result = LookupResult::Available {
        data: Box::new(availability),
        rdap_error: "RDAP failed".to_string(),
        whois_error: "WHOIS failed".to_string(),
        whois_data: None,
    };

    // JSON shape.
    let serialized = serde_json::to_string(&result).unwrap();
    assert!(serialized.contains("\"source\":\"available\""));
    assert!(serialized.contains("\"available\":true"));
    assert!(serialized.contains("test123.xyz"));

    // Accessors.
    assert_eq!(result.domain_name(), Some("test123.xyz".to_string()));
    assert!(result.is_available());
    assert!(!result.is_rdap());
    assert!(!result.is_whois());
    assert!(result.registrar().is_none());
    assert_eq!(result.expiration_info(), (None, None));
}
852
/// Builder setters are reflected in the corresponding `SmartLookup` fields.
// NOTE(review): `allow(deprecated)` suggests one of the builder methods used
// here is deprecated — confirm which one before removing the attribute.
#[test]
#[allow(deprecated)]
fn test_smart_lookup_builder() {
    let configured = SmartLookup::new()
        .prefer_rdap(false)
        .include_fallback(true);

    assert!(!configured.prefer_rdap);
    assert!(configured.include_fallback);
}
860
/// After `SmartLookup::clear_cache()` the global lookup cache reports empty.
#[test]
fn test_lookup_cache_clear() {
    SmartLookup::clear_cache();

    let cache_is_empty = LOOKUP_CACHE.is_empty();
    assert!(cache_is_empty);
}
866
867    // ---------------- sanitize_error_for_public ----------------
868
/// A single IPv4 literal is replaced by the redaction placeholder.
#[test]
fn test_sanitize_strips_ipv4() {
    let sanitized =
        sanitize_error_for_public("RDAP URL resolves to reserved IP 10.0.0.1 which is forbidden");

    let leaked = sanitized.contains("10.0.0.1");
    assert!(!leaked, "IPv4 should be stripped, got: {}", sanitized);
    assert!(sanitized.contains("[ip-redacted]"));
}
880
/// Every IPv4 literal in a message is redacted, not just the first one.
#[test]
fn test_sanitize_strips_multiple_ipv4() {
    let sanitized =
        sanitize_error_for_public("Could not connect to 192.168.1.1 after trying 127.0.0.1");

    assert!(!sanitized.contains("192.168.1.1"));
    assert!(!sanitized.contains("127.0.0.1"));

    // One placeholder per address.
    let redactions = sanitized.matches("[ip-redacted]").count();
    assert_eq!(redactions, 2);
}
890
/// A compressed IPv6 literal is replaced by the redaction placeholder.
#[test]
fn test_sanitize_strips_ipv6() {
    let sanitized =
        sanitize_error_for_public("RDAP URL resolves to reserved IP fe80::1 which is forbidden");

    assert!(!sanitized.contains("fe80::1"));
    assert!(sanitized.contains("[ip-redacted]"));
}
898
/// Short hex/colon tokens that are not IP addresses must pass through
/// the sanitizer untouched.
#[test]
fn sanitize_leaves_mac_address_like_tokens_alone() {
    let out = sanitize_error_for_public("error code af:ba:12 at line 5");

    let fragment_kept = out.contains("af:ba:12");
    assert!(fragment_kept, "MAC fragment should not be stripped: {}", out);
}
909
/// A genuine IPv6 address embedded in prose is redacted.
#[test]
fn sanitize_strips_real_ipv6() {
    let sanitized = sanitize_error_for_public("cannot reach 2001:db8::1 — timeout");

    assert!(!sanitized.contains("2001:db8::1"));
    assert!(sanitized.contains("[ip-redacted]"));
}
917
/// A link-local IPv6 address is removed from the message and replaced by
/// the redaction placeholder.
#[test]
fn sanitize_strips_fe80_link_local() {
    let msg = "peer at fe80::1 unreachable";
    let out = sanitize_error_for_public(msg);
    // Checking only for the placeholder would also pass if the sanitizer
    // inserted it without removing the address, so assert the address is gone.
    assert!(
        !out.contains("fe80::1"),
        "link-local address should be stripped, got: {}",
        out
    );
    assert!(out.contains("[ip-redacted]"));
}
924
/// Over-long messages are cut to `MAX_PUBLIC_ERROR_LEN` characters and
/// terminated with a single ellipsis character.
#[test]
fn test_sanitize_truncates_long_message() {
    // 500 characters, none of which look like an IP address.
    let oversized = "a".repeat(500);

    let sanitized = sanitize_error_for_public(&oversized);

    // The cap plus the one-character ellipsis.
    assert_eq!(sanitized.chars().count(), MAX_PUBLIC_ERROR_LEN + 1);
    assert!(sanitized.ends_with('…'));
}
935
/// A short message without IP literals comes back unchanged.
#[test]
fn test_sanitize_preserves_short_messages() {
    let original = "RDAP timed out after 15s";
    assert_eq!(sanitize_error_for_public(original), original);
}
942
943    // ---------------- RdapOutcome classification ----------------
944
/// `is_rdap_response_useful` rejects a name-only response and accepts a
/// response that pairs the name with a status value.
#[test]
fn test_is_rdap_response_useful_detects_no_data() {
    use crate::rdap::RdapResponse;

    let lookup = SmartLookup::new();

    // "200 OK but nothing in it": a name with no events, entities,
    // nameservers, or status. This is the RdapOutcome::NoData case
    // (neither Useful nor Error).
    let name_only = RdapResponse {
        ldh_name: Some("example.com".to_string()),
        ..Default::default()
    };
    assert!(
        !lookup.is_rdap_response_useful(&name_only),
        "Response with only a name should be classified as NoData"
    );

    // Sanity check: the same name plus a status value is useful.
    let name_and_status = RdapResponse {
        ldh_name: Some("example.com".to_string()),
        status: vec!["active".to_string()],
        ..Default::default()
    };
    assert!(lookup.is_rdap_response_useful(&name_and_status));
}
969
970    // ---------------- Coalescing ----------------
971
972    // Verifies that when multiple concurrent lookups hit the in-flight map
973    // for the same domain, later arrivals observe the existing Weak<Notify>
974    // and become waiters rather than racing a second lookup. We test the
975    // map-level primitive here because the full SmartLookup pipeline
976    // requires network access to exercise.
977    #[tokio::test]
978    async fn test_inflight_coalescing_map() {
979        // Serialize with sibling poisoning tests: we share LOOKUP_INFLIGHT
980        // state, and `InflightGuard::drop` uses `try_lock` — if a sibling
981        // holds the mutex during drop, cleanup is skipped and assertions
982        // fail.
983        let _serial = INFLIGHT_TEST_SERIAL
984            .lock()
985            .unwrap_or_else(|p| p.into_inner());
986        // Poison-tolerant: the sibling poisoning regression tests may run
987        // earlier under `cargo test` parallelism and leave LOOKUP_INFLIGHT
988        // poisoned. The production code recovers via `unwrap_or_else`,
989        // so this test does the same.
990        //
991        // Use a per-run unique key so this test cannot race with the other
992        // tests that touch LOOKUP_INFLIGHT. Previously we `clear()`ed the
993        // whole map, which raced with peer tests' entries.
994        let domain = unique_test_key("__coalesce");
995
996        // Defensive: ensure our specific key is not present.
997        {
998            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
999            m.remove(&domain);
1000        }
1001
1002        // First caller: no entry → becomes owner.
1003        let owner_notify = Arc::new(Notify::new());
1004        {
1005            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1006            assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1007            m.insert(domain.clone(), Arc::downgrade(&owner_notify));
1008        }
1009
1010        // Second caller: sees the existing Weak and upgrades.
1011        let waiter = {
1012            let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1013            m.get(&domain)
1014                .and_then(|w| w.upgrade())
1015                .expect("Second caller must observe in-flight entry")
1016        };
1017
1018        // Waiter listens in the background.
1019        let waiter_clone = waiter.clone();
1020        let handle = tokio::spawn(async move {
1021            waiter_clone.notified().await;
1022        });
1023
1024        // Simulate owner completing.
1025        tokio::time::sleep(Duration::from_millis(20)).await;
1026        {
1027            let mut m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1028            m.remove(&domain);
1029        }
1030        owner_notify.notify_waiters();
1031
1032        // Waiter should unblock quickly.
1033        tokio::time::timeout(Duration::from_secs(1), handle)
1034            .await
1035            .expect("waiter must unblock after notify")
1036            .expect("waiter task joined cleanly");
1037
1038        // After owner removes entry and drops its Arc, the Weak is dead.
1039        drop(owner_notify);
1040        drop(waiter);
1041        let m = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
1042        assert!(m.get(&domain).and_then(|w| w.upgrade()).is_none());
1043    }
1044
/// Produces a domain-like key that differs on every call, so tests sharing
/// the LOOKUP_INFLIGHT static cannot collide under parallel `cargo test`.
///
/// The key has the form `<prefix>_<nanos>_<n>.example.`, where `<nanos>` is
/// the current time in nanoseconds since the Unix epoch (0 if the clock
/// reads earlier than the epoch) and `<n>` is a process-wide call counter
/// that separates calls landing on the same nanosecond.
fn unique_test_key(prefix: &str) -> String {
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let sequence = COUNTER.fetch_add(1, Ordering::Relaxed);

    format!("{}_{}_{}.example.", prefix, nanos, sequence)
}
1061
// Checks that error text passed through `sanitize_error_for_public` stays
// redacted once stored in the rdap_error / whois_error fields of an
// `Available` result. The variant is built by hand, so the production call
// site is only covered indirectly (real clients would need network access).
#[test]
fn test_sanitize_applied_to_available_fields() {
    let result = LookupResult::Available {
        data: Box::new(AvailabilityResult {
            domain: "unreg.test".to_string(),
            available: true,
            confidence: "low".to_string(),
            method: "heuristic".to_string(),
            details: None,
        }),
        rdap_error: sanitize_error_for_public("RDAP URL resolves to reserved IP 10.0.0.1"),
        whois_error: sanitize_error_for_public("connection refused at 192.168.0.5"),
        whois_data: None,
    };

    match result {
        LookupResult::Available {
            rdap_error,
            whois_error,
            ..
        } => {
            assert!(!rdap_error.contains("10.0.0.1"));
            assert!(!whois_error.contains("192.168.0.5"));
            assert!(rdap_error.contains("[ip-redacted]"));
            assert!(whois_error.contains("[ip-redacted]"));
        }
        _ => panic!("expected Available variant"),
    }
}
1099
/// A status line with code and reason phrase is recognized as a 404.
#[test]
fn rdap_error_is_404_matches_standard_404() {
    let message = "query failed with status 404 Not Found".to_string();
    let err = SeerError::RdapError(message);
    assert!(rdap_error_is_404(&err));
}
1105
/// The bare status code, with no reason phrase after it, still counts.
#[test]
fn rdap_error_is_404_matches_without_reason_phrase() {
    let message = "query failed with status 404".to_string();
    let err = SeerError::RdapError(message);
    assert!(rdap_error_is_404(&err));
}
1111
/// HTTP statuses other than 404 must not be classified as 404.
#[test]
fn rdap_error_is_404_rejects_other_statuses() {
    let other_statuses = [
        "query failed with status 500 Server Error",
        "query failed with status 400 Bad Request",
    ];
    for message in other_statuses.iter() {
        let err = SeerError::RdapError(message.to_string());
        assert!(!rdap_error_is_404(&err));
    }
}
1119
/// Errors that carry no HTTP status at all are never treated as a 404.
#[test]
fn rdap_error_is_404_rejects_non_http_errors() {
    let transport_failure = SeerError::RdapError("connection timeout".to_string());
    assert!(!rdap_error_is_404(&transport_failure));

    let timed_out = SeerError::Timeout("rdap".to_string());
    assert!(!rdap_error_is_404(&timed_out));
}
1127
/// The digits "404" appearing outside a status context must not match.
#[test]
fn rdap_error_is_404_rejects_incidental_404_in_message() {
    let message = "error 40404: database corruption".to_string();
    let err = SeerError::RdapError(message);
    assert!(!rdap_error_is_404(&err));
}
1134
1135    // ---------------- whois_response_is_thin ----------------
1136
1137    fn empty_whois(domain: &str) -> WhoisResponse {
1138        WhoisResponse {
1139            domain: domain.to_string(),
1140            registrar: None,
1141            registrant: None,
1142            organization: None,
1143            registrant_email: None,
1144            registrant_phone: None,
1145            registrant_address: None,
1146            registrant_country: None,
1147            admin_name: None,
1148            admin_organization: None,
1149            admin_email: None,
1150            admin_phone: None,
1151            tech_name: None,
1152            tech_organization: None,
1153            tech_email: None,
1154            tech_phone: None,
1155            creation_date: None,
1156            expiration_date: None,
1157            updated_date: None,
1158            nameservers: vec![],
1159            status: vec![],
1160            dnssec: None,
1161            whois_server: String::new(),
1162            raw_response: String::new(),
1163        }
1164    }
1165
/// A response with none of the key fields populated is thin.
#[test]
fn whois_response_is_thin_when_all_key_fields_missing() {
    let bare = empty_whois("example.com");
    let thin = whois_response_is_thin(&bare);
    assert!(thin);
}
1171
/// A registrar on its own is enough for a response not to be thin.
#[test]
fn whois_response_is_not_thin_when_registrar_present() {
    let with_registrar = WhoisResponse {
        registrar: Some("Test Registrar".to_string()),
        ..empty_whois("example.com")
    };
    assert!(!whois_response_is_thin(&with_registrar));
}
1178
/// A creation date on its own is enough for a response not to be thin.
#[test]
fn whois_response_is_not_thin_when_creation_date_present() {
    let with_creation_date = WhoisResponse {
        creation_date: Some(Utc::now()),
        ..empty_whois("example.com")
    };
    assert!(!whois_response_is_thin(&with_creation_date));
}
1185
/// An expiration date on its own is enough for a response not to be thin.
#[test]
fn whois_response_is_not_thin_when_expiration_date_present() {
    let with_expiration_date = WhoisResponse {
        expiration_date: Some(Utc::now()),
        ..empty_whois("example.com")
    };
    assert!(!whois_response_is_thin(&with_expiration_date));
}
1192
/// Nameservers alone do not rescue a response from being thin.
#[test]
fn whois_response_is_thin_even_with_nameservers_alone() {
    let nameservers_only = WhoisResponse {
        nameservers: vec!["ns1.example.net".to_string()],
        ..empty_whois("example.com")
    };
    assert!(whois_response_is_thin(&nameservers_only));
}
1199
1200    // ---------------- classify_whois_leg ----------------
1201
1202    use crate::rdap::RdapResponse;
1203
1204    #[allow(dead_code)]
1205    fn make_empty_rdap_response() -> RdapResponse {
1206        serde_json::from_value(serde_json::json!({
1207            "objectClassName": "domain",
1208        }))
1209        .expect("valid minimal RDAP response")
1210    }
1211
/// Case A: WHOIS text that itself reports the domain as available, paired
/// with an RDAP 404, routes with "high" confidence via the "whois" method.
#[test]
fn classify_whois_leg_case_a_high_confidence() {
    let whois = WhoisResponse {
        raw_response: "No match for \"ZACCODES.COM\".".to_string(),
        ..empty_whois("zaccodes.com")
    };
    assert!(whois.is_available());

    let rdap_404 = SeerError::RdapError("query failed with status 404 Not Found".to_string());
    let decision = classify_whois_leg(&whois, &rdap_404);
    let (verdict, method) = decision.expect("expected a routing decision");

    assert_eq!(verdict, "high");
    assert_eq!(method, "whois");
}
1223
/// Case B: a thin WHOIS body with no availability marker, paired with an
/// RDAP 404, routes with "medium" confidence via "whois_thin_response".
#[test]
fn classify_whois_leg_case_b_medium_confidence() {
    let thin_whois = empty_whois("example.xyz");
    assert!(
        !thin_whois.is_available(),
        "this WHOIS body has no 'no match' text"
    );

    let rdap_404 = SeerError::RdapError("query failed with status 404 Not Found".to_string());
    let decision = classify_whois_leg(&thin_whois, &rdap_404);
    let (verdict, method) = decision.expect("expected a routing decision");

    assert_eq!(verdict, "medium");
    assert_eq!(method, "whois_thin_response");
}
1234
/// A thin WHOIS body yields no routing decision when the RDAP failure was
/// not a 404.
#[test]
fn classify_whois_leg_rejects_thin_whois_without_404() {
    let thin_whois = empty_whois("example.xyz");
    let rdap_timeout = SeerError::RdapError("connection timeout".to_string());

    let decision = classify_whois_leg(&thin_whois, &rdap_timeout);
    assert!(decision.is_none());
}
1241
/// A WHOIS response carrying real registration data yields no routing
/// decision, even when RDAP answered 404.
#[test]
fn classify_whois_leg_rejects_whois_with_real_data() {
    let populated_whois = WhoisResponse {
        registrar: Some("Legacy Registry".to_string()),
        creation_date: Some(Utc::now()),
        ..empty_whois("legacy.tld")
    };
    let rdap_404 = SeerError::RdapError("query failed with status 404 Not Found".to_string());

    let decision = classify_whois_leg(&populated_whois, &rdap_404);
    assert!(decision.is_none());
}
1250
/// When a response satisfies both Case A (availability marker) and Case B
/// (thin body), the "high" verdict of Case A wins.
#[test]
fn classify_whois_leg_case_a_wins_over_case_b() {
    let whois = WhoisResponse {
        raw_response: "No match for \"EXAMPLE.COM\".".to_string(),
        ..empty_whois("example.com")
    };
    let rdap_404 = SeerError::RdapError("query failed with status 404 Not Found".to_string());

    let (verdict, _method) = classify_whois_leg(&whois, &rdap_404).unwrap();
    assert_eq!(verdict, "high");
}
1259
1260    // ---------------- Mutex poisoning recovery ----------------
1261
/// Regression: a panic raised while `LOOKUP_INFLIGHT` is locked must not
/// wedge the tracker for good. Once the mutex is poisoned, acquiring it
/// through `unwrap_or_else(|p| p.into_inner())` must still hand back a
/// working guard.
///
/// The test runs that acquisition pattern directly against the real static;
/// it is the pattern used at the lookup_with_progress acquisition site
/// (formerly `.expect("LOOKUP_INFLIGHT mutex poisoned")`).
#[test]
fn lookup_inflight_recovers_from_poisoned_mutex() {
    use std::panic::{catch_unwind, AssertUnwindSafe};

    // One at a time with the other tests that touch LOOKUP_INFLIGHT.
    let _serial = INFLIGHT_TEST_SERIAL
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    // Panic with the guard held so the real static ends up poisoned.
    let _ = catch_unwind(AssertUnwindSafe(|| {
        let _held = LOOKUP_INFLIGHT.lock().unwrap();
        panic!("poisoning LOOKUP_INFLIGHT for test");
    }));

    // A plain `.lock()` would now return Err(PoisonError); the recovery
    // pattern has to produce a usable guard regardless.
    let mut inflight = LOOKUP_INFLIGHT
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    // Round-trip a canary under a per-run unique key so parallel tests
    // cannot collide with it.
    let canary = unique_test_key("__poison_recovery");
    inflight.insert(canary.clone(), Weak::new());
    assert!(inflight.contains_key(&canary));
    inflight.remove(&canary);
}
1294
/// Regression: dropping an `InflightGuard` while `LOOKUP_INFLIGHT` is
/// poisoned must not panic, and must still remove the guard's entry.
#[test]
fn inflight_guard_drop_recovers_from_poisoned_mutex() {
    use std::panic::{catch_unwind, AssertUnwindSafe};

    // One at a time with the other LOOKUP_INFLIGHT tests. The race this
    // prevents: `InflightGuard::drop` uses `try_lock`, so a parallel test
    // holding the mutex made the drop skip cleanup, leaving this test's
    // entry behind and failing the final assertion.
    let _serial = INFLIGHT_TEST_SERIAL
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());

    // Register an entry under a per-run unique key (a hard-coded key used
    // to race with the coalescing test's `m.clear()`), then arm a guard.
    let key = unique_test_key("__drop_poison");
    let notify = Arc::new(Notify::new());
    LOOKUP_INFLIGHT
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .insert(key.clone(), Arc::downgrade(&notify));
    let armed = InflightGuard {
        key: key.clone(),
        notify: notify.clone(),
    };

    // Poison the mutex by panicking while it is held.
    let _ = catch_unwind(AssertUnwindSafe(|| {
        let _held = LOOKUP_INFLIGHT.lock().unwrap();
        panic!("poisoning LOOKUP_INFLIGHT for drop test");
    }));

    // The drop must complete without panicking and clean up the entry
    // through the Poisoned branch of the try_lock match.
    drop(armed);

    let inflight = LOOKUP_INFLIGHT
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    let entry_left_behind = inflight.contains_key(&key);
    assert!(
        !entry_left_behind,
        "poisoned-mutex drop path should still remove the in-flight entry"
    );
}
1341}