Skip to main content

seer_core/
lookup.rs

1use std::collections::HashMap;
2use std::net::Ipv6Addr;
3use std::str::FromStr;
4use std::sync::{Arc, Mutex, Weak};
5use std::time::Duration;
6
7use chrono::{DateTime, Utc};
8use once_cell::sync::Lazy;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use tokio::sync::Notify;
12use tracing::{debug, instrument, warn};
13
14use tokio::time::timeout as tokio_timeout;
15
16use crate::availability::{AvailabilityChecker, AvailabilityResult};
17use crate::cache::TtlCache;
18use crate::error::{Result, SeerError};
19use crate::rdap::{RdapClient, RdapResponse};
20use crate::whois::{get_registry_url, get_tld, WhoisClient, WhoisResponse};
21
22/// Cache TTL for lookup results (5 minutes).
23const LOOKUP_CACHE_TTL: Duration = Duration::from_secs(5 * 60);
24
25/// Grace period for the second protocol after the first one finishes.
26/// If WHOIS finishes and RDAP hasn't responded within this window, we
27/// use the WHOIS result rather than waiting the full RDAP timeout.
28const PROTOCOL_GRACE_PERIOD: Duration = Duration::from_secs(5);
29
30/// Maximum length for public-facing error strings.
31const MAX_PUBLIC_ERROR_LEN: usize = 256;
32
33/// Global cache for lookup results to avoid redundant network calls.
34static LOOKUP_CACHE: Lazy<TtlCache<String, LookupResult>> =
35    Lazy::new(|| TtlCache::new(LOOKUP_CACHE_TTL));
36
37/// In-flight lookup coalescing map: normalized-domain -> Weak<Notify>.
38/// Only one network race runs per unique domain at a time; concurrent callers
39/// wait on the shared Notify and then read the result from LOOKUP_CACHE.
40static LOOKUP_INFLIGHT: Lazy<Mutex<HashMap<String, Weak<Notify>>>> =
41    Lazy::new(|| Mutex::new(HashMap::new()));
42
43/// Regex patterns for stripping IP literals from public error messages.
44static IPV4_RE: Lazy<Regex> =
45    Lazy::new(|| Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").expect("IPV4_RE is a valid regex"));
46
47/// Candidate pattern for IPv6 literals: a hex/colon token containing either
48/// a `::` compression or at least three colons. This catches plausible IPv6
49/// addresses cheaply; each match is then validated by `Ipv6Addr::from_str`
50/// before redaction, so MAC fragments, hex hashes, and similar colon-laden
51/// tokens are left alone.
52static IPV6_CANDIDATE_RE: Lazy<Regex> = Lazy::new(|| {
53    Regex::new(r"\b[0-9a-fA-F:]*(?:::|(?:[0-9a-fA-F]{1,4}:){3,})[0-9a-fA-F:]*\b")
54        .expect("IPV6_CANDIDATE_RE is a valid regex")
55});
56
57/// Redact substrings that parse as valid IPv6 addresses, leaving non-IPv6
58/// tokens (e.g. `af:ba:12`) untouched.
59fn strip_ipv6(msg: &str) -> String {
60    IPV6_CANDIDATE_RE
61        .replace_all(msg, |caps: &regex::Captures| {
62            let candidate = &caps[0];
63            if Ipv6Addr::from_str(candidate).is_ok() {
64                "[ip-redacted]".to_string()
65            } else {
66                candidate.to_string()
67            }
68        })
69        .into_owned()
70}
71
/// Test-only hook: counts how many times `lookup_concurrent` actually runs
/// (i.e. how many underlying network races were started). Used to verify
/// request coalescing. Not exposed outside the crate.
///
/// `AtomicUsize::new` is a `const fn`, so the counter is a plain static and
/// needs no lazy initialization.
#[cfg(test)]
static LOOKUP_CONCURRENT_CALLS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
78
79/// Returns true if the error is an RDAP HTTP 404 response, indicating the
80/// registry's RDAP server has no entry for this domain. Other RDAP errors
81/// (timeouts, 5xx, connection failures, etc.) do NOT match — they mean "we
82/// don't know", not "not registered".
83///
84/// Matches the format produced by `seer-core/src/rdap/client.rs:603`:
85/// `"query failed with status 404 ..."`.
86fn rdap_error_is_404(err: &SeerError) -> bool {
87    if let SeerError::RdapError(msg) = err {
88        msg.contains("query failed with status 404")
89    } else {
90        false
91    }
92}
93
94/// Returns true if the parsed WHOIS response lacks all key registration
95/// signals: no registrar, no creation date, and no expiration date.
96///
97/// This is a necessary-but-not-sufficient signal for domain availability;
98/// `lookup_concurrent` combines it with an RDAP 404 before routing to the
99/// availability path. Nameservers alone don't disqualify thinness — some
100/// registries return placeholder nameservers for unregistered domains.
101fn whois_response_is_thin(w: &WhoisResponse) -> bool {
102    w.registrar.is_none() && w.creation_date.is_none() && w.expiration_date.is_none()
103}
104
105/// Decides whether a WHOIS response + RDAP error combination should route
106/// to the availability path. Returns `(confidence, method)` when routing is
107/// warranted, `None` to keep the existing `LookupResult::Whois` behavior.
108///
109/// Case A: WHOIS explicitly indicates no registration (highest priority).
110/// Case B: WHOIS returned but lacks registration data AND RDAP returned 404.
111fn classify_whois_leg(
112    w: &WhoisResponse,
113    rdap_err: &SeerError,
114) -> Option<(&'static str, &'static str)> {
115    if w.is_available() {
116        return Some(("high", "whois"));
117    }
118    if whois_response_is_thin(w) && rdap_error_is_404(rdap_err) {
119        return Some(("medium", "whois_thin_response"));
120    }
121    None
122}
123
124/// Sanitizes an error message for inclusion in a public-facing response.
125///
126/// Strips IPv4 and IPv6 literals (to avoid leaking internal addresses when
127/// an SSRF guard rejects a resolved URL) and caps the total length to
128/// [`MAX_PUBLIC_ERROR_LEN`] characters.
129fn sanitize_error_for_public(msg: &str) -> String {
130    let s = IPV4_RE.replace_all(msg, "[ip-redacted]");
131    let s = strip_ipv6(&s);
132    if s.chars().count() > MAX_PUBLIC_ERROR_LEN {
133        let mut trunc: String = s.chars().take(MAX_PUBLIC_ERROR_LEN).collect();
134        trunc.push('…');
135        trunc
136    } else {
137        s
138    }
139}
140
141/// RAII guard for the in-flight-lookup slot. On drop, removes the entry
142/// from `LOOKUP_INFLIGHT` and notifies any waiters so they can read the
143/// freshly-populated cache.
144///
145/// NOTE on failed-owner retry semantics:
146/// When the owning task's lookup fails, `InflightGuard::drop` runs, the
147/// `HashMap` entry is removed, and `notify_waiters()` fires. Waiters wake,
148/// observe an empty cache, and one of them becomes the new owner — triggering
149/// a fresh network race. This means transient failures are automatically
150/// retried by any concurrent waiter. Callers that observe a timeout error
151/// should not assume no work is in flight; another concurrent caller may
152/// already be retrying.
153struct InflightGuard {
154    key: String,
155    notify: Arc<Notify>,
156}
157
158impl Drop for InflightGuard {
159    fn drop(&mut self) {
160        // Recover from mutex poisoning rather than leaking the HashMap entry.
161        // A poisoned mutex here would otherwise strand the key in the map
162        // until process exit, permanently blocking future lookups for this
163        // domain from acquiring ownership.
164        let mut inflight = LOOKUP_INFLIGHT.lock().unwrap_or_else(|p| p.into_inner());
165        inflight.remove(&self.key);
166        self.notify.notify_waiters();
167    }
168}
169
170/// Internal classification of the RDAP leg of a concurrent lookup.
171///
172/// Distinguishing `NoData` (HTTP 200 but response was missing useful fields)
173/// from `Error` lets the orchestrator prefer a thin WHOIS result over the
174/// availability fallback when RDAP silently returned nothing.
175enum RdapOutcome {
    /// RDAP completed with a response that `is_rdap_response_useful` accepted.
176    Useful(RdapResponse),
    /// RDAP completed with a response that `is_rdap_response_useful` rejected.
    /// The response is kept so it can be attached to a WHOIS result as a
    /// fallback.
177    NoData(RdapResponse),
    /// The RDAP request completed with an error.
178    Error(SeerError),
179    /// RDAP future did not complete within the grace period after the other
180    /// protocol finished.
181    GraceTimeout,
182}
183
184/// Progress callback for smart lookup operations.
185/// Called with a message describing the current phase of the lookup.
/// The callback is shared behind an `Arc`, must be `Send + Sync`, and is
/// invoked synchronously from inside the lookup future.
186pub type LookupProgressCallback = Arc<dyn Fn(&str) + Send + Sync>;
187
188#[derive(Debug, Clone, Serialize, Deserialize)]
189#[serde(tag = "source", rename_all = "lowercase")]
190pub enum LookupResult {
191    Rdap {
192        data: Box<RdapResponse>,
193        #[serde(skip_serializing_if = "Option::is_none")]
194        whois_fallback: Option<WhoisResponse>,
195    },
196    Whois {
197        data: WhoisResponse,
198        rdap_error: Option<String>,
199        #[serde(skip_serializing_if = "Option::is_none")]
200        rdap_fallback: Option<Box<RdapResponse>>,
201    },
202    Available {
203        data: Box<AvailabilityResult>,
204        rdap_error: String,
205        whois_error: String,
206        /// Raw WHOIS response, when one was available at routing time
207        /// (Cases A and B in the design spec). `None` preserves the
208        /// pre-existing "both protocols errored" semantics.
209        #[serde(default, skip_serializing_if = "Option::is_none")]
210        whois_data: Option<WhoisResponse>,
211    },
212}
213
214impl LookupResult {
215    /// Returns the domain name from the lookup result.
216    pub fn domain_name(&self) -> Option<String> {
217        match self {
218            LookupResult::Rdap { data, .. } => data.domain_name().map(String::from),
219            LookupResult::Whois { data, .. } => Some(data.domain.clone()),
220            LookupResult::Available { data, .. } => Some(data.domain.clone()),
221        }
222    }
223
224    /// Returns the registrar name, preferring RDAP data with WHOIS fallback.
225    pub fn registrar(&self) -> Option<String> {
226        match self {
227            LookupResult::Rdap {
228                data,
229                whois_fallback,
230            } => data
231                .get_registrar()
232                .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone())),
233            LookupResult::Whois { data, .. } => data.registrar.clone(),
234            LookupResult::Available { .. } => None,
235        }
236    }
237
238    /// Returns the registrant organization, preferring RDAP data with WHOIS fallback.
239    pub fn organization(&self) -> Option<String> {
240        match self {
241            LookupResult::Rdap {
242                data,
243                whois_fallback,
244            } => data
245                .get_registrant_organization()
246                .or_else(|| whois_fallback.as_ref().and_then(|w| w.organization.clone())),
247            LookupResult::Whois { data, .. } => data.organization.clone(),
248            LookupResult::Available { .. } => None,
249        }
250    }
251
252    /// Returns true if the result came from RDAP.
253    pub fn is_rdap(&self) -> bool {
254        matches!(self, LookupResult::Rdap { .. })
255    }
256
257    /// Returns true if the result came from WHOIS.
258    pub fn is_whois(&self) -> bool {
259        matches!(self, LookupResult::Whois { .. })
260    }
261
262    /// Returns true if the result is an availability check fallback.
263    pub fn is_available(&self) -> bool {
264        matches!(self, LookupResult::Available { .. })
265    }
266
267    /// Returns the expiration date and registrar info from the lookup result.
268    pub fn expiration_info(&self) -> (Option<DateTime<Utc>>, Option<String>) {
269        match self {
270            LookupResult::Rdap {
271                data,
272                whois_fallback,
273            } => {
274                // Try to get expiration from RDAP events
275                let expiration_date = data
276                    .events
277                    .iter()
278                    .find(|e| e.event_action == "expiration")
279                    .and_then(|e| e.parsed_date())
280                    .or_else(|| {
281                        // Fallback to WHOIS if available
282                        whois_fallback.as_ref().and_then(|w| w.expiration_date)
283                    });
284
285                let registrar = data
286                    .get_registrar()
287                    .or_else(|| whois_fallback.as_ref().and_then(|w| w.registrar.clone()));
288
289                (expiration_date, registrar)
290            }
291            LookupResult::Whois { data, .. } => (data.expiration_date, data.registrar.clone()),
292            LookupResult::Available { .. } => (None, None),
293        }
294    }
295}
296
297/// Before caching, trim raw WHOIS response to limit cache memory.
298/// A full WHOIS raw_response can be up to 1 MB; we cap it at 32 KB which is
299/// plenty for the parsed fields while preventing the cache from ballooning.
300fn trim_for_cache(mut result: LookupResult) -> LookupResult {
301    const MAX_RAW: usize = 32 * 1024;
302
303    match result {
304        LookupResult::Whois { ref mut data, .. } => {
305            if data.raw_response.len() > MAX_RAW {
306                data.raw_response.truncate(MAX_RAW);
307                data.raw_response.push_str("\n... [truncated for cache]");
308            }
309        }
310        LookupResult::Rdap {
311            ref mut whois_fallback,
312            ..
313        } => {
314            if let Some(ref mut w) = whois_fallback {
315                if w.raw_response.len() > MAX_RAW {
316                    w.raw_response.truncate(MAX_RAW);
317                    w.raw_response.push_str("\n... [truncated for cache]");
318                }
319            }
320        }
321        LookupResult::Available {
322            ref mut whois_data, ..
323        } => {
324            if let Some(ref mut w) = whois_data {
325                if w.raw_response.len() > MAX_RAW {
326                    w.raw_response.truncate(MAX_RAW);
327                    w.raw_response.push_str("\n... [truncated for cache]");
328                }
329            }
330        }
331    }
332
333    result
334}
335
/// Domain lookup orchestrator: queries RDAP and WHOIS concurrently and falls
/// back to an availability check when neither yields usable data.
336#[derive(Debug, Clone)]
337pub struct SmartLookup {
    /// Client used for the RDAP leg of a lookup.
338    rdap_client: RdapClient,
    /// Client used for the WHOIS leg of a lookup.
339    whois_client: WhoisClient,
    /// Checker consulted when both protocols fail to provide data.
340    availability_checker: AvailabilityChecker,
341    /// Deprecated: both protocols are now always attempted concurrently.
342    prefer_rdap: bool,
343    /// Deprecated: WHOIS data is now always attached when available.
344    include_fallback: bool,
345}
346
347impl Default for SmartLookup {
348    fn default() -> Self {
349        Self::new()
350    }
351}
352
353impl SmartLookup {
354    /// Creates a new SmartLookup that runs RDAP and WHOIS concurrently,
355    /// falling back to an availability check if both fail.
356    pub fn new() -> Self {
357        Self {
358            rdap_client: RdapClient::new(),
359            whois_client: WhoisClient::new(),
360            availability_checker: AvailabilityChecker::new(),
361            prefer_rdap: true,
362            include_fallback: false,
363        }
364    }
365
366    /// Deprecated: both protocols are now always attempted concurrently.
367    /// This method is kept for API compatibility but has no effect.
368    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
369    pub fn prefer_rdap(mut self, prefer: bool) -> Self {
370        self.prefer_rdap = prefer;
371        self
372    }
373
374    /// Deprecated: WHOIS data is now always attached when available.
375    /// This method is kept for API compatibility but has no effect.
376    #[deprecated(note = "This field has no effect. RDAP is always tried concurrently with WHOIS.")]
377    pub fn include_fallback(mut self, include: bool) -> Self {
378        self.include_fallback = include;
379        self
380    }
381
382    /// Performs a smart lookup for a domain, trying both RDAP and WHOIS concurrently.
383    /// Falls back to an availability check if both fail.
384    /// Results are cached for 5 minutes to avoid redundant network calls.
385    #[instrument(skip(self), fields(domain = %domain))]
386    pub async fn lookup(&self, domain: &str) -> Result<LookupResult> {
387        self.lookup_with_progress(domain, None).await
388    }
389
390    /// Performs a lookup with an optional progress callback.
391    /// The callback is called with messages describing the current phase.
392    /// Results are cached for 5 minutes. Concurrent lookups for the same
393    /// domain are coalesced — only one network race runs per domain at a time.
394    #[instrument(skip(self, progress), fields(domain = %domain))]
395    pub async fn lookup_with_progress(
396        &self,
397        domain: &str,
398        progress: Option<LookupProgressCallback>,
399    ) -> Result<LookupResult> {
400        let normalized = crate::validation::normalize_domain(domain)?;
401
402        // Check cache first
403        if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
404            debug!(domain = %normalized, "Returning cached lookup result");
405            return Ok(cached);
406        }
407
408        // Coalesce in-flight lookups: if another task is already running a
409        // race for this domain, wait on its Notify rather than starting a
410        // second race. Two branches:
411        //   - Waiter: another task owns the slot; await its notify, then
412        //     read the cache. If the cache is still empty (owner failed),
413        //     loop and re-contend for ownership.
414        //   - Owner: no entry exists; insert a Weak handle, hold the Arc
415        //     for the duration of the work, then remove and notify on drop.
416        //
417        // A `loop` with a separate lock-scope per iteration keeps the
418        // `MutexGuard` from being held across any `.await`.
419        let _guard = loop {
420            enum Slot {
421                Waiter(Arc<Notify>),
422                Owner(InflightGuard),
423            }
424
425            let slot = {
426                let mut inflight = LOOKUP_INFLIGHT
427                    .lock()
428                    .expect("LOOKUP_INFLIGHT mutex poisoned");
429                match inflight.get(&normalized).and_then(|w| w.upgrade()) {
430                    Some(existing) => Slot::Waiter(existing),
431                    None => {
432                        let n = Arc::new(Notify::new());
433                        inflight.insert(normalized.clone(), Arc::downgrade(&n));
434                        Slot::Owner(InflightGuard {
435                            key: normalized.clone(),
436                            notify: n,
437                        })
438                    }
439                }
440            };
441
442            match slot {
443                Slot::Waiter(n) => {
444                    debug!(domain = %normalized, "Waiting for in-flight lookup to complete");
445                    n.notified().await;
446                    if let Some(cached) = LOOKUP_CACHE.get(&normalized) {
447                        return Ok(cached);
448                    }
449                    // Owner finished without populating the cache (failed
450                    // or errored). Re-contend for ownership.
451                    continue;
452                }
453                Slot::Owner(guard) => break guard,
454            }
455        };
456
457        let result = self.lookup_concurrent(&normalized, progress).await?;
458
459        // Cache a trimmed copy to limit memory usage before releasing
460        // waiters (via guard drop) so they observe the cached value.
461        LOOKUP_CACHE.insert(normalized.clone(), trim_for_cache(result.clone()));
462
463        Ok(result)
464    }
465
466    /// Clears the lookup result cache.
467    pub fn clear_cache() {
468        LOOKUP_CACHE.clear();
469    }
470
471    #[instrument(skip(self, progress), fields(domain = %domain))]
472    async fn lookup_concurrent(
473        &self,
474        domain: &str,
475        progress: Option<LookupProgressCallback>,
476    ) -> Result<LookupResult> {
477        #[cfg(test)]
478        LOOKUP_CONCURRENT_CALLS.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
479
480        debug!(domain = %domain, "Attempting RDAP and WHOIS concurrently");
481
482        if let Some(ref cb) = progress {
483            cb("Querying RDAP and WHOIS concurrently");
484        }
485
486        let rdap_fut = self.rdap_client.lookup_domain(domain);
487        let whois_fut = self.whois_client.lookup(domain);
488
489        tokio::pin!(rdap_fut);
490        tokio::pin!(whois_fut);
491
492        // Race: whichever finishes first gets a grace period for the other.
493        //
494        // We track whether each side completed naturally or was truncated by
495        // the grace period, so downstream error messages can distinguish a
496        // true timeout from a loser-truncation.
497        enum LegOutcome<T> {
498            Completed(T),
499            GraceTruncated,
500        }
501
502        let (rdap_leg, whois_leg) = tokio::select! {
503            rdap_res = &mut rdap_fut => {
504                // RDAP finished first — give WHOIS a grace period
505                let whois_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, whois_fut).await {
506                    Ok(res) => LegOutcome::Completed(res),
507                    Err(_) => {
508                        debug!("WHOIS did not finish within grace period, proceeding with RDAP only");
509                        LegOutcome::GraceTruncated
510                    }
511                };
512                (LegOutcome::Completed(rdap_res), whois_leg)
513            }
514            whois_res = &mut whois_fut => {
515                // WHOIS finished first — give RDAP a grace period
516                let rdap_leg = match tokio_timeout(PROTOCOL_GRACE_PERIOD, rdap_fut).await {
517                    Ok(res) => LegOutcome::Completed(res),
518                    Err(_) => {
519                        debug!("RDAP did not finish within grace period, proceeding with WHOIS only");
520                        LegOutcome::GraceTruncated
521                    }
522                };
523                (rdap_leg, LegOutcome::Completed(whois_res))
524            }
525        };
526
527        // Classify the RDAP leg.
528        let rdap_outcome = match rdap_leg {
529            LegOutcome::Completed(Ok(data)) => {
530                if self.is_rdap_response_useful(&data) {
531                    RdapOutcome::Useful(data)
532                } else {
533                    RdapOutcome::NoData(data)
534                }
535            }
536            LegOutcome::Completed(Err(e)) => RdapOutcome::Error(e),
537            LegOutcome::GraceTruncated => RdapOutcome::GraceTimeout,
538        };
539
540        // Phase 1: If RDAP returned useful data, use it as primary.
541        if let RdapOutcome::Useful(rdap_data) = rdap_outcome {
542            debug!("RDAP lookup successful");
543            let whois_fallback = match whois_leg {
544                LegOutcome::Completed(Ok(w)) => Some(w),
545                _ => None,
546            };
547            return Ok(LookupResult::Rdap {
548                data: Box::new(rdap_data),
549                whois_fallback,
550            });
551        }
552
553        // RDAP was not useful (NoData, Error, or GraceTimeout). Prefer WHOIS
554        // if it returned any response, even a thin one — this is safer than
555        // falling back to the availability heuristic when we have actual
556        // registry data in hand.
557        let (rdap_error_str, rdap_fallback_data, rdap_seer_error) = match rdap_outcome {
558            RdapOutcome::Useful(_) => {
559                // Unreachable in this branch (we returned above), but handle
560                // defensively rather than panicking across the FFI boundary.
561                debug!("Unexpected RdapOutcome::Useful in fallback branch");
562                (String::from("RDAP ok"), None, None)
563            }
564            RdapOutcome::NoData(data) => (
565                "RDAP response incomplete".to_string(),
566                Some(Box::new(data)),
567                None,
568            ),
569            RdapOutcome::Error(e) => (e.to_string(), None, Some(e)),
570            RdapOutcome::GraceTimeout => (
571                format!(
572                    "RDAP did not return within {}s grace period after WHOIS won",
573                    PROTOCOL_GRACE_PERIOD.as_secs()
574                ),
575                None,
576                None,
577            ),
578        };
579
580        if let LegOutcome::Completed(Ok(whois_data)) = whois_leg {
581            // Check Cases A and B: should we reclassify as Available?
582            let availability_match = rdap_seer_error
583                .as_ref()
584                .and_then(|e| classify_whois_leg(&whois_data, e))
585                .or_else(|| {
586                    // Case A can still fire even when RDAP errored for a
587                    // non-404 reason — the WHOIS signal alone is sufficient.
588                    if whois_data.is_available() {
589                        Some(("high", "whois"))
590                    } else {
591                        None
592                    }
593                });
594
595            if let Some((confidence, method)) = availability_match {
596                debug!(
597                    domain = %domain,
598                    confidence = %confidence,
599                    "Reclassifying WHOIS as availability signal"
600                );
601                if let Some(ref cb) = progress {
602                    cb("Domain appears unregistered");
603                }
604                let details = match confidence {
605                    "high" => Some("WHOIS indicates domain is not registered".to_string()),
606                    "medium" => Some(
607                        "WHOIS returned no registrar or registration dates; RDAP returned 404"
608                            .to_string(),
609                    ),
610                    _ => None,
611                };
612                let avail = AvailabilityResult {
613                    domain: domain.to_string(),
614                    available: true,
615                    confidence: confidence.to_string(),
616                    method: method.to_string(),
617                    details,
618                };
619                return Ok(LookupResult::Available {
620                    data: Box::new(avail),
621                    rdap_error: sanitize_error_for_public(&rdap_error_str),
622                    whois_error: String::new(),
623                    whois_data: Some(whois_data),
624                });
625            }
626
627            debug!("Using WHOIS result (RDAP not useful)");
628            if let Some(ref cb) = progress {
629                cb("RDAP not available (using WHOIS)");
630            }
631            return Ok(LookupResult::Whois {
632                data: whois_data,
633                rdap_error: Some(rdap_error_str),
634                rdap_fallback: rdap_fallback_data,
635            });
636        }
637
638        // Both sides failed to provide useful data. Craft a precise WHOIS
639        // error string that distinguishes true errors from grace-period
640        // truncation.
641        let whois_error_str = match whois_leg {
642            LegOutcome::Completed(Err(e)) => e.to_string(),
643            LegOutcome::Completed(Ok(_)) => {
644                // Already handled above; treat defensively.
645                debug!("Unexpected completed-Ok WHOIS in availability fallback branch");
646                "WHOIS returned but was not used".to_string()
647            }
648            LegOutcome::GraceTruncated => format!(
649                "WHOIS did not return within {}s grace period after RDAP won",
650                PROTOCOL_GRACE_PERIOD.as_secs()
651            ),
652        };
653
654        self.availability_fallback(domain, rdap_error_str, whois_error_str, progress)
655            .await
656    }
657
658    async fn availability_fallback(
659        &self,
660        domain: &str,
661        rdap_error: String,
662        whois_error: String,
663        progress: Option<LookupProgressCallback>,
664    ) -> Result<LookupResult> {
665        if let Some(ref cb) = progress {
666            cb("RDAP and WHOIS unavailable (checking availability)");
667        }
668        warn!(
669            domain = %domain,
670            rdap_error = %rdap_error,
671            whois_error = %whois_error,
672            "Both RDAP and WHOIS failed, falling back to availability check"
673        );
674
675        match self.availability_checker.check(domain).await {
676            Ok(avail) => Ok(LookupResult::Available {
677                data: Box::new(avail),
678                rdap_error: sanitize_error_for_public(&rdap_error),
679                whois_error: sanitize_error_for_public(&whois_error),
680                whois_data: None,
681            }),
682            Err(avail_err) => {
683                let tld = get_tld(domain).unwrap_or("unknown");
684                let registry_url = get_registry_url(tld).unwrap_or_else(|| {
685                    format!("https://www.iana.org/domains/root/db/{}.html", tld)
686                });
687                Err(SeerError::LookupFailed {
688                    domain: domain.to_string(),
689                    details: format!(
690                        "RDAP failed ({}), WHOIS failed ({}), availability check failed ({})",
691                        rdap_error, whois_error, avail_err
692                    ),
693                    registry_url,
694                })
695            }
696        }
697    }
698
699    fn is_rdap_response_useful(&self, response: &RdapResponse) -> bool {
700        // Check if we have at least some meaningful data
701        let has_name = response.ldh_name.is_some() || response.unicode_name.is_some();
702        let has_dates = response
703            .events
704            .iter()
705            .any(|e| e.event_action == "registration" || e.event_action == "expiration");
706        let has_entities = !response.entities.is_empty();
707        let has_nameservers = !response.nameservers.is_empty();
708        let has_status = !response.status.is_empty();
709
710        // Consider useful if we have the name plus at least one other piece of info
711        has_name && (has_dates || has_entities || has_nameservers || has_status)
712    }
713}
714
#[cfg(test)]
mod tests {
    use super::*;
    // Explicit import (it shadows the glob above) so the RDAP fixtures keep
    // compiling even if the parent module stops importing `RdapResponse`.
    use crate::rdap::RdapResponse;

    // ---------------- Shared fixtures ----------------

    /// Builds a `WhoisResponse` for `domain` with every optional field unset,
    /// empty `status` / `nameservers` lists, and empty `whois_server` /
    /// `raw_response` strings. Tests override only the fields they care about.
    fn empty_whois(domain: &str) -> WhoisResponse {
        WhoisResponse {
            domain: domain.to_string(),
            registrar: None,
            registrant: None,
            organization: None,
            registrant_email: None,
            registrant_phone: None,
            registrant_address: None,
            registrant_country: None,
            admin_name: None,
            admin_organization: None,
            admin_email: None,
            admin_phone: None,
            tech_name: None,
            tech_organization: None,
            tech_email: None,
            tech_phone: None,
            creation_date: None,
            expiration_date: None,
            updated_date: None,
            nameservers: vec![],
            status: vec![],
            dnssec: None,
            whois_server: String::new(),
            raw_response: String::new(),
        }
    }

    /// Deserializes a minimal RDAP domain object (only `objectClassName` set).
    // No test in this module calls this fixture at the moment, hence the
    // `dead_code` allowance.
    #[allow(dead_code)]
    fn make_empty_rdap_response() -> RdapResponse {
        serde_json::from_value(serde_json::json!({
            "objectClassName": "domain",
        }))
        .expect("valid minimal RDAP response")
    }

    /// The RDAP error used by the `classify_whois_leg` tests to represent an
    /// HTTP 404 answer from the RDAP leg.
    fn rdap_404() -> SeerError {
        SeerError::RdapError("query failed with status 404 Not Found".to_string())
    }

    // ---------------- LookupResult accessors / serialization ----------------

    /// A WHOIS-sourced result exposes its domain and registrar and reports
    /// itself as WHOIS (not RDAP, not available).
    #[test]
    fn test_lookup_result_domain_name_whois() {
        let mut data = empty_whois("example.com");
        data.registrar = Some("Test Registrar".to_string());
        data.whois_server = "whois.example.com".to_string();

        let result = LookupResult::Whois {
            data,
            rdap_error: None,
            rdap_fallback: None,
        };

        assert_eq!(result.domain_name(), Some("example.com".to_string()));
        assert_eq!(result.registrar(), Some("Test Registrar".to_string()));
        assert!(result.is_whois());
        assert!(!result.is_rdap());
        assert!(!result.is_available());
    }

    /// The WHOIS variant serializes with a `"source":"whois"` tag and carries
    /// the RDAP error text through to the JSON.
    #[test]
    fn test_lookup_result_serialization() {
        let result = LookupResult::Whois {
            data: empty_whois("test.com"),
            rdap_error: Some("RDAP failed".to_string()),
            rdap_fallback: None,
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"source\":\"whois\""));
        assert!(json.contains("RDAP failed"));
    }

    /// The Available variant serializes with a `"source":"available"` tag and
    /// its accessors report no registrar and no expiration data.
    #[test]
    fn test_lookup_result_available_serialization() {
        let availability = AvailabilityResult {
            domain: "test123.xyz".to_string(),
            available: true,
            confidence: "medium".to_string(),
            method: "whois_error".to_string(),
            details: Some("WHOIS server indicates no matching records".to_string()),
        };
        let result = LookupResult::Available {
            data: Box::new(availability),
            rdap_error: "RDAP failed".to_string(),
            whois_error: "WHOIS failed".to_string(),
            whois_data: None,
        };

        let json = serde_json::to_string(&result).unwrap();
        assert!(json.contains("\"source\":\"available\""));
        assert!(json.contains("\"available\":true"));
        assert!(json.contains("test123.xyz"));

        assert_eq!(result.domain_name(), Some("test123.xyz".to_string()));
        assert!(result.is_available());
        assert!(!result.is_rdap());
        assert!(!result.is_whois());
        assert!(result.registrar().is_none());
        assert_eq!(result.expiration_info(), (None, None));
    }

    /// Builder setters store the values they are given.
    // NOTE(review): the `deprecated` allowance presumably covers one of the
    // builder setters used here — confirm which one is deprecated.
    #[test]
    #[allow(deprecated)]
    fn test_smart_lookup_builder() {
        let lookup = SmartLookup::new().prefer_rdap(false).include_fallback(true);
        assert!(!lookup.prefer_rdap);
        assert!(lookup.include_fallback);
    }

    /// `clear_cache` leaves the global lookup cache empty.
    #[test]
    fn test_lookup_cache_clear() {
        SmartLookup::clear_cache();
        assert!(LOOKUP_CACHE.is_empty());
    }

    // ---------------- sanitize_error_for_public ----------------

    #[test]
    fn test_sanitize_strips_ipv4() {
        let msg = "RDAP URL resolves to reserved IP 10.0.0.1 which is forbidden";
        let sanitized = sanitize_error_for_public(msg);
        assert!(
            !sanitized.contains("10.0.0.1"),
            "IPv4 should be stripped, got: {}",
            sanitized
        );
        assert!(sanitized.contains("[ip-redacted]"));
    }

    #[test]
    fn test_sanitize_strips_multiple_ipv4() {
        let msg = "Could not connect to 192.168.1.1 after trying 127.0.0.1";
        let sanitized = sanitize_error_for_public(msg);
        assert!(!sanitized.contains("192.168.1.1"));
        assert!(!sanitized.contains("127.0.0.1"));
        // One placeholder per address: two redactions expected.
        assert_eq!(sanitized.matches("[ip-redacted]").count(), 2);
    }

    #[test]
    fn test_sanitize_strips_ipv6() {
        let msg = "RDAP URL resolves to reserved IP fe80::1 which is forbidden";
        let sanitized = sanitize_error_for_public(msg);
        assert!(!sanitized.contains("fe80::1"));
        assert!(sanitized.contains("[ip-redacted]"));
    }

    /// Hex/colon tokens that are not IPv6 literals must survive sanitizing.
    #[test]
    fn sanitize_leaves_mac_address_like_tokens_alone() {
        let msg = "error code af:ba:12 at line 5";
        let out = sanitize_error_for_public(msg);
        assert!(
            out.contains("af:ba:12"),
            "MAC fragment should not be stripped: {}",
            out
        );
    }

    #[test]
    fn sanitize_strips_real_ipv6() {
        let msg = "cannot reach 2001:db8::1 — timeout";
        let out = sanitize_error_for_public(msg);
        assert!(!out.contains("2001:db8::1"));
        assert!(out.contains("[ip-redacted]"));
    }

    #[test]
    fn sanitize_strips_fe80_link_local() {
        let msg = "peer at fe80::1 unreachable";
        let out = sanitize_error_for_public(msg);
        // Check removal as well as the placeholder: a message that gained the
        // placeholder but still leaked the address must fail this test.
        assert!(!out.contains("fe80::1"));
        assert!(out.contains("[ip-redacted]"));
    }

    #[test]
    fn test_sanitize_truncates_long_message() {
        // A 500-char message with no IPs, well over the public limit.
        let long = "a".repeat(500);
        let sanitized = sanitize_error_for_public(&long);
        // Expect exactly MAX_PUBLIC_ERROR_LEN chars followed by one ellipsis.
        assert_eq!(sanitized.chars().count(), MAX_PUBLIC_ERROR_LEN + 1);
        assert!(sanitized.ends_with('…'));
    }

    #[test]
    fn test_sanitize_preserves_short_messages() {
        let msg = "RDAP timed out after 15s";
        assert_eq!(sanitize_error_for_public(msg), msg);
    }

    // Demonstrates that output of the `sanitize_error_for_public` helper is
    // what ends up in the rdap_error / whois_error fields of the `Available`
    // variant. We check the call site indirectly: sanitize raw errors that
    // contain IPs, construct an Available manually, and verify the stored
    // strings are redacted.
    // (Integration via real clients would require network.)
    #[test]
    fn test_sanitize_applied_to_available_fields() {
        let rdap_raw = "RDAP URL resolves to reserved IP 10.0.0.1";
        let whois_raw = "connection refused at 192.168.0.5";

        let result = LookupResult::Available {
            data: Box::new(AvailabilityResult {
                domain: "unreg.test".to_string(),
                available: true,
                confidence: "low".to_string(),
                method: "heuristic".to_string(),
                details: None,
            }),
            rdap_error: sanitize_error_for_public(rdap_raw),
            whois_error: sanitize_error_for_public(whois_raw),
            whois_data: None,
        };

        match result {
            LookupResult::Available {
                rdap_error,
                whois_error,
                ..
            } => {
                assert!(!rdap_error.contains("10.0.0.1"));
                assert!(!whois_error.contains("192.168.0.5"));
                assert!(rdap_error.contains("[ip-redacted]"));
                assert!(whois_error.contains("[ip-redacted]"));
            }
            _ => panic!("expected Available variant"),
        }
    }

    // ---------------- RdapOutcome classification ----------------

    #[test]
    fn test_is_rdap_response_useful_detects_no_data() {
        let lookup = SmartLookup::new();

        // A response with a name but no events, entities, NS, or status
        // — this is the "200 OK but no useful fields" case that should be
        // classified as RdapOutcome::NoData (not Useful, not Error).
        let name_only = RdapResponse {
            ldh_name: Some("example.com".to_string()),
            ..Default::default()
        };
        assert!(
            !lookup.is_rdap_response_useful(&name_only),
            "Response with only a name should be classified as NoData"
        );

        // Sanity check: a name plus a status IS useful.
        let with_status = RdapResponse {
            ldh_name: Some("example.com".to_string()),
            status: vec!["active".to_string()],
            ..Default::default()
        };
        assert!(lookup.is_rdap_response_useful(&with_status));
    }

    // ---------------- Coalescing ----------------

    // Verifies that when multiple concurrent lookups hit the in-flight map
    // for the same domain, later arrivals observe the existing Weak<Notify>
    // and become waiters rather than racing a second lookup. We test the
    // map-level primitive here because the full SmartLookup pipeline
    // requires network access to exercise.
    #[tokio::test]
    async fn test_inflight_coalescing_map() {
        // Clear any prior state (the guard is dropped at the end of the statement).
        LOOKUP_INFLIGHT.lock().unwrap().clear();

        let domain = "__test_coalesce.example.".to_string();

        // First caller: no entry → becomes owner.
        let owner_notify = Arc::new(Notify::new());
        {
            let mut inflight = LOOKUP_INFLIGHT.lock().unwrap();
            assert!(inflight.get(&domain).and_then(|w| w.upgrade()).is_none());
            inflight.insert(domain.clone(), Arc::downgrade(&owner_notify));
        }

        // Second caller: sees the existing Weak and upgrades.
        let waiter = {
            let inflight = LOOKUP_INFLIGHT.lock().unwrap();
            inflight
                .get(&domain)
                .and_then(|w| w.upgrade())
                .expect("Second caller must observe in-flight entry")
        };

        // Waiter listens in the background.
        let waiter_clone = waiter.clone();
        let handle = tokio::spawn(async move {
            waiter_clone.notified().await;
        });

        // Simulate owner completing. The short sleep yields to the runtime so
        // the spawned waiter is already parked on `notified()` before
        // `notify_waiters` fires.
        tokio::time::sleep(Duration::from_millis(20)).await;
        LOOKUP_INFLIGHT.lock().unwrap().remove(&domain);
        owner_notify.notify_waiters();

        // Waiter should unblock quickly.
        tokio::time::timeout(Duration::from_secs(1), handle)
            .await
            .expect("waiter must unblock after notify")
            .expect("waiter task joined cleanly");

        // After owner removes entry and drops its Arc, the Weak is dead.
        drop(owner_notify);
        drop(waiter);
        let inflight = LOOKUP_INFLIGHT.lock().unwrap();
        assert!(inflight.get(&domain).and_then(|w| w.upgrade()).is_none());
    }

    // ---------------- rdap_error_is_404 ----------------

    #[test]
    fn rdap_error_is_404_matches_standard_404() {
        let e = SeerError::RdapError("query failed with status 404 Not Found".to_string());
        assert!(rdap_error_is_404(&e));
    }

    #[test]
    fn rdap_error_is_404_matches_without_reason_phrase() {
        let e = SeerError::RdapError("query failed with status 404".to_string());
        assert!(rdap_error_is_404(&e));
    }

    #[test]
    fn rdap_error_is_404_rejects_other_statuses() {
        let server_error =
            SeerError::RdapError("query failed with status 500 Server Error".to_string());
        assert!(!rdap_error_is_404(&server_error));
        let bad_request =
            SeerError::RdapError("query failed with status 400 Bad Request".to_string());
        assert!(!rdap_error_is_404(&bad_request));
    }

    #[test]
    fn rdap_error_is_404_rejects_non_http_errors() {
        let no_status = SeerError::RdapError("connection timeout".to_string());
        assert!(!rdap_error_is_404(&no_status));
        let timeout = SeerError::Timeout("rdap".to_string());
        assert!(!rdap_error_is_404(&timeout));
    }

    #[test]
    fn rdap_error_is_404_rejects_incidental_404_in_message() {
        // A 404 substring inside a non-status context must not match.
        let e = SeerError::RdapError("error 40404: database corruption".to_string());
        assert!(!rdap_error_is_404(&e));
    }

    // ---------------- whois_response_is_thin ----------------

    #[test]
    fn whois_response_is_thin_when_all_key_fields_missing() {
        assert!(whois_response_is_thin(&empty_whois("example.com")));
    }

    #[test]
    fn whois_response_is_not_thin_when_registrar_present() {
        let mut w = empty_whois("example.com");
        w.registrar = Some("Test Registrar".to_string());
        assert!(!whois_response_is_thin(&w));
    }

    #[test]
    fn whois_response_is_not_thin_when_creation_date_present() {
        let mut w = empty_whois("example.com");
        w.creation_date = Some(chrono::Utc::now());
        assert!(!whois_response_is_thin(&w));
    }

    #[test]
    fn whois_response_is_not_thin_when_expiration_date_present() {
        let mut w = empty_whois("example.com");
        w.expiration_date = Some(chrono::Utc::now());
        assert!(!whois_response_is_thin(&w));
    }

    #[test]
    fn whois_response_is_thin_even_with_nameservers_alone() {
        let mut w = empty_whois("example.com");
        w.nameservers = vec!["ns1.example.net".to_string()];
        assert!(whois_response_is_thin(&w));
    }

    // ---------------- classify_whois_leg ----------------

    /// Case A: the WHOIS body itself says "no match" and RDAP returned 404.
    #[test]
    fn classify_whois_leg_case_a_high_confidence() {
        let mut w = empty_whois("zaccodes.com");
        w.raw_response = "No match for \"ZACCODES.COM\".".to_string();
        assert!(w.is_available());

        let (verdict, method) =
            classify_whois_leg(&w, &rdap_404()).expect("expected a routing decision");
        assert_eq!(verdict, "high");
        assert_eq!(method, "whois");
    }

    /// Case B: WHOIS is thin (no "no match" text) and RDAP returned 404.
    #[test]
    fn classify_whois_leg_case_b_medium_confidence() {
        let w = empty_whois("example.xyz");
        assert!(!w.is_available(), "this WHOIS body has no 'no match' text");

        let (verdict, method) =
            classify_whois_leg(&w, &rdap_404()).expect("expected a routing decision");
        assert_eq!(verdict, "medium");
        assert_eq!(method, "whois_thin_response");
    }

    /// A thin WHOIS answer alone is not enough when RDAP failed for a
    /// non-404 reason.
    #[test]
    fn classify_whois_leg_rejects_thin_whois_without_404() {
        let w = empty_whois("example.xyz");
        let rdap_err = SeerError::RdapError("connection timeout".to_string());
        assert!(classify_whois_leg(&w, &rdap_err).is_none());
    }

    /// WHOIS with real registration data yields no routing decision even
    /// when RDAP returned 404.
    #[test]
    fn classify_whois_leg_rejects_whois_with_real_data() {
        let mut w = empty_whois("legacy.tld");
        w.registrar = Some("Legacy Registry".to_string());
        w.creation_date = Some(chrono::Utc::now());
        assert!(classify_whois_leg(&w, &rdap_404()).is_none());
    }

    /// When both cases could apply, the explicit "no match" (case A) wins.
    #[test]
    fn classify_whois_leg_case_a_wins_over_case_b() {
        let mut w = empty_whois("example.com");
        w.raw_response = "No match for \"EXAMPLE.COM\".".to_string();
        let (verdict, _) = classify_whois_leg(&w, &rdap_404()).unwrap();
        assert_eq!(verdict, "high");
    }
}