Skip to main content

seer_core/rdap/
client.rs

1use std::collections::HashMap;
2use std::net::{IpAddr, SocketAddr};
3use std::sync::Arc;
4use std::time::{Duration, Instant};
5
6use futures::StreamExt;
7use once_cell::sync::Lazy;
8use reqwest::Client;
9use serde::Deserialize;
10use tokio::sync::{Notify, RwLock};
11use tracing::{debug, info, instrument};
12
13use super::bootstrap::{
14    ipv4_matches_prefix, ipv6_matches_prefix, parse_asn_range, validate_bootstrap_url,
15};
16use super::types::RdapResponse;
17use crate::error::{Result, SeerError};
18use crate::retry::{RetryExecutor, RetryPolicy};
19use crate::validation::{describe_reserved_ip, normalize_domain};
20
/// IANA bootstrap registry mapping TLDs to RDAP base URLs.
/// Fetched by `load_bootstrap_data`.
const IANA_BOOTSTRAP_DNS: &str = "https://data.iana.org/rdap/dns.json";
/// IANA bootstrap registry mapping IPv4 prefixes to RDAP base URLs.
const IANA_BOOTSTRAP_IPV4: &str = "https://data.iana.org/rdap/ipv4.json";
/// IANA bootstrap registry mapping IPv6 prefixes to RDAP base URLs.
const IANA_BOOTSTRAP_IPV6: &str = "https://data.iana.org/rdap/ipv6.json";
/// IANA bootstrap registry mapping ASN ranges to RDAP base URLs.
const IANA_BOOTSTRAP_ASN: &str = "https://data.iana.org/rdap/asn.json";

/// Default timeout for RDAP queries (15 seconds).
/// With the 5s connect_timeout, this gives 10s for the server to respond.
/// Most RDAP servers respond within 2-5 seconds; slow ccTLD registries
/// may need the full 15s.
///
/// Also reused as the bound for streaming response bodies and for waiting
/// on another task's in-flight bootstrap load.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(15);

/// Connect timeout — fail fast when a host is unreachable rather than
/// waiting the full request timeout on a TCP handshake that will never complete.
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);

/// TTL for bootstrap data (24 hours). Cached data older than this is
/// considered expired and triggers a refresh attempt.
const BOOTSTRAP_TTL: Duration = Duration::from_secs(24 * 60 * 60);

/// Minimum interval between bootstrap refresh attempts when the cache is
/// expired-but-present or empty. Prevents a thundering herd of concurrent
/// callers from all hammering IANA simultaneously during an outage.
const BOOTSTRAP_REFRESH_MIN_INTERVAL: Duration = Duration::from_secs(60);
43
/// Shared HTTP client for bootstrap fetches against IANA.
/// The bootstrap targets are hardcoded data.iana.org URLs, so this client
/// does not need DNS-rebinding protection. Per-query RDAP requests build
/// their own short-lived client that pins resolved IPs.
///
/// Wrapped in `Option` so a reqwest builder failure surfaces as a typed
/// `SeerError::HttpError` via `rdap_http_client()` instead of a process
/// panic at first use (library code must not `.expect()` on shared state).
static RDAP_HTTP_CLIENT: Lazy<Option<Client>> = Lazy::new(|| {
    Client::builder()
        .timeout(DEFAULT_TIMEOUT)
        .connect_timeout(CONNECT_TIMEOUT)
        .user_agent("Seer/1.0 (RDAP Client)")
        .pool_max_idle_per_host(10)
        .build()
        // The builder's error detail is discarded here; only the fact that
        // initialization failed is retained (as `None`).
        .ok()
});
61
62/// Returns a reference to the shared RDAP bootstrap HTTP client, or a typed
63/// error if the builder failed at initialization time. Call sites use
64/// `rdap_http_client()?` instead of dereferencing the static directly.
65fn rdap_http_client() -> Result<&'static Client> {
66    RDAP_HTTP_CLIENT
67        .as_ref()
68        .ok_or_else(|| SeerError::HttpError("failed to initialize HTTP client".into()))
69}
70
/// Bootstrap cache with TTL support.
/// Starts out as `None` and is populated by `ensure_bootstrap` after the
/// first successful load.
static BOOTSTRAP_CACHE: Lazy<RwLock<Option<CachedBootstrap>>> = Lazy::new(|| RwLock::new(None));

/// Timestamp of the most recent bootstrap refresh attempt (success or failure).
/// Used together with `BOOTSTRAP_REFRESH_MIN_INTERVAL` to throttle retry
/// storms when IANA is unreachable. `None` until the first attempt.
static BOOTSTRAP_LAST_ATTEMPT: Lazy<RwLock<Option<Instant>>> = Lazy::new(|| RwLock::new(None));

/// Notifies waiters when an in-flight bootstrap load completes (success or
/// failure). Solves the first-boot thundering-herd race where two concurrent
/// cold-cache callers would otherwise see: caller A records its attempt
/// timestamp, then caller B checks the timestamp and finds it "too recent"
/// and returns a spurious `throttled and no cache available` error while A
/// is still actively loading. Losers instead wait on this notify with a
/// bounded timeout, then re-check the cache.
static BOOTSTRAP_LOAD_NOTIFY: Lazy<Notify> = Lazy::new(Notify::new);
87
/// Cached bootstrap data with timestamp for TTL tracking
struct CachedBootstrap {
    /// The parsed bootstrap registries.
    data: BootstrapData,
    /// When this entry was constructed; compared against `BOOTSTRAP_TTL`.
    loaded_at: Instant,
}
93
94impl CachedBootstrap {
95    fn new(data: BootstrapData) -> Self {
96        Self {
97            data,
98            loaded_at: Instant::now(),
99        }
100    }
101
102    fn is_expired(&self) -> bool {
103        self.loaded_at.elapsed() > BOOTSTRAP_TTL
104    }
105
106    fn age(&self) -> Duration {
107        self.loaded_at.elapsed()
108    }
109}
110
/// Parsed IANA bootstrap data.
/// Each TLD/prefix/ASN range is associated with an ordered list of
/// candidate RDAP base URLs (IANA may list multiple per RFC 9224). Callers
/// try them in order and fall back on failure.
struct BootstrapData {
    /// Lowercased TLD -> candidate RDAP base URLs.
    dns: HashMap<String, Arc<Vec<url::Url>>>,
    /// IPv4 prefix entries, in the order they were parsed.
    ipv4: Vec<(IpRange, Arc<Vec<url::Url>>)>,
    /// IPv6 prefix entries, in the order they were parsed.
    ipv6: Vec<(IpRange, Arc<Vec<url::Url>>)>,
    /// ASN range entries, in the order they were parsed.
    asn: Vec<(AsnRange, Arc<Vec<url::Url>>)>,
}
121
/// An IP prefix entry taken from the IANA IPv4/IPv6 bootstrap registry.
#[derive(Clone)]
struct IpRange {
    /// Prefix string exactly as it appeared in the bootstrap JSON; matched
    /// via `ipv4_matches_prefix` / `ipv6_matches_prefix`.
    /// NOTE(review): presumably CIDR notation (`addr/len`) — confirm against
    /// the matcher implementations.
    prefix: String,
}
126
/// A range of AS numbers from the IANA ASN bootstrap registry.
/// Both bounds are inclusive (see `get_rdap_urls_for_asn`).
#[derive(Clone)]
struct AsnRange {
    /// First ASN covered by the range.
    start: u32,
    /// Last ASN covered by the range.
    end: u32,
}
132
/// Raw shape of an IANA bootstrap JSON document; only `services` is read.
#[derive(Deserialize)]
struct BootstrapResponse {
    /// One entry per service. The loader reads element 0 as an array of keys
    /// (TLDs, prefixes, or ASN ranges) and element 1 as an array of URLs;
    /// entries with fewer than two elements are ignored.
    services: Vec<Vec<serde_json::Value>>,
}
137
138/// Waits (bounded) for an in-flight bootstrap load to complete, then
139/// re-checks the cache. Used by losers of the throttle race so a concurrent
140/// cold-cache caller doesn't spuriously error with "throttled and no cache
141/// available" while the winner is still loading.
142///
143/// The `notified` future must be created BEFORE the caller observes the
144/// throttle condition — otherwise `notify_waiters()` could fire in the gap
145/// between observing "still throttled, empty cache" and subscribing, and
146/// this call would then block until timeout.
147async fn wait_for_in_flight_load(
148    notified: std::pin::Pin<&mut tokio::sync::futures::Notified<'_>>,
149) -> Result<()> {
150    // Bounded wait so we don't block forever if the winner's future was
151    // cancelled/dropped before it could notify.
152    let _ = tokio::time::timeout(DEFAULT_TIMEOUT, notified).await;
153    let cache = BOOTSTRAP_CACHE.read().await;
154    if cache.is_some() {
155        Ok(())
156    } else {
157        Err(SeerError::RdapBootstrapError(
158            "bootstrap refresh throttled and no cache available".to_string(),
159        ))
160    }
161}
162
/// RDAP client for domain, IP, and ASN lookups.
///
/// Bootstrap data lives in process-wide statics, so the client itself only
/// carries its retry configuration.
#[derive(Debug, Clone)]
pub struct RdapClient {
    /// Retry policy applied to the bootstrap load and to each RDAP query.
    retry_policy: RetryPolicy,
}
167
168impl Default for RdapClient {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
174impl RdapClient {
175    /// Creates a new RDAP client with default settings.
176    pub fn new() -> Self {
177        Self {
178            retry_policy: RetryPolicy::default().with_max_attempts(2),
179        }
180    }
181
182    /// Sets the retry policy for transient network failures.
183    ///
184    /// The default policy retries up to 2 times with exponential backoff.
185    pub fn with_retry_policy(mut self, policy: RetryPolicy) -> Self {
186        self.retry_policy = policy;
187        self
188    }
189
190    /// Disables retries (single attempt only).
191    pub fn without_retries(mut self) -> Self {
192        self.retry_policy = RetryPolicy::no_retry();
193        self
194    }
195
196    /// Ensures bootstrap data is loaded and not expired.
197    ///
198    /// Uses stale-while-revalidate: if refresh fails, stale data is used.
199    /// Performs the actual network load WITHOUT holding the write lock, so
200    /// concurrent readers are never blocked by an in-flight HTTP request
201    /// (fix for the previous deadlock/await-under-lock hazard).
202    ///
203    /// Refresh attempts are also throttled to at most one per
204    /// `BOOTSTRAP_REFRESH_MIN_INTERVAL` to avoid thundering-herd storms
205    /// against IANA when bootstrap is down.
206    ///
207    /// Concurrent cold-cache callers coordinate via `BOOTSTRAP_LOAD_NOTIFY`:
208    /// losers of the throttle race wait (with a bounded timeout) for the
209    /// winner's load instead of erroring out immediately.
210    async fn ensure_bootstrap(&self) -> Result<()> {
211        // Fast path: read-lock and return if fresh.
212        {
213            let cache = BOOTSTRAP_CACHE.read().await;
214            if let Some(cached) = cache.as_ref() {
215                if !cached.is_expired() {
216                    return Ok(());
217                }
218            }
219        }
220
221        // Register a notify subscription BEFORE we check the throttle gate,
222        // so a `notify_waiters()` from the winner can't slip between our
223        // "still throttled, empty cache" check and our `.notified().await`.
224        // `Notify::notified()` holds the permit slot the moment it's
225        // constructed; only `.await` blocks.
226        let notified = BOOTSTRAP_LOAD_NOTIFY.notified();
227        tokio::pin!(notified);
228
229        // Throttle refresh attempts. If another caller tried very recently,
230        // either return stale data we already have, or wait for their load
231        // to complete rather than erroring with "throttled and no cache".
232        {
233            let last = BOOTSTRAP_LAST_ATTEMPT.read().await;
234            if let Some(ts) = *last {
235                if ts.elapsed() < BOOTSTRAP_REFRESH_MIN_INTERVAL {
236                    // Another caller attempted a refresh very recently.
237                    let cache = BOOTSTRAP_CACHE.read().await;
238                    if cache.is_some() {
239                        // We have some data (possibly stale) — accept it.
240                        return Ok(());
241                    }
242                    // Cache is empty AND another task is mid-load (or just
243                    // failed). Wait for them instead of returning an error.
244                    drop(cache);
245                    drop(last);
246                    return wait_for_in_flight_load(notified).await;
247                }
248            }
249        }
250
251        // Record the attempt timestamp before we begin the network load.
252        // Holding this lock is cheap (no await in between read+write here).
253        {
254            let mut last = BOOTSTRAP_LAST_ATTEMPT.write().await;
255            // Double-check in case another task just updated it.
256            if let Some(ts) = *last {
257                if ts.elapsed() < BOOTSTRAP_REFRESH_MIN_INTERVAL {
258                    drop(last);
259                    let cache = BOOTSTRAP_CACHE.read().await;
260                    if cache.is_some() {
261                        return Ok(());
262                    }
263                    drop(cache);
264                    return wait_for_in_flight_load(notified).await;
265                }
266            }
267            *last = Some(Instant::now());
268        }
269
270        // Perform the actual load WITHOUT holding any cache lock. Whichever
271        // branch exits, we must notify waiters so losers don't hang for the
272        // full bounded timeout.
273        debug!("Loading/refreshing RDAP bootstrap data");
274        let load_result = load_bootstrap_data_with_retry(&self.retry_policy).await;
275
276        let outcome = match load_result {
277            Ok(data) => {
278                let mut cache = BOOTSTRAP_CACHE.write().await;
279                // Double-check: another task may have loaded while we ran.
280                // Only overwrite if the current cache is missing or expired.
281                let should_store = cache.as_ref().map(|c| c.is_expired()).unwrap_or(true);
282                if should_store {
283                    *cache = Some(CachedBootstrap::new(data));
284                }
285                Ok(())
286            }
287            Err(e) => {
288                // Stale-while-revalidate: keep using any existing stale cache.
289                let cache = BOOTSTRAP_CACHE.read().await;
290                if let Some(cached) = cache.as_ref() {
291                    debug!(
292                        error = %e,
293                        age_hours = cached.age().as_secs() / 3600,
294                        "Bootstrap refresh failed, using stale data"
295                    );
296                    Ok(())
297                } else {
298                    // No stale data available.
299                    Err(e)
300                }
301            }
302        };
303
304        // Wake any losers waiting on our load. Safe to call in both branches.
305        BOOTSTRAP_LOAD_NOTIFY.notify_waiters();
306        outcome
307    }
308
309    /// Looks up the candidate RDAP base URLs for a domain's TLD.
310    fn get_rdap_urls_for_domain(cache: &BootstrapData, domain: &str) -> Option<Arc<Vec<url::Url>>> {
311        let tld = domain.rsplit('.').next()?;
312        cache.dns.get(&tld.to_lowercase()).cloned()
313    }
314
315    /// Looks up the candidate RDAP base URLs for an IP address.
316    fn get_rdap_urls_for_ip(cache: &BootstrapData, ip: &IpAddr) -> Option<Arc<Vec<url::Url>>> {
317        match ip {
318            IpAddr::V4(addr) => {
319                for (range, urls) in &cache.ipv4 {
320                    if ipv4_matches_prefix(&range.prefix, addr) {
321                        return Some(Arc::clone(urls));
322                    }
323                }
324            }
325            IpAddr::V6(addr) => {
326                for (range, urls) in &cache.ipv6 {
327                    if ipv6_matches_prefix(&range.prefix, addr) {
328                        return Some(Arc::clone(urls));
329                    }
330                }
331            }
332        }
333
334        None
335    }
336
337    /// Looks up the candidate RDAP base URLs for an ASN.
338    fn get_rdap_urls_for_asn(cache: &BootstrapData, asn: u32) -> Option<Arc<Vec<url::Url>>> {
339        for (range, urls) in &cache.asn {
340            if asn >= range.start && asn <= range.end {
341                return Some(Arc::clone(urls));
342            }
343        }
344
345        None
346    }
347
348    /// Looks up RDAP registration data for a domain.
349    ///
350    /// Uses IANA bootstrap data to find the appropriate RDAP server for the TLD.
351    #[instrument(skip(self), fields(domain = %domain))]
352    pub async fn lookup_domain(&self, domain: &str) -> Result<RdapResponse> {
353        self.ensure_bootstrap().await?;
354
355        let domain = normalize_domain(domain)?;
356
357        // Extract candidate URLs while holding the lock, then release before HTTP requests.
358        let urls = {
359            let cache_guard = BOOTSTRAP_CACHE.read().await;
360            let cache = cache_guard.as_ref().ok_or_else(|| {
361                SeerError::RdapBootstrapError("bootstrap data not loaded".to_string())
362            })?;
363
364            let bases = Self::get_rdap_urls_for_domain(&cache.data, &domain).ok_or_else(|| {
365                SeerError::RdapBootstrapError(format!("no RDAP server for {}", domain))
366            })?;
367
368            build_rdap_urls(&bases, &format!("domain/{}", domain))
369        }; // Lock released here
370
371        self.query_rdap_urls(&urls).await
372    }
373
374    /// Looks up RDAP registration data for an IP address.
375    ///
376    /// Uses IANA bootstrap data to find the appropriate RIR (Regional Internet Registry).
377    #[instrument(skip(self), fields(ip = %ip))]
378    pub async fn lookup_ip(&self, ip: &str) -> Result<RdapResponse> {
379        self.ensure_bootstrap().await?;
380
381        let ip_addr: IpAddr = ip
382            .parse()
383            .map_err(|_| SeerError::InvalidIpAddress(ip.to_string()))?;
384
385        let urls = {
386            let cache_guard = BOOTSTRAP_CACHE.read().await;
387            let cache = cache_guard.as_ref().ok_or_else(|| {
388                SeerError::RdapBootstrapError("bootstrap data not loaded".to_string())
389            })?;
390
391            let bases = Self::get_rdap_urls_for_ip(&cache.data, &ip_addr).ok_or_else(|| {
392                SeerError::RdapBootstrapError(format!("no RDAP server for {}", ip))
393            })?;
394
395            build_rdap_urls(&bases, &format!("ip/{}", ip))
396        };
397
398        self.query_rdap_urls(&urls).await
399    }
400
401    /// Looks up RDAP registration data for an Autonomous System Number (ASN).
402    ///
403    /// Uses IANA bootstrap data to find the appropriate RIR for the ASN range.
404    #[instrument(skip(self), fields(asn = %asn))]
405    pub async fn lookup_asn(&self, asn: u32) -> Result<RdapResponse> {
406        self.ensure_bootstrap().await?;
407
408        let urls = {
409            let cache_guard = BOOTSTRAP_CACHE.read().await;
410            let cache = cache_guard.as_ref().ok_or_else(|| {
411                SeerError::RdapBootstrapError("bootstrap data not loaded".to_string())
412            })?;
413
414            let bases = Self::get_rdap_urls_for_asn(&cache.data, asn).ok_or_else(|| {
415                SeerError::RdapBootstrapError(format!("no RDAP server for AS{}", asn))
416            })?;
417
418            build_rdap_urls(&bases, &format!("autnum/{}", asn))
419        };
420
421        self.query_rdap_urls(&urls).await
422    }
423
424    /// Returns the RDAP base URL for a given TLD, if known from bootstrap data.
425    ///
426    /// Loads bootstrap data if not already cached. Returns `None` if the TLD
427    /// has no registered RDAP server in the IANA bootstrap registry. When
428    /// IANA lists multiple URLs for a TLD, the first one is returned.
429    #[instrument(skip(self), fields(tld = %tld))]
430    pub async fn get_rdap_base_url_for_tld(&self, tld: &str) -> Option<String> {
431        if self.ensure_bootstrap().await.is_err() {
432            return None;
433        }
434
435        let cache_guard = BOOTSTRAP_CACHE.read().await;
436        let cache = cache_guard.as_ref()?;
437        cache
438            .data
439            .dns
440            .get(&tld.to_lowercase())
441            .and_then(|urls| urls.first())
442            .map(|u| u.to_string())
443    }
444
445    /// Queries a list of candidate RDAP URLs in order, returning the first
446    /// successful response. Each URL is attempted with the full retry policy.
447    /// If all candidates fail, the last error is returned wrapped with context.
448    async fn query_rdap_urls(&self, urls: &[url::Url]) -> Result<RdapResponse> {
449        if urls.is_empty() {
450            return Err(SeerError::RdapError(
451                "no candidate RDAP URLs available".to_string(),
452            ));
453        }
454
455        let mut last_error: Option<SeerError> = None;
456        for (idx, url) in urls.iter().enumerate() {
457            let url_str = url.as_str().to_string();
458            debug!(url = %url_str, candidate = idx + 1, total = urls.len(), "Querying RDAP");
459            match self.query_rdap_with_retry(&url_str).await {
460                Ok(resp) => return Ok(resp),
461                Err(e) => {
462                    if urls.len() > 1 {
463                        debug!(
464                            url = %url_str,
465                            error = %e,
466                            candidate = idx + 1,
467                            total = urls.len(),
468                            "RDAP candidate failed, trying next",
469                        );
470                    }
471                    last_error = Some(e);
472                }
473            }
474        }
475
476        // All candidates failed.
477        Err(wrap_all_candidates_failed(last_error, urls.len()))
478    }
479
480    /// Queries a single RDAP endpoint with retry logic.
481    async fn query_rdap_with_retry(&self, url: &str) -> Result<RdapResponse> {
482        let executor = RetryExecutor::new(self.retry_policy.clone());
483        let url = url.to_string();
484
485        executor
486            .execute(|| {
487                let url = url.clone();
488                async move { query_rdap_internal(&url).await }
489            })
490            .await
491    }
492}
493
/// Maximum RDAP response body size (10 MB, matching CT log response limit).
/// Enforced incrementally while the body is streamed in
/// `query_rdap_internal`, so an oversized response is rejected as soon as
/// the limit is crossed.
const MAX_RDAP_RESPONSE_SIZE: usize = 10 * 1024 * 1024;
496
497/// Validates that a URL does not resolve to a reserved/private IP address (SSRF protection).
498///
499/// Returns the full list of resolved `SocketAddr`s so the caller can pin them on a
500/// per-request HTTP client via `resolve_to_addrs`. Pinning prevents a DNS rebinding
501/// TOCTOU where the hostname could resolve to a different (private) address between
502/// validation here and the actual HTTP connect.
503async fn validate_url_not_reserved(url: &str) -> Result<Vec<SocketAddr>> {
504    let parsed = url::Url::parse(url)
505        .map_err(|e| SeerError::RdapError(format!("invalid URL '{}': {}", url, e)))?;
506    let host = parsed
507        .host_str()
508        .ok_or_else(|| SeerError::RdapError(format!("URL '{}' has no host", url)))?;
509    let port = parsed.port_or_known_default().unwrap_or(443);
510
511    // If the host is already an IP literal, check it directly.
512    if let Ok(ip) = host.parse::<IpAddr>() {
513        if let Some(reason) = describe_reserved_ip(&ip) {
514            return Err(SeerError::RdapError(format!(
515                "RDAP URL resolves to reserved IP {}: {} — request blocked (SSRF protection)",
516                ip, reason
517            )));
518        }
519        return Ok(vec![SocketAddr::new(ip, port)]);
520    }
521
522    let addr = format!("{}:{}", host, port);
523
524    let socket_addrs: Vec<SocketAddr> = tokio::net::lookup_host(&addr)
525        .await
526        .map_err(|e| SeerError::RdapError(format!("failed to resolve host '{}': {}", host, e)))?
527        .collect();
528
529    if socket_addrs.is_empty() {
530        return Err(SeerError::RdapError(format!(
531            "host '{}' resolved to no addresses",
532            host
533        )));
534    }
535
536    for socket_addr in &socket_addrs {
537        if let Some(reason) = describe_reserved_ip(&socket_addr.ip()) {
538            return Err(SeerError::RdapError(format!(
539                "RDAP URL resolves to reserved IP {}: {} — request blocked (SSRF protection)",
540                socket_addr.ip(),
541                reason
542            )));
543        }
544    }
545
546    Ok(socket_addrs)
547}
548
549/// Validates a bootstrap-extracted URL before caching it.
550///
551/// Rejects non-https schemes, IP-literal hosts, missing hosts, and hosts
552/// containing whitespace or control characters. Returns the parsed URL on
553/// success so the caller can cache it in normalized form.
554/// Internal function to query an RDAP endpoint (used by retry executor).
555///
556/// Builds a per-request HTTP client that pins the validated resolved IPs to
557/// prevent DNS rebinding (TOCTOU between validation and connect).
558async fn query_rdap_internal(url: &str) -> Result<RdapResponse> {
559    // SSRF protection: validate the URL does not resolve to reserved IPs and
560    // capture the resolved SocketAddrs so we can pin them on the HTTP client.
561    let resolved = validate_url_not_reserved(url).await?;
562
563    let parsed = url::Url::parse(url)
564        .map_err(|e| SeerError::RdapError(format!("invalid URL '{}': {}", url, e)))?;
565    let host = parsed
566        .host_str()
567        .ok_or_else(|| SeerError::RdapError(format!("URL '{}' has no host", url)))?;
568
569    // Build a short-lived client pinning the validated IPs. If the host was
570    // an IP literal the resolved vec already holds it, so `resolve_to_addrs`
571    // is still correct.
572    let client = Client::builder()
573        .timeout(DEFAULT_TIMEOUT)
574        .connect_timeout(CONNECT_TIMEOUT)
575        .user_agent("Seer/1.0 (RDAP Client)")
576        .resolve_to_addrs(host, &resolved)
577        .build()
578        .map_err(|e| SeerError::RdapError(format!("failed to build HTTP client: {}", e)))?;
579
580    let response = client
581        .get(url)
582        .header("Accept", "application/rdap+json")
583        .send()
584        .await?;
585
586    if !response.status().is_success() {
587        return Err(SeerError::RdapError(format!(
588            "query failed with status {}",
589            response.status()
590        )));
591    }
592
593    // Stream body with incremental size check to prevent memory exhaustion.
594    // Wrap the chunk loop in a timeout so a server that opens the connection
595    // but trickles bytes forever is classified as a timeout (not a generic
596    // RdapError) and retries can be driven appropriately.
597    let mut body = Vec::new();
598    let mut stream = response.bytes_stream();
599    let streamed = tokio::time::timeout(DEFAULT_TIMEOUT, async {
600        while let Some(chunk) = stream.next().await {
601            let chunk = chunk
602                .map_err(|e| SeerError::RdapError(format!("failed to read response: {}", e)))?;
603            body.extend_from_slice(&chunk);
604            if body.len() > MAX_RDAP_RESPONSE_SIZE {
605                return Err(SeerError::RdapError(format!(
606                    "RDAP response exceeds {} byte limit",
607                    MAX_RDAP_RESPONSE_SIZE
608                )));
609            }
610        }
611        Ok::<(), SeerError>(())
612    })
613    .await;
614
615    match streamed {
616        Ok(Ok(())) => {}
617        Ok(Err(e)) => return Err(e),
618        Err(_) => {
619            return Err(SeerError::Timeout(format!(
620                "timed out reading RDAP response body from {} after {:?}",
621                host, DEFAULT_TIMEOUT
622            )));
623        }
624    }
625
626    let rdap: RdapResponse = serde_json::from_slice(&body)?;
627    // Bound attacker-controlled payload post-deserialization. The 10MB
628    // body cap prevents unbounded download, but a well-formed response
629    // can still pack millions of keys or deeply-nested values into the
630    // serde_json::Map, and adversarial `entities` nesting can drive
631    // recursive walkers to stack-overflow. See RdapResponse::validate.
632    rdap.validate()?;
633    Ok(rdap)
634}
635
636/// Loads IANA RDAP bootstrap data from all registries with retry.
637async fn load_bootstrap_data_with_retry(policy: &RetryPolicy) -> Result<BootstrapData> {
638    let executor = RetryExecutor::new(policy.clone());
639    executor.execute(load_bootstrap_data).await
640}
641
642/// Loads IANA RDAP bootstrap data from all registries.
643async fn load_bootstrap_data() -> Result<BootstrapData> {
644    debug!("Loading RDAP bootstrap data from IANA");
645
646    // SSRF validation is skipped here — these are hardcoded IANA URLs, not user input.
647    // User-supplied URLs are still validated in query_rdap_internal().
648
649    let http = rdap_http_client()?;
650
651    let dns_future = http.get(IANA_BOOTSTRAP_DNS).send();
652    let ipv4_future = http.get(IANA_BOOTSTRAP_IPV4).send();
653    let ipv6_future = http.get(IANA_BOOTSTRAP_IPV6).send();
654    let asn_future = http.get(IANA_BOOTSTRAP_ASN).send();
655
656    // Use join! instead of try_join! so one slow/failing registry doesn't
657    // block the others. We load whatever data is available.
658    let (dns_resp, ipv4_resp, ipv6_resp, asn_resp) =
659        tokio::join!(dns_future, ipv4_future, ipv6_future, asn_future);
660
661    // Stream body with incremental size check to prevent memory exhaustion
662    const MAX_BOOTSTRAP_SIZE: usize = 10 * 1024 * 1024; // 10 MB
663
664    async fn read_bootstrap(resp: reqwest::Response) -> Result<BootstrapResponse> {
665        // Bound the streaming-read loop with the same timeout used for RDAP
666        // queries. Without this, a slow or stalled IANA response (open TCP
667        // but no bytes arriving) could hang all RDAP lookups indefinitely
668        // because `ensure_bootstrap` awaits this future. Mirrors the pattern
669        // in `query_rdap_internal`.
670        let mut body = Vec::new();
671        let mut stream = resp.bytes_stream();
672        let streamed = tokio::time::timeout(DEFAULT_TIMEOUT, async {
673            while let Some(chunk) = stream.next().await {
674                let chunk = chunk.map_err(|e| {
675                    SeerError::RdapBootstrapError(format!("failed to read body: {}", e))
676                })?;
677                body.extend_from_slice(&chunk);
678                if body.len() > MAX_BOOTSTRAP_SIZE {
679                    return Err(SeerError::RdapBootstrapError(format!(
680                        "bootstrap response too large (exceeds {} bytes)",
681                        MAX_BOOTSTRAP_SIZE
682                    )));
683                }
684            }
685            Ok::<(), SeerError>(())
686        })
687        .await;
688
689        match streamed {
690            Ok(Ok(())) => {}
691            Ok(Err(e)) => return Err(e),
692            Err(_) => {
693                return Err(SeerError::Timeout(format!(
694                    "RDAP bootstrap body read timed out after {:?}",
695                    DEFAULT_TIMEOUT
696                )));
697            }
698        }
699
700        serde_json::from_slice(&body).map_err(Into::into)
701    }
702
703    // Parse each response independently, logging failures
704    let dns_data = match dns_resp {
705        Ok(resp) => match read_bootstrap(resp).await {
706            Ok(data) => Some(data),
707            Err(e) => {
708                debug!(error = %e, "Failed to parse DNS bootstrap response");
709                None
710            }
711        },
712        Err(e) => {
713            debug!(error = %e, "Failed to fetch DNS bootstrap from IANA");
714            None
715        }
716    };
717    let ipv4_data = match ipv4_resp {
718        Ok(resp) => match read_bootstrap(resp).await {
719            Ok(data) => Some(data),
720            Err(e) => {
721                debug!(error = %e, "Failed to parse IPv4 bootstrap response");
722                None
723            }
724        },
725        Err(e) => {
726            debug!(error = %e, "Failed to fetch IPv4 bootstrap from IANA");
727            None
728        }
729    };
730    let ipv6_data = match ipv6_resp {
731        Ok(resp) => match read_bootstrap(resp).await {
732            Ok(data) => Some(data),
733            Err(e) => {
734                debug!(error = %e, "Failed to parse IPv6 bootstrap response");
735                None
736            }
737        },
738        Err(e) => {
739            debug!(error = %e, "Failed to fetch IPv6 bootstrap from IANA");
740            None
741        }
742    };
743    let asn_data = match asn_resp {
744        Ok(resp) => match read_bootstrap(resp).await {
745            Ok(data) => Some(data),
746            Err(e) => {
747                debug!(error = %e, "Failed to parse ASN bootstrap response");
748                None
749            }
750        },
751        Err(e) => {
752            debug!(error = %e, "Failed to fetch ASN bootstrap from IANA");
753            None
754        }
755    };
756
757    // If ALL four registries failed, that's a real error
758    if dns_data.is_none() && ipv4_data.is_none() && ipv6_data.is_none() && asn_data.is_none() {
759        return Err(SeerError::RdapBootstrapError(
760            "all IANA bootstrap registries failed".to_string(),
761        ));
762    }
763
764    let mut dns = HashMap::new();
765    let mut ipv4 = Vec::new();
766    let mut ipv6 = Vec::new();
767    let mut asn = Vec::new();
768
769    // Helper: extract and validate all URLs in order, preserving IANA-listed
770    // ordering. Invalid URLs are logged and skipped rather than rejecting the
771    // entire service entry. Returns None when no valid URLs remain.
772    fn collect_valid_urls(urls: &[serde_json::Value]) -> Option<Arc<Vec<url::Url>>> {
773        let mut out = Vec::new();
774        for u in urls {
775            if let Some(s) = u.as_str() {
776                match validate_bootstrap_url(s) {
777                    Ok(parsed) => out.push(parsed),
778                    Err(e) => {
779                        debug!(url = s, error = %e, "Skipping invalid bootstrap URL");
780                    }
781                }
782            }
783        }
784        if out.is_empty() {
785            None
786        } else {
787            Some(Arc::new(out))
788        }
789    }
790
791    // Parse DNS bootstrap
792    if let Some(dns_data) = dns_data {
793        for service in dns_data.services {
794            if service.len() >= 2 {
795                if let (Some(tlds), Some(urls)) = (service[0].as_array(), service[1].as_array()) {
796                    if let Some(urls_arc) = collect_valid_urls(urls) {
797                        for tld in tlds {
798                            if let Some(tld_str) = tld.as_str() {
799                                dns.insert(tld_str.to_lowercase(), Arc::clone(&urls_arc));
800                            }
801                        }
802                    }
803                }
804            }
805        }
806    }
807
808    // Parse IPv4 bootstrap
809    if let Some(ipv4_data) = ipv4_data {
810        for service in ipv4_data.services {
811            if service.len() >= 2 {
812                if let (Some(prefixes), Some(urls)) = (service[0].as_array(), service[1].as_array())
813                {
814                    if let Some(urls_arc) = collect_valid_urls(urls) {
815                        for prefix in prefixes {
816                            if let Some(prefix_str) = prefix.as_str() {
817                                ipv4.push((
818                                    IpRange {
819                                        prefix: prefix_str.to_string(),
820                                    },
821                                    Arc::clone(&urls_arc),
822                                ));
823                            }
824                        }
825                    }
826                }
827            }
828        }
829    }
830
831    // Parse IPv6 bootstrap
832    if let Some(ipv6_data) = ipv6_data {
833        for service in ipv6_data.services {
834            if service.len() >= 2 {
835                if let (Some(prefixes), Some(urls)) = (service[0].as_array(), service[1].as_array())
836                {
837                    if let Some(urls_arc) = collect_valid_urls(urls) {
838                        for prefix in prefixes {
839                            if let Some(prefix_str) = prefix.as_str() {
840                                ipv6.push((
841                                    IpRange {
842                                        prefix: prefix_str.to_string(),
843                                    },
844                                    Arc::clone(&urls_arc),
845                                ));
846                            }
847                        }
848                    }
849                }
850            }
851        }
852    }
853
854    // Parse ASN bootstrap
855    if let Some(asn_data) = asn_data {
856        for service in asn_data.services {
857            if service.len() >= 2 {
858                if let (Some(ranges), Some(urls)) = (service[0].as_array(), service[1].as_array()) {
859                    if let Some(urls_arc) = collect_valid_urls(urls) {
860                        for range in ranges {
861                            if let Some(range_str) = range.as_str() {
862                                if let Some((start, end)) = parse_asn_range(range_str) {
863                                    asn.push((AsnRange { start, end }, Arc::clone(&urls_arc)));
864                                }
865                            }
866                        }
867                    }
868                }
869            }
870        }
871    }
872
873    info!(
874        dns_entries = dns.len(),
875        ipv4_ranges = ipv4.len(),
876        ipv6_ranges = ipv6.len(),
877        asn_ranges = asn.len(),
878        "RDAP bootstrap loaded"
879    );
880
881    Ok(BootstrapData {
882        dns,
883        ipv4,
884        ipv6,
885        asn,
886    })
887}
888
889/// Wraps the "all N candidate URLs failed" case for `query_rdap_urls`.
890///
891/// Preserves the `SeerError::Timeout` variant when the last failure was a
892/// timeout, so upstream callers that branch on `Timeout` for retry-or-not
893/// decisions can still do so. Non-timeout failures are wrapped in a generic
894/// `RdapError` with the last error's Display in the message. The
895/// single-candidate case returns the last error unchanged to avoid
896/// double-wrapping.
897fn wrap_all_candidates_failed(last_error: Option<SeerError>, candidate_count: usize) -> SeerError {
898    let last = last_error.unwrap_or_else(|| SeerError::RdapError("no candidates".to_string()));
899
900    if candidate_count <= 1 {
901        return last;
902    }
903
904    match last {
905        SeerError::Timeout(msg) => SeerError::Timeout(format!(
906            "all {} RDAP candidate URLs timed out; last error: {}",
907            candidate_count, msg
908        )),
909        other => SeerError::RdapError(format!(
910            "all {} RDAP candidate URLs failed; last error: {}",
911            candidate_count, other
912        )),
913    }
914}
915
916/// Builds full RDAP query URLs for each candidate base URL, preserving order.
917fn build_rdap_urls(bases: &[url::Url], path: &str) -> Vec<url::Url> {
918    bases
919        .iter()
920        .filter_map(|base| {
921            // Ensure the base URL ends with `/` before joining so the path is
922            // appended (not replacing the final path segment).
923            let base_str = base.as_str();
924            let normalized = if base_str.ends_with('/') {
925                base_str.to_string()
926            } else {
927                format!("{}/", base_str)
928            };
929            url::Url::parse(&normalized).and_then(|u| u.join(path)).ok()
930        })
931        .collect()
932}
933
#[cfg(test)]
mod tests {
    use super::*;

    // --- shared fixtures -------------------------------------------------

    /// Bootstrap data with no DNS, IPv4, IPv6, or ASN entries.
    fn empty_bootstrap_data() -> BootstrapData {
        BootstrapData {
            dns: HashMap::new(),
            ipv4: Vec::new(),
            ipv6: Vec::new(),
            asn: Vec::new(),
        }
    }

    /// Overwrites the shared `BOOTSTRAP_CACHE` static with `value`.
    async fn set_bootstrap_cache(value: Option<CachedBootstrap>) {
        *BOOTSTRAP_CACHE.write().await = value;
    }

    /// Asserts that `validate_url_not_reserved` refuses `target` with an
    /// `RdapError` whose message mentions "reserved IP".
    async fn assert_rejected_as_reserved(target: &str) {
        let err = validate_url_not_reserved(target).await.unwrap_err();
        assert!(
            matches!(err, SeerError::RdapError(ref s) if s.contains("reserved IP")),
            "expected reserved-IP error, got: {:?}",
            err
        );
    }

    // --- client construction ---------------------------------------------

    #[test]
    fn test_default_client_has_retry_policy() {
        assert_eq!(RdapClient::new().retry_policy.max_attempts, 2);
    }

    #[test]
    fn test_client_without_retries() {
        assert_eq!(
            RdapClient::new().without_retries().retry_policy.max_attempts,
            1
        );
    }

    #[test]
    fn test_client_custom_retry_policy() {
        let five_attempts = RetryPolicy::new().with_max_attempts(5);
        let client = RdapClient::new().with_retry_policy(five_attempts);
        assert_eq!(client.retry_policy.max_attempts, 5);
    }

    #[test]
    fn test_cached_bootstrap_expiration() {
        // A cache entry created just now must not report itself as expired.
        let fresh = CachedBootstrap::new(empty_bootstrap_data());
        assert!(!fresh.is_expired());
    }

    #[test]
    fn test_rdap_http_client_is_configured() {
        // Calling the accessor forces the lazily-built client into existence;
        // the builder is expected to succeed in any normal environment.
        let built = rdap_http_client();
        assert!(built.is_ok(), "RDAP HTTP client builder must succeed");
    }

    #[test]
    fn test_parse_bootstrap_empty_services() {
        // Lookups against bootstrap data with no entries must come back
        // empty rather than panicking.
        let nothing = empty_bootstrap_data();
        assert!(RdapClient::get_rdap_urls_for_domain(&nothing, "example.com").is_none());
        assert!(RdapClient::get_rdap_urls_for_asn(&nothing, 12345).is_none());
    }

    // --- validate_url_not_reserved tests (C1 regression) ----------------

    #[tokio::test]
    async fn test_validate_url_not_reserved_rejects_loopback_literal() {
        assert_rejected_as_reserved("https://127.0.0.1/domain/example.com").await;
    }

    #[tokio::test]
    async fn test_validate_url_not_reserved_rejects_private_ipv4_literal() {
        assert_rejected_as_reserved("https://10.0.0.1/").await;
    }

    #[tokio::test]
    async fn test_validate_url_not_reserved_rejects_ipv6_loopback_literal() {
        assert_rejected_as_reserved("https://[::1]/").await;
    }

    #[tokio::test]
    async fn test_validate_url_not_reserved_returns_resolved_addrs_for_public_literal() {
        // A public IP literal is expected to yield exactly one IPv4 socket
        // address on the HTTPS port.
        let resolved = validate_url_not_reserved("https://8.8.8.8/").await.unwrap();
        assert_eq!(resolved.len(), 1);
        assert!(resolved[0].ip().is_ipv4());
        assert_eq!(resolved[0].port(), 443);
    }

    // --- build_rdap_urls tests (M16) ------------------------------------

    #[test]
    fn test_build_rdap_urls_preserves_order_and_appends_path() {
        let with_slash = url::Url::parse("https://rdap.a.example/").unwrap();
        let without_slash = url::Url::parse("https://rdap.b.example").unwrap();
        let bases = vec![with_slash, without_slash];

        let built = build_rdap_urls(&bases, "domain/example.com");

        assert_eq!(built.len(), 2);
        assert_eq!(
            built[0].as_str(),
            "https://rdap.a.example/domain/example.com"
        );
        assert_eq!(
            built[1].as_str(),
            "https://rdap.b.example/domain/example.com"
        );
    }

    #[test]
    fn test_build_rdap_urls_empty_input_returns_empty() {
        assert!(build_rdap_urls(&[], "domain/example.com").is_empty());
    }

    // --- wrap_all_candidates_failed tests (Issue 2 regression) ----------

    #[test]
    fn test_wrap_all_candidates_failed_preserves_timeout_variant() {
        // A trailing Timeout must still be a Timeout after wrapping so that
        // upstream retry logic can keep branching on the variant.
        let wrapped = wrap_all_candidates_failed(
            Some(SeerError::Timeout("body read timed out".to_string())),
            3,
        );
        let msg = match wrapped {
            SeerError::Timeout(msg) => msg,
            other => panic!(
                "expected SeerError::Timeout after wrapping a Timeout, got: {:?}",
                other
            ),
        };
        assert!(
            msg.contains("all 3 RDAP candidate URLs timed out"),
            "expected wrapped timeout message, got: {}",
            msg
        );
        assert!(
            msg.contains("body read timed out"),
            "expected original message preserved, got: {}",
            msg
        );
    }

    #[test]
    fn test_wrap_all_candidates_failed_wraps_non_timeout_as_rdap_error() {
        let wrapped = wrap_all_candidates_failed(
            Some(SeerError::RdapError("500 internal error".to_string())),
            2,
        );
        assert!(
            matches!(wrapped, SeerError::RdapError(ref s) if s.contains("all 2 RDAP candidate URLs failed")),
            "expected wrapped RdapError, got: {:?}",
            wrapped
        );
    }

    #[test]
    fn test_wrap_all_candidates_failed_single_candidate_returns_unchanged() {
        // With one candidate the error passes through as-is; an
        // "all 1 candidates failed" wrapper would only mislead.
        let wrapped = wrap_all_candidates_failed(
            Some(SeerError::Timeout("single timeout".to_string())),
            1,
        );
        assert!(
            matches!(wrapped, SeerError::Timeout(ref s) if s == "single timeout"),
            "expected unchanged Timeout, got: {:?}",
            wrapped
        );
    }

    #[test]
    fn test_wrap_all_candidates_failed_no_last_error_returns_placeholder() {
        let placeholder = wrap_all_candidates_failed(None, 0);
        assert!(matches!(placeholder, SeerError::RdapError(_)));
    }

    // --- BOOTSTRAP_LOAD_NOTIFY coordination tests (Issue 1 regression) ---
    //
    // These tests do not call `ensure_bootstrap` or perform a real bootstrap
    // load. They drive the coordination pieces by hand: a `notified()`
    // subscription is created first, the test then plays the role of the
    // task that performed the load (optionally filling BOOTSTRAP_CACHE) and
    // calls `notify_waiters()`, and finally `wait_for_in_flight_load` is
    // awaited to check what a waiting caller would observe:
    //   * cache populated before the wake-up -> the waiter succeeds;
    //   * cache still empty after the wake-up -> the waiter gets an
    //     `RdapBootstrapError` containing "throttled and no cache available".

    // Both tests below mutate the shared BOOTSTRAP_CACHE static, so this
    // lock serialises them; cargo's parallel test execution would otherwise
    // let them race.
    static BOOTSTRAP_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());

    #[tokio::test]
    async fn test_bootstrap_load_notify_wakes_waiter_when_cache_populated() {
        let _guard = BOOTSTRAP_TEST_LOCK.lock().await;

        // Known-empty starting point.
        set_bootstrap_cache(None).await;

        // Subscribe BEFORE the notification is sent.
        let notified = BOOTSTRAP_LOAD_NOTIFY.notified();
        tokio::pin!(notified);

        // Play the successful loader: fill the cache, then signal.
        set_bootstrap_cache(Some(CachedBootstrap::new(empty_bootstrap_data()))).await;
        BOOTSTRAP_LOAD_NOTIFY.notify_waiters();

        let result = wait_for_in_flight_load(notified).await;
        assert!(
            result.is_ok(),
            "expected waiter to see populated cache, got: {:?}",
            result
        );

        // Leave the shared cache empty for whichever test runs next.
        set_bootstrap_cache(None).await;
    }

    #[tokio::test]
    async fn test_bootstrap_load_notify_empty_cache_after_wake_returns_error() {
        let _guard = BOOTSTRAP_TEST_LOCK.lock().await;

        // Known-empty starting point.
        set_bootstrap_cache(None).await;

        let notified = BOOTSTRAP_LOAD_NOTIFY.notified();
        tokio::pin!(notified);

        // Play the failed loader: signal without ever filling the cache.
        BOOTSTRAP_LOAD_NOTIFY.notify_waiters();

        let result = wait_for_in_flight_load(notified).await;
        assert!(
            matches!(
                result,
                Err(SeerError::RdapBootstrapError(ref s))
                    if s.contains("throttled and no cache available")
            ),
            "expected throttled error when cache still empty after notify, got: {:?}",
            result
        );
    }
}