iroh_relay/
dns.rs

1//! DNS resolver
2
3use std::{
4    fmt::{self, Write},
5    future::Future,
6    net::{IpAddr, Ipv6Addr, SocketAddr},
7};
8
9use anyhow::{bail, Context, Result};
10use hickory_resolver::{name_server::TokioConnectionProvider, TokioResolver};
11use iroh_base::NodeId;
12use n0_future::{
13    time::{self, Duration},
14    StreamExt,
15};
16use url::Url;
17
18use crate::node_info::NodeInfo;
19
/// The n0 DNS node origin, for production.
pub const N0_DNS_NODE_ORIGIN_PROD: &str = "dns.iroh.link";
/// The n0 DNS node origin, for testing (staging).
pub const N0_DNS_NODE_ORIGIN_STAGING: &str = "staging-dns.iroh.link";
24
/// The DNS resolver used throughout `iroh`.
///
/// This is a newtype around hickory's [`TokioResolver`]; all lookups are forwarded to the
/// wrapped resolver.
#[derive(Debug, Clone)]
pub struct DnsResolver(TokioResolver);
28
29impl DnsResolver {
30    /// Create a new DNS resolver with sensible cross-platform defaults.
31    ///
32    /// We first try to read the system's resolver from `/etc/resolv.conf`.
33    /// This does not work at least on some Androids, therefore we fallback
34    /// to the default `ResolverConfig` which uses eg. to google's `8.8.8.8` or `8.8.4.4`.
35    pub fn new() -> Self {
36        let (system_config, mut options) =
37            hickory_resolver::system_conf::read_system_conf().unwrap_or_default();
38
39        // Copy all of the system config, but strip the bad windows nameservers.  Unfortunately
40        // there is no easy way to do this.
41        let mut config = hickory_resolver::config::ResolverConfig::new();
42        if let Some(name) = system_config.domain() {
43            config.set_domain(name.clone());
44        }
45        for name in system_config.search() {
46            config.add_search(name.clone());
47        }
48        for nameserver_cfg in system_config.name_servers() {
49            if !WINDOWS_BAD_SITE_LOCAL_DNS_SERVERS.contains(&nameserver_cfg.socket_addr.ip()) {
50                config.add_name_server(nameserver_cfg.clone());
51            }
52        }
53
54        // see [`DnsResolver::lookup_ipv4_ipv6`] for info on why we avoid `LookupIpStrategy::Ipv4AndIpv6`
55        options.ip_strategy = hickory_resolver::config::LookupIpStrategy::Ipv4thenIpv6;
56
57        let mut builder =
58            TokioResolver::builder_with_config(config, TokioConnectionProvider::default());
59        *builder.options_mut() = options;
60        DnsResolver(builder.build())
61    }
62
63    /// Create a new DNS resolver configured with a single UDP DNS nameserver.
64    pub fn with_nameserver(nameserver: SocketAddr) -> Self {
65        let mut config = hickory_resolver::config::ResolverConfig::new();
66        let nameserver_config = hickory_resolver::config::NameServerConfig::new(
67            nameserver,
68            hickory_resolver::proto::xfer::Protocol::Udp,
69        );
70        config.add_name_server(nameserver_config);
71
72        let builder =
73            TokioResolver::builder_with_config(config, TokioConnectionProvider::default());
74        DnsResolver(builder.build())
75    }
76
77    /// Removes all entries from the cache.
78    pub fn clear_cache(&self) {
79        self.0.clear_cache();
80    }
81
82    /// Lookup a TXT record.
83    pub async fn lookup_txt(&self, host: impl ToString, timeout: Duration) -> Result<TxtLookup> {
84        let host = host.to_string();
85        let res = time::timeout(timeout, self.0.txt_lookup(host)).await??;
86        Ok(TxtLookup(res))
87    }
88
89    /// Perform an ipv4 lookup with a timeout.
90    pub async fn lookup_ipv4(
91        &self,
92        host: impl ToString,
93        timeout: Duration,
94    ) -> Result<impl Iterator<Item = IpAddr>> {
95        let host = host.to_string();
96        let addrs = time::timeout(timeout, self.0.ipv4_lookup(host)).await??;
97        Ok(addrs.into_iter().map(|ip| IpAddr::V4(ip.0)))
98    }
99
100    /// Perform an ipv6 lookup with a timeout.
101    pub async fn lookup_ipv6(
102        &self,
103        host: impl ToString,
104        timeout: Duration,
105    ) -> Result<impl Iterator<Item = IpAddr>> {
106        let host = host.to_string();
107        let addrs = time::timeout(timeout, self.0.ipv6_lookup(host)).await??;
108        Ok(addrs.into_iter().map(|ip| IpAddr::V6(ip.0)))
109    }
110
111    /// Resolve IPv4 and IPv6 in parallel with a timeout.
112    ///
113    /// `LookupIpStrategy::Ipv4AndIpv6` will wait for ipv6 resolution timeout, even if it is
114    /// not usable on the stack, so we manually query both lookups concurrently and time them out
115    /// individually.
116    pub async fn lookup_ipv4_ipv6(
117        &self,
118        host: impl ToString,
119        timeout: Duration,
120    ) -> Result<impl Iterator<Item = IpAddr>> {
121        let host = host.to_string();
122        let res = tokio::join!(
123            self.lookup_ipv4(host.clone(), timeout),
124            self.lookup_ipv6(host, timeout)
125        );
126
127        match res {
128            (Ok(ipv4), Ok(ipv6)) => Ok(LookupIter::Both(ipv4.chain(ipv6))),
129            (Ok(ipv4), Err(_)) => Ok(LookupIter::Ipv4(ipv4)),
130            (Err(_), Ok(ipv6)) => Ok(LookupIter::Ipv6(ipv6)),
131            (Err(ipv4_err), Err(ipv6_err)) => {
132                bail!("Ipv4: {:?}, Ipv6: {:?}", ipv4_err, ipv6_err)
133            }
134        }
135    }
136
137    /// Resolve a hostname from a URL to an IP address.
138    pub async fn resolve_host(
139        &self,
140        url: &Url,
141        prefer_ipv6: bool,
142        timeout: Duration,
143    ) -> Result<IpAddr> {
144        let host = url.host().context("Invalid URL")?;
145        match host {
146            url::Host::Domain(domain) => {
147                // Need to do a DNS lookup
148                let lookup = tokio::join!(
149                    self.lookup_ipv4(domain, timeout),
150                    self.lookup_ipv6(domain, timeout)
151                );
152                let (v4, v6) = match lookup {
153                    (Err(ipv4_err), Err(ipv6_err)) => {
154                        bail!("Ipv4: {ipv4_err:?}, Ipv6: {ipv6_err:?}");
155                    }
156                    (Err(_), Ok(mut v6)) => (None, v6.next()),
157                    (Ok(mut v4), Err(_)) => (v4.next(), None),
158                    (Ok(mut v4), Ok(mut v6)) => (v4.next(), v6.next()),
159                };
160                if prefer_ipv6 { v6.or(v4) } else { v4.or(v6) }.context("No response")
161            }
162            url::Host::Ipv4(ip) => Ok(IpAddr::V4(ip)),
163            url::Host::Ipv6(ip) => Ok(IpAddr::V6(ip)),
164        }
165    }
166
167    /// Perform an ipv4 lookup with a timeout in a staggered fashion.
168    ///
169    /// From the moment this function is called, each lookup is scheduled after the delays in
170    /// `delays_ms` with the first call being done immediately. `[200ms, 300ms]` results in calls
171    /// at T+0ms, T+200ms and T+300ms. The `timeout` is applied to each call individually. The
172    /// result of the first successful call is returned, or a summary of all errors otherwise.
173    pub async fn lookup_ipv4_staggered(
174        &self,
175        host: impl ToString,
176        timeout: Duration,
177        delays_ms: &[u64],
178    ) -> Result<impl Iterator<Item = IpAddr>> {
179        let host = host.to_string();
180        let f = || self.lookup_ipv4(host.clone(), timeout);
181        stagger_call(f, delays_ms).await
182    }
183
184    /// Perform an ipv6 lookup with a timeout in a staggered fashion.
185    ///
186    /// From the moment this function is called, each lookup is scheduled after the delays in
187    /// `delays_ms` with the first call being done immediately. `[200ms, 300ms]` results in calls
188    /// at T+0ms, T+200ms and T+300ms. The `timeout` is applied to each call individually. The
189    /// result of the first successful call is returned, or a summary of all errors otherwise.
190    pub async fn lookup_ipv6_staggered(
191        &self,
192        host: impl ToString,
193        timeout: Duration,
194        delays_ms: &[u64],
195    ) -> Result<impl Iterator<Item = IpAddr>> {
196        let host = host.to_string();
197        let f = || self.lookup_ipv6(host.clone(), timeout);
198        stagger_call(f, delays_ms).await
199    }
200
201    /// Race an ipv4 and ipv6 lookup with a timeout in a staggered fashion.
202    ///
203    /// From the moment this function is called, each lookup is scheduled after the delays in
204    /// `delays_ms` with the first call being done immediately. `[200ms, 300ms]` results in calls
205    /// at T+0ms, T+200ms and T+300ms. The `timeout` is applied as stated in
206    /// [`Self::lookup_ipv4_ipv6`]. The result of the first successful call is returned, or a
207    /// summary of all errors otherwise.
208    pub async fn lookup_ipv4_ipv6_staggered(
209        &self,
210        host: impl ToString,
211        timeout: Duration,
212        delays_ms: &[u64],
213    ) -> Result<impl Iterator<Item = IpAddr>> {
214        let host = host.to_string();
215        let f = || self.lookup_ipv4_ipv6(host.clone(), timeout);
216        stagger_call(f, delays_ms).await
217    }
218
219    /// Looks up node info by [`NodeId`] and origin domain name.
220    ///
221    /// To lookup nodes that published their node info to the DNS servers run by n0,
222    /// pass [`N0_DNS_NODE_ORIGIN_PROD`] as `origin`.
223    pub async fn lookup_node_by_id(&self, node_id: &NodeId, origin: &str) -> Result<NodeInfo> {
224        let attrs = crate::node_info::TxtAttrs::<crate::node_info::IrohAttr>::lookup_by_id(
225            self, node_id, origin,
226        )
227        .await?;
228        let info = attrs.into();
229        Ok(info)
230    }
231
232    /// Looks up node info by DNS name.
233    pub async fn lookup_node_by_domain_name(&self, name: &str) -> Result<NodeInfo> {
234        let attrs =
235            crate::node_info::TxtAttrs::<crate::node_info::IrohAttr>::lookup_by_name(self, name)
236                .await?;
237        let info = attrs.into();
238        Ok(info)
239    }
240
241    /// Looks up node info by DNS name in a staggered fashion.
242    ///
243    /// From the moment this function is called, each lookup is scheduled after the delays in
244    /// `delays_ms` with the first call being done immediately. `[200ms, 300ms]` results in calls
245    /// at T+0ms, T+200ms and T+300ms. The result of the first successful call is returned, or a
246    /// summary of all errors otherwise.
247    pub async fn lookup_node_by_domain_name_staggered(
248        &self,
249        name: &str,
250        delays_ms: &[u64],
251    ) -> Result<NodeInfo> {
252        let f = || self.lookup_node_by_domain_name(name);
253        stagger_call(f, delays_ms).await
254    }
255
256    /// Looks up node info by [`NodeId`] and origin domain name.
257    ///
258    /// From the moment this function is called, each lookup is scheduled after the delays in
259    /// `delays_ms` with the first call being done immediately. `[200ms, 300ms]` results in calls
260    /// at T+0ms, T+200ms and T+300ms. The result of the first successful call is returned, or a
261    /// summary of all errors otherwise.
262    pub async fn lookup_node_by_id_staggered(
263        &self,
264        node_id: &NodeId,
265        origin: &str,
266        delays_ms: &[u64],
267    ) -> Result<NodeInfo> {
268        let f = || self.lookup_node_by_id(node_id, origin);
269        stagger_call(f, delays_ms).await
270    }
271}
272
273impl Default for DnsResolver {
274    fn default() -> Self {
275        Self::new()
276    }
277}
278
279impl From<TokioResolver> for DnsResolver {
280    fn from(resolver: TokioResolver) -> Self {
281        DnsResolver(resolver)
282    }
283}
284
285/// TXT records returned from [`DnsResolver::lookup_txt`]
286#[derive(Debug, Clone)]
287pub struct TxtLookup(pub(crate) hickory_resolver::lookup::TxtLookup);
288
289impl From<hickory_resolver::lookup::TxtLookup> for TxtLookup {
290    fn from(value: hickory_resolver::lookup::TxtLookup) -> Self {
291        Self(value)
292    }
293}
294
295impl IntoIterator for TxtLookup {
296    type Item = TXT;
297
298    type IntoIter = Box<dyn Iterator<Item = TXT>>;
299
300    fn into_iter(self) -> Self::IntoIter {
301        Box::new(self.0.into_iter().map(TXT))
302    }
303}
304
305/// Record data for a TXT record
306#[derive(Debug, Clone)]
307pub struct TXT(hickory_resolver::proto::rr::rdata::TXT);
308
309impl TXT {
310    /// Returns the raw character strings of this TXT record.
311    pub fn txt_data(&self) -> &[Box<[u8]>] {
312        self.0.txt_data()
313    }
314}
315
316impl fmt::Display for TXT {
317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
318        write!(f, "{}", self.0)
319    }
320}
321
/// Deprecated IPv6 site-local anycast addresses still configured by Windows.
///
/// Windows configures these site-local addresses as DNS servers as soon as even an IPv6
/// loopback interface is configured.  We do not want to use them: the chance of them being
/// usable is almost always close to zero, and the chance of a DNS configuration relying
/// **only** on these servers, without also being configured normally, is also almost zero.
/// The chance of the resolver accidentally trying one of them and sitting through a bunch
/// of timeouts before figuring out they're no good is, on the other hand, very high.
const WINDOWS_BAD_SITE_LOCAL_DNS_SERVERS: [IpAddr; 3] = {
    /// Builds `fec0:0:0:ffff::<host>`.
    const fn site_local_dns(host: u16) -> IpAddr {
        IpAddr::V6(Ipv6Addr::new(0xfec0, 0, 0, 0xffff, 0, 0, 0, host))
    }

    [site_local_dns(1), site_local_dns(2), site_local_dns(3)]
};
335
/// Helper enum to give a unified type to the iterators of [`DnsResolver::lookup_ipv4_ipv6`].
///
/// `A` is the iterator over the IPv4 results and `B` the iterator over the IPv6 results.
enum LookupIter<A, B> {
    /// Only IPv4 results are available.
    Ipv4(A),
    /// Only IPv6 results are available.
    Ipv6(B),
    /// Both result sets are available; IPv4 addresses are yielded first.
    Both(std::iter::Chain<A, B>),
}

impl<A: Iterator<Item = IpAddr>, B: Iterator<Item = IpAddr>> Iterator for LookupIter<A, B> {
    type Item = IpAddr;

    fn next(&mut self) -> Option<Self::Item> {
        match self {
            LookupIter::Ipv4(iter) => iter.next(),
            LookupIter::Ipv6(iter) => iter.next(),
            LookupIter::Both(iter) => iter.next(),
        }
    }

    // Forward the hint of the wrapped iterator: the default implementation always reports
    // `(0, None)`, which hides the known length from callers that `collect()` the addresses.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self {
            LookupIter::Ipv4(iter) => iter.size_hint(),
            LookupIter::Ipv6(iter) => iter.size_hint(),
            LookupIter::Both(iter) => iter.size_hint(),
        }
    }
}
354
355/// Staggers calls to the future F with the given delays.
356///
357/// The first call is performed immediately. The first call to succeed generates an Ok result
358/// ignoring any previous error. If all calls fail, an error summarizing all errors is returned.
359async fn stagger_call<T, F: Fn() -> Fut, Fut: Future<Output = Result<T>>>(
360    f: F,
361    delays_ms: &[u64],
362) -> Result<T> {
363    let mut calls = n0_future::FuturesUnorderedBounded::new(delays_ms.len() + 1);
364    // NOTE: we add the 0 delay here to have a uniform set of futures. This is more performant than
365    // using alternatives that allow futures of different types.
366    for delay in std::iter::once(&0u64).chain(delays_ms) {
367        let delay = Duration::from_millis(*delay);
368        let fut = f();
369        let staggered_fut = async move {
370            time::sleep(delay).await;
371            fut.await
372        };
373        calls.push(staggered_fut)
374    }
375
376    let mut errors = vec![];
377    while let Some(call_result) = calls.next().await {
378        match call_result {
379            Ok(t) => return Ok(t),
380            Err(e) => errors.push(e),
381        }
382    }
383
384    bail!(
385        "no calls succeed: [ {}]",
386        errors.into_iter().fold(String::new(), |mut summary, e| {
387            write!(summary, "{e} ").expect("infallible");
388            summary
389        })
390    )
391}
392
#[cfg(test)]
pub(crate) mod tests {
    use std::sync::atomic::{AtomicUsize, Ordering};

    use tracing_test::traced_test;

    use super::*;

    /// The attempts are created in order (results 0, 1, 2) with delays 0ms, 1000ms and 15ms.
    /// The immediate attempt fails, so the 15ms attempt (third result, `Ok(5)`) must win.
    #[tokio::test]
    #[traced_test]
    async fn stagger_basic() {
        const OUTCOMES: &[Result<u8, u8>] = &[Err(2), Ok(3), Ok(5), Ok(7)];
        static CALLS_MADE: AtomicUsize = AtomicUsize::new(0);

        let make_call = || {
            let idx = CALLS_MADE.fetch_add(1, Ordering::Relaxed);
            async move {
                tracing::info!(r_pos = idx, "call");
                OUTCOMES[idx].map_err(|code| anyhow::anyhow!("{code}"))
            }
        };

        let delays_ms: [u64; 2] = [1000, 15];
        let outcome = stagger_call(make_call, &delays_ms).await.unwrap();
        assert_eq!(outcome, 5)
    }
}
418}