Skip to main content

ts_runtime/
magic_dns.rs

1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//!   to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//!   negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//!   us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//!   no route are forwarded to the fallback resolvers, else the global resolvers.
14//! - **Fail closed**: if no route and no resolver is configured, an unknown name is `NXDOMAIN`.
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//!   the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//!   off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//!   `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//!   HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//!   forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//!   `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//!   zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//!   (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37    net::{IpAddr, Ipv4Addr, SocketAddr},
38    sync::Arc,
39    time::Duration,
40};
41
42use kameo::{
43    actor::ActorRef,
44    message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48    sync::{Semaphore, watch},
49    task::JoinSet,
50    time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56    Error,
57    env::Env,
58    peer_tracker::{PeerDb, PeerState},
59};
60
/// Per-upstream deadline: how long a forwarded query may wait for an answer before we give up.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Upper bound on forwarded queries in flight at once on the local `100.100.100.100:53` responder.
///
/// Every forward runs on its own spawned task and keeps an overlay UDP socket open until the
/// upstream answers or [`UPSTREAM_TIMEOUT`] expires. With no bound, a local/tailnet client that
/// sprays distinct forwardable names could pile up unlimited sockets and tasks — a
/// resource-exhaustion DoS whenever the upstream is slow or black-holed, because each one lingers
/// for the full timeout. The bound works like the peerAPI DoH server's request-handler limit
/// ([`crate::peerapi`]'s `MAX_INFLIGHT`): take a permit before spawning, and when none is free drop
/// the query (fail closed). Dropping a DNS query is harmless — the stub resolver retries or times
/// out — and Go's resolver also limits outstanding forwards instead of spawning without bound.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Largest forwarded upstream response (one UDP datagram) we hold in memory.
///
/// Same size as Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). Because the client's
/// query is forwarded verbatim, a client that advertises a large EDNS UDP size can legitimately
/// draw a 1300–4096 byte UDP answer (big TXT sets, DNSSEC, many-record round-robins). The earlier
/// cap of 1232 truncated those answers and set TC, which demands a TCP retry that this fork's
/// UDP-only forwarder cannot serve — making the large answer unreachable. At 4 KiB they are
/// relayed intact.
const MAX_UPSTREAM_RESPONSE: usize = 4 * 1024;

/// The MagicDNS service address. The netstack interface owns it, so a `udp_bind` on this IP
/// receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The DNS service port the responder binds.
const MAGIC_DNS_PORT: u16 = 53;
88
89/// The latest view the answer loop resolves queries against.
90///
91/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
92/// updates) and read fresh by the answer loop for every packet.
93#[derive(Clone, Default)]
94pub(crate) struct DnsView {
95    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing.
96    pub(crate) cfg: DnsConfig,
97    /// The current peer database, if we've seen a peer update.
98    pub(crate) peers: Option<Arc<PeerDb>>,
99    /// This node, if we've seen a self-node update.
100    pub(crate) self_node: Option<Node>,
101    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
102    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
103    /// recursive resolution to this address over the overlay instead of forwarding to the locally
104    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
105    ///
106    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
107    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
108    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
109    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
110    pub(crate) exit_doh: Option<SocketAddr>,
111    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
112    ///
113    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
114    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
115    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
116    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
117    pub(crate) enable_ipv6: bool,
118    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
119    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
120    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
121    /// join for connectivity without taking over DNS.
122    ///
123    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
124    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
125    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
126    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
127    pub(crate) accept_dns: bool,
128}
129
130impl DnsView {
131    /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132    fn node_by_name(&self, name: &str) -> Option<Node> {
133        if let Some(node) = self
134            .peers
135            .as_ref()
136            .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137        {
138            return Some(node);
139        }
140
141        self.self_node
142            .as_ref()
143            .filter(|n| n.matches_name(name))
144            .cloned()
145    }
146
147    /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148    /// wins first — tried as written and then qualified by each tailnet search domain (so a
149    /// short/partially-qualified name like `host` or `host.user` still resolves to
150    /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151    /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152    /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153    /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154    /// control-pushed static record — never anything else.
155    fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156        let addr_of = |node: Node| -> IpAddr {
157            if want_v4 {
158                IpAddr::from(node.tailnet_address.ipv4.addr())
159            } else {
160                IpAddr::from(node.tailnet_address.ipv6.addr())
161            }
162        };
163
164        if let Some(node) = self.node_by_name(canon) {
165            return Some(addr_of(node));
166        }
167        for suffix in &self.cfg.search_domains {
168            if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169                return Some(addr_of(node));
170            }
171        }
172
173        // Control-pushed static records match the fully-qualified query name only.
174        self.cfg.extra_records.iter().find_map(|rec| {
175            let family_ok = matches!(
176                (rec.addr, want_v4),
177                (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178            );
179            (rec.name == canon && family_ok).then_some(rec.addr)
180        })
181    }
182
183    /// Find the node (peer or self) that owns the tailnet IP `ip`.
184    fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185        if let Some(node) = self
186            .peers
187            .as_ref()
188            .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189        {
190            return Some(node);
191        }
192
193        self.self_node
194            .as_ref()
195            .filter(|n| {
196                IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197                    || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198            })
199            .cloned()
200    }
201
202    /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203    /// resolvers, returning the upstreams to forward to.
204    ///
205    /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206    /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207    /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208    /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209    /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210    /// ([`Upstreams::None`] => NXDOMAIN).
211    fn route_for(&self, name: &str) -> Upstreams<'_> {
212        let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213        for (suffix, upstreams) in &self.cfg.routes {
214            if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215                best = Some((suffix.as_str(), upstreams));
216            }
217        }
218
219        if let Some((_, upstreams)) = best {
220            return if upstreams.is_empty() {
221                Upstreams::Block
222            } else {
223                // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224                // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225                // route) must keep receiving the query directly.
226                Upstreams::Route(upstreams)
227            };
228        }
229
230        if !self.cfg.fallback_resolvers.is_empty() {
231            return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232        }
233        if !self.cfg.resolvers.is_empty() {
234            return Upstreams::Recursive(&self.cfg.resolvers);
235        }
236        Upstreams::None
237    }
238}
239
240/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
241enum Upstreams<'a> {
242    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
243    Route(&'a [DnsResolver]),
244    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
245    /// exit-node DoH delegation in the client serve loop.
246    Recursive(&'a [DnsResolver]),
247    /// A negative split-DNS route matched: do not resolve (NXDOMAIN).
248    Block,
249    /// No route and no resolver configured: fail closed (NXDOMAIN).
250    None,
251}
252
/// Result of the synchronous decision step for one query: a finished response to send back, or an
/// instruction to forward the original query to an upstream resolver.
pub(crate) enum Decision {
    /// The response is complete; send these bytes.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of the listed upstream UDP resolvers. A
    /// successful upstream answer is relayed; if all of them fail or time out, the prepared
    /// NXDOMAIN (same id + question) is sent instead.
    Forward {
        /// Upstream UDP resolvers, tried in this order.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes, forwarded verbatim.
        query: Vec<u8>,
        /// NXDOMAIN response to fall back to when every upstream fails.
        nxdomain: Vec<u8>,
        /// `true` for a *recursive* forward (catch-all fallback/global resolver), `false` for a
        /// deliberately-configured split-DNS route. Only recursive forwards may be delegated to
        /// the exit node's DoH endpoint by the client serve loop (see [`DnsView::exit_doh`]);
        /// split-DNS routes always stay on their configured upstreams (typically subnet-reachable
        /// internal resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
275
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or when `name` ends with
/// `suffix` and the character just before it is a `.` (`"a.corp"` is under `"corp"`, `"acorp"` is
/// not).
///
/// An **empty** suffix matches nothing. This is defense-in-depth: every string ends with `""`, so
/// accepting it would either over-route or classify every name as a tailnet name — both
/// leak-prone.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    match name.strip_suffix(suffix) {
        // Nothing left over: the name is the suffix itself.
        Some("") => true,
        // Something left over: it must end at a label boundary.
        Some(head) => head.ends_with('.'),
        None => false,
    }
}
289
290/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
291/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
292/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
293fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
294    view.cfg
295        .search_domains
296        .iter()
297        .any(|suffix| suffix_matches(name, suffix))
298}
299
300/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
301/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
302/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
303/// such queries fail closed to NXDOMAIN.
304fn is_ip6_arpa(name: &str) -> bool {
305    suffix_matches(name, "ip6.arpa")
306}
307
/// Whether `ip` belongs to the Tailscale CGNAT block `100.64.0.0/10` (RFC 6598, the tailnet IPv4
/// space).
///
/// MagicDNS is authoritative for reverse (`PTR`) queries on these addresses: when no peer owns the
/// IP, callers answer NXDOMAIN (fail closed) instead of forwarding the probe to a third-party
/// resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    /// Network address of `100.64.0.0/10` as a big-endian integer.
    const CGNAT_NET: u32 = 0x6440_0000;
    /// Netmask selecting the top ten bits.
    const CGNAT_MASK: u32 = 0xFFC0_0000;
    (u32::from(ip) & CGNAT_MASK) == CGNAT_NET
}
315
316/// Decide what to do with a single DNS query against `view`: either a complete response is ready
317/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
318/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
319///
320/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
321/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
322/// NXDOMAIN rather than leaking to an upstream resolver.
323pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
324    // Malformed / non-query input is dropped: we never answer something we can't parse.
325    let query = decode_query(buf).ok()?;
326    let q = &query.question;
327    let id = query.id;
328
329    let reply = |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rcode, answers));
330
331    // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
332    // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
333    // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
334    // config and refuses every query. This one read site covers the netstack responder, the peerAPI
335    // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
336    if !view.cfg.magic_dns || !view.accept_dns {
337        return Some(reply(Rcode::Refused, &[]));
338    }
339
340    let canon = q.name.to_canon();
341
342    // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
343    // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
344    // forwards such a query like any other name. Treat it as an unsupported authoritative type —
345    // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
346    // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
347    const CLASS_IN: u16 = 1;
348    if q.qclass != CLASS_IN {
349        return Some(forward_or_nodata(view, &canon, buf, id, q));
350    }
351
352    Some(match &q.qtype {
353        QType::A => match view.resolve_addr(&canon, true) {
354            Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
355            // No overlay/extra-record answer: try split-DNS / recursive upstreams.
356            _ => forward_or_nxdomain(view, &canon, buf, id, q),
357        },
358        QType::Aaaa => match view.resolve_addr(&canon, false) {
359            // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
360            // (default) the client is IPv4-only, so answering with the overlay v6 address would
361            // only hand out an unroutable address — dead connections plus a fingerprint. Return
362            // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
363            // data as historically. We never forward this name to a recursive upstream either way:
364            // a positive overlay match is authoritative.
365            Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
366                reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
367            }
368            Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
369            // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
370            // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
371            _ => forward_or_nxdomain(view, &canon, buf, id, q),
372        },
373        QType::Ptr => match q.name.ptr_to_ipv4() {
374            Some(octets) => {
375                let v4: Ipv4Addr = octets.into();
376                let ip = IpAddr::V4(v4);
377                match view.node_by_ip(ip) {
378                    Some(node) => {
379                        let fqdn = node.fqdn(false);
380                        let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
381                        reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
382                    }
383                    // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
384                    // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
385                    // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
386                    // genuinely off-tailnet reverse queries are forwarded.
387                    None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
388                    None => forward_or_nxdomain(view, &canon, buf, id, q),
389                }
390            }
391            // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
392            // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
393            // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
394            None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
395            None => forward_or_nxdomain(view, &canon, buf, id, q),
396        },
397        // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
398        // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
399        // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
400        // REFUSED as "this server won't serve me" and abandons the resolver, which would break
401        // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
402        // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
403        QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q),
404    })
405}
406
407/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
408/// either forward (to UDP upstreams) or fail closed with NXDOMAIN.
409///
410/// Anti-leak: a name under a tailnet search domain is authoritative and is never forwarded — it
411/// fails closed to NXDOMAIN so neither the name nor the query leaks to a third-party resolver.
412fn forward_or_nxdomain(
413    view: &DnsView,
414    canon: &str,
415    buf: &[u8],
416    id: u16,
417    q: &ts_dns_wire::Question,
418) -> Decision {
419    let nxdomain = encode_response(id, q, Rcode::NxDomain, &[]);
420
421    if is_tailnet_name(view, canon) {
422        return Decision::Reply(nxdomain);
423    }
424
425    let (resolvers, recursive) = match view.route_for(canon) {
426        Upstreams::Route(resolvers) => (resolvers, false),
427        Upstreams::Recursive(resolvers) => (resolvers, true),
428        // Negative route or nothing configured: fail closed.
429        Upstreams::Block | Upstreams::None => return Decision::Reply(nxdomain),
430    };
431
432    let upstreams: Vec<SocketAddr> = resolvers
433        .iter()
434        .map(DnsResolver::udp_addr)
435        // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
436        .filter(SocketAddr::is_ipv4)
437        .collect();
438    if upstreams.is_empty() {
439        Decision::Reply(nxdomain)
440    } else {
441        Decision::Forward {
442            upstreams,
443            query: buf.to_vec(),
444            nxdomain,
445            recursive,
446        }
447    }
448}
449
450/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
451/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
452/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
453/// NXDOMAIN.
454///
455/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
456/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
457/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
458/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
459/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
460/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
461/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
462/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
463///
464/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
465/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
466fn forward_or_nodata(
467    view: &DnsView,
468    canon: &str,
469    buf: &[u8],
470    id: u16,
471    q: &ts_dns_wire::Question,
472) -> Decision {
473    // Authoritative tailnet name: NODATA (empty NOERROR), not NXDOMAIN — the name exists.
474    if is_tailnet_name(view, canon) {
475        return Decision::Reply(encode_response(id, q, Rcode::NoError, &[]));
476    }
477    // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
478    // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
479    // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
480    // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
481    // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
482    // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
483    if is_ip6_arpa(canon) {
484        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
485    }
486    if let Some(octets) = q.name.ptr_to_ipv4()
487        && is_tailnet_cgnat(octets.into())
488    {
489        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
490    }
491    // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
492    // non-tailnet names and fails closed (NXDOMAIN) when no upstream is configured/routable; reuse it
493    // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN path is unreachable here).
494    forward_or_nxdomain(view, canon, buf, id, q)
495}
496
/// How the client side carries out a *recursive* forward: stay on local UDP upstreams, or hand the
/// query to the active exit node's peerAPI DoH endpoint over the overlay.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Chosen when no exit node is active, or when the
    /// config has `use_with_exit_node` resolvers (which stay local even with an exit node
    /// selected).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address.
    Doh(SocketAddr),
}
507
508/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
509/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
510///
511/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
512/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
513///   resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
514///   is selected); forward to them over UDP, do NOT delegate.
515/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
516///   then egresses from the exit node, not this host (the whole point of routing through an exit
517///   node: this node's real IP is never used to resolve the peer's public names).
518pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
519    let Some(doh) = view.exit_doh else {
520        return RecursivePlan::Udp(default_upstreams);
521    };
522    let kept: Vec<SocketAddr> = view
523        .cfg
524        .resolvers_with_exit_node()
525        .map(DnsResolver::udp_addr)
526        // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
527        .filter(SocketAddr::is_ipv4)
528        .collect();
529    if kept.is_empty() {
530        RecursivePlan::Doh(doh)
531    } else {
532        RecursivePlan::Udp(kept)
533    }
534}
535
536/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
537/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
538/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
539/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
540/// truncation actually occurs.
541fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
542    if resp.len() > MAX_UPSTREAM_RESPONSE {
543        resp.truncate(MAX_UPSTREAM_RESPONSE);
544        // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
545        // capped datagram is always >= the header length, but guard anyway to never panic.
546        if let Some(flags_hi) = resp.get_mut(2) {
547            *flags_hi |= 0x02;
548        }
549    }
550    resp
551}
552
/// Size in bytes of the fixed DNS message header.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question (QNAME + QTYPE + QCLASS) inside `msg`, returning its byte range. The
/// range always starts right after the 12-byte header.
///
/// Returns [`None`] when the name is malformed, contains a label byte with either of its top two
/// bits set (a compression pointer, which is illegal in a question), or when the name or the
/// trailing type/class fields run past the end of the buffer. The range is used to byte-compare a
/// forwarded query's question with the one echoed in the upstream response.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;

    // Step over the QNAME one label at a time until the root label terminates it.
    loop {
        match *msg.get(cursor)? {
            // Root label (0x00): consume it, the name is complete.
            0 => {
                cursor += 1;
                break;
            }
            // Top two bits in use: not a plain label length.
            len if len & 0xC0 != 0 => return None,
            // Ordinary label: skip the length byte plus the label bytes.
            len => {
                cursor = cursor.checked_add(1 + usize::from(len))?;
                if cursor > msg.len() {
                    return None;
                }
            }
        }
    }

    // QTYPE (2 bytes) and QCLASS (2 bytes) directly follow the name.
    let end = cursor.checked_add(4).filter(|&end| end <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
585
586/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
587/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
588/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
589/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
590/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
591/// answer for a different question, so we also require the echoed question to match.
592fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
593    if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
594        return false;
595    }
596    let id_matches = query[0..2] == resp[0..2];
597    let is_response = resp[2] & 0x80 != 0;
598    if !id_matches || !is_response {
599        return false;
600    }
601    // The response must echo the exact question we asked. Parse both question sections and compare
602    // their bytes; a parse failure on either side is treated as a non-match (fail closed).
603    match (question_range(query), question_range(resp)) {
604        (Some(q), Some(r)) => query[q] == resp[r],
605        _ => false,
606    }
607}
608
609/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
610/// well-formed response, or `nxdomain` if every upstream times out or errors.
611///
612/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
613/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
614/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
615/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
616pub(crate) async fn forward_query(
617    channel: &Channel,
618    upstreams: &[SocketAddr],
619    query: &[u8],
620    nxdomain: Vec<u8>,
621) -> Vec<u8> {
622    for upstream in upstreams {
623        let socket = match channel
624            .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
625            .await
626        {
627            Ok(s) => s,
628            Err(e) => {
629                tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
630                continue;
631            }
632        };
633
634        if let Err(e) = socket.send_to(*upstream, query).await {
635            tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
636            continue;
637        }
638
639        match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
640            Ok(Ok((from, resp))) if !resp.is_empty() => {
641                // Anti-poisoning: only accept a datagram that came from the upstream we queried
642                // and whose DNS header matches this query (same transaction id, QR=response bit
643                // set). An off-path injector racing the real answer is otherwise relayed straight
644                // back to the stub resolver as authoritative.
645                if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
646                    tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
647                    continue;
648                }
649                return cap_response(resp.to_vec());
650            }
651            Ok(Ok(_)) => continue,
652            Ok(Err(e)) => {
653                tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
654                continue;
655            }
656            Err(_) => {
657                tracing::debug!(%upstream, "magic dns upstream timed out");
658                continue;
659            }
660        }
661    }
662    nxdomain
663}
664
665/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
666///
667/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
668/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
669async fn serve(
670    socket: netstack::netsock::UdpSocket,
671    rx: watch::Receiver<Arc<DnsView>>,
672    channel: Channel,
673) {
674    let socket = Arc::new(socket);
675    let mut forwards = JoinSet::new();
676    // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
677    // lifetime of each spawned forward task and released on completion.
678    let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
679    loop {
680        let (src, buf) = match socket.recv_from_bytes().await {
681            Ok(pkt) => pkt,
682            Err(e) => {
683                tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
684                return;
685            }
686        };
687
688        // Read the freshest view per packet.
689        let view = rx.borrow().clone();
690
691        match decide(&view, &buf) {
692            // Malformed query: drop silently.
693            None => continue,
694            Some(Decision::Reply(resp)) => {
695                if let Err(e) = socket.send_to(src, &resp).await {
696                    tracing::warn!(error = %e, %src, "magic dns response send failed");
697                }
698            }
699            Some(Decision::Forward {
700                upstreams,
701                query,
702                nxdomain,
703                recursive,
704            }) => {
705                // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
706                // always stays on its configured upstreams. Decide the plan against the current
707                // view so a query routed while an exit node is active egresses from that exit node.
708                let plan = if recursive {
709                    recursive_plan(&view, upstreams)
710                } else {
711                    RecursivePlan::Udp(upstreams)
712                };
713                // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
714                // times out) rather than spawn an unbounded task that pins an overlay socket for up
715                // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
716                // (NOT `let _ =`, which would drop it immediately) so it is released only when the
717                // task body completes.
718                let Ok(permit) = inflight.clone().try_acquire_owned() else {
719                    tracing::warn!(
720                        %src,
721                        max = MAX_INFLIGHT_FORWARDS,
722                        "magic dns drop: at max in-flight forwarded queries"
723                    );
724                    continue;
725                };
726                let socket = socket.clone();
727                let channel = channel.clone();
728                forwards.spawn(async move {
729                    let _permit = permit;
730                    let resp = match plan {
731                        RecursivePlan::Udp(upstreams) => {
732                            forward_query(&channel, &upstreams, &query, nxdomain).await
733                        }
734                        RecursivePlan::Doh(doh_addr) => {
735                            crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, nxdomain)
736                                .await
737                        }
738                    };
739                    if let Err(e) = socket.send_to(src, &resp).await {
740                        tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
741                    }
742                });
743            }
744        }
745
746        // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
747        // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
748        // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
749        while forwards.try_join_next().is_some() {}
750    }
751}
752
/// The MagicDNS responder actor.
///
/// Subscribes to control state (for the DNS config + self node), peer state (for the peer
/// database) and the active exit node, keeping a [`DnsView`] that the spawned answer loop reads for
/// every query.
pub struct MagicDnsActor {
    /// Keeps the socket-serving task (and, when a peerAPI port is advertised, the peerAPI server
    /// task) alive for the lifetime of the actor.
    _joinset: JoinSet<()>,
    /// The latest view, shared with the answer loop. Every update handler replaces the `Arc` it
    /// holds with a freshly built view.
    view_tx: watch::Sender<Arc<DnsView>>,
    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
    /// rebuild time — not captured once — for a toggle to reach the served view.
    env: Env,
    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
    /// forward path the serve loop uses ([`forward_query`] /
    /// [`forward_doh`](crate::peerapi_doh::forward_doh), both called with this channel — never a
    /// host socket).
    channel: Channel,
}
772
/// A programmatic DNS query routed through the live MagicDNS responder (the `100.100.100.100` path),
/// for [`Device::query_dns`](crate::Device::query_dns). The handler synthesizes a query packet and
/// drives it through the exact same [`decide`]/forward logic as an on-the-wire query, so the result
/// (and its anti-leak posture) matches what a tailnet client would observe.
///
/// Derives `Debug` and `Clone` like its reply type [`DnsQueryResult`], so a request can be logged
/// or retried without rebuilding it.
#[derive(Debug, Clone)]
pub struct Query {
    /// The name to resolve (e.g. `example.com`). A single trailing dot is tolerated: the handler
    /// strips it and ignores empty labels before encoding. Non-ASCII names must be passed as
    /// punycode.
    pub name: String,
    /// The DNS query type (`1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE).
    pub qtype: u16,
}
783
/// The outcome of a [`Query`]: the raw DNS response bytes, the RCODE, and which upstream resolvers
/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
/// rather than parsed records — this fork's wire codec has no answer-record decoder.
#[derive(Debug, Clone, kameo::Reply)]
pub struct DnsQueryResult {
    /// The raw DNS response datagram (header + question + any answer records). When the queried
    /// name could not be turned into a decodable query this is a bare 12-byte SERVFAIL header with
    /// no question section.
    pub response: Vec<u8>,
    /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
    /// `5`=Refused, …), read from header byte 3 of `response`.
    pub rcode: u8,
    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
    /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
    /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
    /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
    pub resolvers_consulted: Vec<SocketAddr>,
}
800
801impl kameo::Actor for MagicDnsActor {
802    type Args = (Env, Channel);
803    type Error = Error;
804
805    async fn on_start(
806        (env, channel): Self::Args,
807        slf: ActorRef<Self>,
808    ) -> Result<Self, Self::Error> {
809        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
810        env.subscribe::<Arc<PeerState>>(&slf).await?;
811        env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
812            .await?;
813
814        // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
815        // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
816        // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
817        // runtime-settable). The seed value is moot — no query is served before the first
818        // StateUpdate — but seeding it keeps the pre-update view internally consistent.
819        let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
820            enable_ipv6: env.enable_ipv6,
821            accept_dns: env.accept_dns(),
822            ..DnsView::default()
823        }));
824
825        let mut joinset = JoinSet::new();
826
827        // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
828        // never answers anything) so a transient bind error doesn't take down the runtime.
829        let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
830        match channel.udp_bind(addr).await {
831            Ok(socket) => {
832                tracing::debug!(%addr, "magic dns responder bound");
833                joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
834            }
835            Err(e) => {
836                tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
837            }
838        }
839
840        // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
841        // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
842        // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
843        // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
844        if let Some(port) = env.peerapi_port {
845            let channel = channel.clone();
846            let view_rx = view_rx.clone();
847            let forward_exit_egress = env.forward_exit_egress;
848            let taildrop = env.taildrop_store.clone();
849            let funnel_ingress = env.funnel_ingress.clone();
850            joinset.spawn(crate::peerapi::serve(
851                channel,
852                port,
853                view_rx,
854                forward_exit_egress,
855                taildrop,
856                funnel_ingress,
857            ));
858        }
859
860        Ok(Self {
861            _joinset: joinset,
862            view_tx,
863            env,
864            channel,
865        })
866    }
867}
868
/// Build a header-only SERVFAIL reply for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name longer than 255 bytes on the wire).
///
/// The result is 12 bytes: QR=1 (it is a response), RCODE=2 (server failure), every other bit and
/// all section counts zero — there is no question or answer section because no parseable question
/// was ever produced. Returning this gives `query_dns` a definite RCODE instead of an empty buffer
/// that would read back as NoError.
fn servfail_response() -> Vec<u8> {
    let mut header = [0u8; 12];
    header[2] = 0x80; // QR=1 in the first flags byte.
    header[3] = 0x02; // RCODE=2 in the low nibble of the second flags byte.
    header.to_vec()
}
881
882impl Message<Query> for MagicDnsActor {
883    type Reply = DnsQueryResult;
884
885    async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
886        // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
887        // uses, against the freshest view — so the result and its anti-leak posture exactly match an
888        // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
889        // and `response_matches_query` validates the echoed id against this same buffer.
890        //
891        // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
892        // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
893        // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
894        // corrupting the QTYPE/QCLASS that follow.
895        let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
896        let labels: Vec<String> = trimmed
897            .split('.')
898            .filter(|label| !label.is_empty())
899            .map(str::to_owned)
900            .collect();
901        let qtype = match query.qtype {
902            1 => ts_dns_wire::QType::A,
903            28 => ts_dns_wire::QType::Aaaa,
904            12 => ts_dns_wire::QType::Ptr,
905            other => ts_dns_wire::QType::Other(other),
906        };
907        // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
908        // forwards via `forward_or_nodata`, matching the on-the-wire path).
909        let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
910
911        let view = self.view_tx.borrow().clone();
912
913        let (response, resolvers_consulted) = match decide(&view, &buf) {
914            // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
915            // the name normalized above that can still happen for a name `encode_query` accepts but
916            // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
917            // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
918            // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
919            // silently drops here (the on-wire client times out); a programmatic caller gets a
920            // definite, honest error instead.
921            None => (servfail_response(), Vec::new()),
922            Some(Decision::Reply(resp)) => (resp, Vec::new()),
923            Some(Decision::Forward {
924                upstreams,
925                query,
926                nxdomain,
927                recursive,
928            }) => {
929                let plan = if recursive {
930                    recursive_plan(&view, upstreams)
931                } else {
932                    RecursivePlan::Udp(upstreams)
933                };
934                match plan {
935                    RecursivePlan::Udp(upstreams) => {
936                        let resp = forward_query(&self.channel, &upstreams, &query, nxdomain).await;
937                        (resp, upstreams)
938                    }
939                    RecursivePlan::Doh(doh_addr) => {
940                        let resp = crate::peerapi_doh::forward_doh(
941                            &self.channel,
942                            doh_addr,
943                            &query,
944                            nxdomain,
945                        )
946                        .await;
947                        // The query egressed via the exit node's DoH endpoint, not a local UDP
948                        // upstream — report the DoH address as the resolver consulted.
949                        (resp, vec![doh_addr])
950                    }
951                }
952            }
953        };
954
955        // RCODE is the low 4 bits of the second flags byte (header byte 3).
956        let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
957
958        DnsQueryResult {
959            response,
960            rcode,
961            resolvers_consulted,
962        }
963    }
964}
965
966impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
967    type Reply = ();
968
969    async fn handle(
970        &mut self,
971        update: Arc<ts_control::StateUpdate>,
972        _ctx: &mut Context<Self, Self::Reply>,
973    ) {
974        // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
975        // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
976        let accept_dns = self.env.accept_dns();
977        self.view_tx.send_modify(|view| {
978            let mut next = (**view).clone();
979            next.cfg = update.dns_config.clone().unwrap_or_default();
980            next.self_node = update.node.clone();
981            next.accept_dns = accept_dns;
982            *view = Arc::new(next);
983        });
984    }
985}
986
987impl Message<Arc<PeerState>> for MagicDnsActor {
988    type Reply = ();
989
990    async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
991        // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
992        // `RepublishState` that lands here, so this is the path that re-applies the gate after a
993        // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
994        let accept_dns = self.env.accept_dns();
995        self.view_tx.send_modify(|view| {
996            let mut next = (**view).clone();
997            next.peers = Some(state.peers.clone());
998            next.accept_dns = accept_dns;
999            *view = Arc::new(next);
1000        });
1001    }
1002}
1003
1004impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1005    type Reply = ();
1006
1007    async fn handle(
1008        &mut self,
1009        active: crate::route_updater::ActiveExitNode,
1010        _ctx: &mut Context<Self, Self::Reply>,
1011    ) {
1012        // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1013        // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1014        // the address here — once, from the route updater's authoritative selection — means the
1015        // serve loop never re-resolves the selector.
1016        let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1017        self.view_tx.send_modify(|view| {
1018            let mut next = (**view).clone();
1019            next.exit_doh = exit_doh;
1020            *view = Arc::new(next);
1021        });
1022    }
1023}
1024
1025#[cfg(test)]
1026mod tests {
1027    use ts_control::{StableNodeId, TailnetAddress};
1028
1029    use super::*;
1030
1031    /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1032    /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1033    /// `Forward`; a `Forward` here is a bug and panics.
1034    fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1035        match decide(view, buf)? {
1036            Decision::Reply(resp) => Some(resp),
1037            Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1038        }
1039    }
1040
1041    /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1042    fn test_node() -> Node {
1043        Node {
1044            id: 1,
1045            stable_id: StableNodeId("n1".to_string()),
1046            hostname: "host".to_string(),
1047            user_id: 0,
1048            tailnet: Some("user.ts.net".to_string()),
1049            tags: vec![],
1050            tailnet_address: TailnetAddress {
1051                ipv4: "100.64.0.1/32".parse().unwrap(),
1052                ipv6: "fd7a::1/128".parse().unwrap(),
1053            },
1054            node_key: [0u8; 32].into(),
1055            node_key_expiry: None,
1056            online: None,
1057            last_seen: None,
1058            key_signature: vec![],
1059            machine_key: None,
1060            disco_key: None,
1061            accepted_routes: vec![],
1062            underlay_addresses: vec![],
1063            derp_region: None,
1064            cap: Default::default(),
1065            cap_map: Default::default(),
1066            peerapi_port: None,
1067            peerapi_dns_proxy: false,
1068            is_wireguard_only: false,
1069            exit_node_dns_resolvers: vec![],
1070            peer_relay: false,
1071            service_vips: Default::default(),
1072        }
1073    }
1074
1075    /// A view with MagicDNS on and a single peer in the db.
1076    fn view_with_peer() -> DnsView {
1077        let mut db = PeerDb::default();
1078        db.upsert(&test_node());
1079
1080        DnsView {
1081            cfg: DnsConfig {
1082                magic_dns: true,
1083                search_domains: vec!["user.ts.net".to_string()],
1084                ..Default::default()
1085            },
1086            peers: Some(Arc::new(db)),
1087            self_node: None,
1088            exit_doh: None,
1089            enable_ipv6: false,
1090            accept_dns: true,
1091        }
1092    }
1093
1094    /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1095    fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1096        let mut buf: Vec<u8> = Vec::new();
1097        buf.extend_from_slice(&id.to_be_bytes());
1098        buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1099        buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1100        buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1101        buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1102        buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1103        for label in labels {
1104            buf.push(label.len() as u8);
1105            buf.extend_from_slice(label.as_bytes());
1106        }
1107        buf.push(0); // root label
1108        buf.extend_from_slice(&qtype.to_be_bytes());
1109        buf.extend_from_slice(&qclass.to_be_bytes());
1110        buf
1111    }
1112
1113    /// Parse a response header: returns `(id, rcode, ancount)`.
1114    fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1115        let id = u16::from_be_bytes([resp[0], resp[1]]);
1116        let flags = u16::from_be_bytes([resp[2], resp[3]]);
1117        let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1118        (id, (flags & 0x000F) as u8, ancount)
1119    }
1120
1121    #[test]
1122    fn a_query_for_known_peer_answers_v4() {
1123        let view = view_with_peer();
1124        let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1125
1126        let resp = answer(&view, &buf).expect("answers");
1127        let (id, rcode, ancount) = parse_header(&resp);
1128        assert_eq!(id, 0x1234);
1129        assert_eq!(rcode, 0, "NoError");
1130        assert_eq!(ancount, 1);
1131
1132        // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1133        let tail = &resp[resp.len() - 4..];
1134        assert_eq!(tail, &[100, 64, 0, 1]);
1135    }
1136
1137    #[test]
1138    fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1139        // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1140        // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1141        // route. This is the anti-fingerprint / no-dead-connections posture.
1142        let view = view_with_peer();
1143        assert!(!view.enable_ipv6, "default gate is off");
1144        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1145
1146        let resp = answer(&view, &buf).expect("answers");
1147        let (_, rcode, ancount) = parse_header(&resp);
1148        assert_eq!(rcode, 0, "NoError (NODATA)");
1149        assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1150    }
1151
1152    #[test]
1153    fn a_query_still_resolves_when_ipv6_off() {
1154        // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1155        let view = view_with_peer();
1156        let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1157
1158        let resp = answer(&view, &buf).expect("answers");
1159        let (_, rcode, ancount) = parse_header(&resp);
1160        assert_eq!(rcode, 0, "NoError");
1161        assert_eq!(ancount, 1);
1162        let tail = &resp[resp.len() - 4..];
1163        assert_eq!(tail, &[100, 64, 0, 1]);
1164    }
1165
1166    #[test]
1167    fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1168        // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1169        let mut view = view_with_peer();
1170        view.enable_ipv6 = true;
1171        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1172
1173        let resp = answer(&view, &buf).expect("answers");
1174        let (_, rcode, ancount) = parse_header(&resp);
1175        assert_eq!(rcode, 0, "NoError");
1176        assert_eq!(ancount, 1);
1177
1178        let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1179        let tail = &resp[resp.len() - 16..];
1180        assert_eq!(tail, expected);
1181    }
1182
1183    #[test]
1184    fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1185        // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1186        // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1187        // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1188        // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1189        let mut db = PeerDb::default();
1190        db.upsert(&test_node());
1191        let view = DnsView {
1192            cfg: DnsConfig {
1193                magic_dns: true,
1194                search_domains: vec!["user.ts.net".to_string()],
1195                fallback_resolvers: vec![DnsResolver {
1196                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1197                    use_with_exit_node: false,
1198                }],
1199                ..Default::default()
1200            },
1201            peers: Some(Arc::new(db)),
1202            self_node: None,
1203            exit_doh: None,
1204            enable_ipv6: false,
1205            accept_dns: true,
1206        };
1207        let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1208
1209        match decide(&view, &buf).expect("decides") {
1210            Decision::Reply(resp) => {
1211                let (_, rcode, _) = parse_header(&resp);
1212                assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1213            }
1214            Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1215        }
1216    }
1217
1218    #[test]
1219    fn bare_hostname_resolves() {
1220        // The name index also stores the bare hostname.
1221        let view = view_with_peer();
1222        let buf = build_query(0x7, &["host"], 1, 1);
1223
1224        let resp = answer(&view, &buf).expect("answers");
1225        let (_, rcode, ancount) = parse_header(&resp);
1226        assert_eq!(rcode, 0);
1227        assert_eq!(ancount, 1);
1228    }
1229
1230    #[test]
1231    fn unknown_name_is_nxdomain() {
1232        let view = view_with_peer();
1233        let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1234
1235        let resp = answer(&view, &buf).expect("answers");
1236        let (_, rcode, ancount) = parse_header(&resp);
1237        assert_eq!(rcode, 3, "NxDomain");
1238        assert_eq!(ancount, 0);
1239    }
1240
1241    #[test]
1242    fn magic_dns_off_is_refused() {
1243        // Fail closed: with MagicDNS disabled, even a known name is refused.
1244        let mut view = view_with_peer();
1245        view.cfg.magic_dns = false;
1246        let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1247
1248        let resp = answer(&view, &buf).expect("answers");
1249        let (_, rcode, ancount) = parse_header(&resp);
1250        assert_eq!(rcode, 5, "Refused");
1251        assert_eq!(ancount, 0);
1252    }
1253
1254    #[test]
1255    fn accept_dns_false_refuses_otherwise_answerable_query() {
1256        // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1257        // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1258        // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1259        let mut view = view_with_peer();
1260        assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1261        view.accept_dns = false;
1262        let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1263
1264        let resp = answer(&view, &buf).expect("answers");
1265        let (_, rcode, ancount) = parse_header(&resp);
1266        assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1267        assert_eq!(ancount, 0);
1268
1269        // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1270        // answers authoritatively — proving the OFF→ON restore is automatic.
1271        view.accept_dns = true;
1272        let resp = answer(&view, &buf).expect("answers");
1273        let (_, rcode, ancount) = parse_header(&resp);
1274        assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1275        assert_eq!(ancount, 1);
1276        let tail = &resp[resp.len() - 4..];
1277        assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1278    }
1279
1280    #[test]
1281    fn default_view_serves_nothing() {
1282        // The default (no dns_config seen) has magic_dns == false: fail closed.
1283        let view = DnsView::default();
1284        let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1285
1286        let resp = answer(&view, &buf).expect("answers");
1287        let (_, rcode, _) = parse_header(&resp);
1288        assert_eq!(rcode, 5, "Refused");
1289    }
1290
1291    #[test]
1292    fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1293        // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1294        // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1295        // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1296        let view = view_with_peer();
1297        let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1298
1299        let resp = answer(&view, &buf).expect("answers");
1300        let (_, rcode, ancount) = parse_header(&resp);
1301        assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1302        assert_eq!(ancount, 0, "no answer records (NODATA)");
1303    }
1304
1305    #[test]
1306    fn unsupported_qtype_off_tailnet_forwards_or_nxdomains() {
1307        // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1308        // REFUSED. With no upstream configured in this view it fails closed to NXDOMAIN (the same
1309        // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1310        // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1311        let view = view_with_peer();
1312        let buf = build_query(0x1, &["example", "com"], 65, 1);
1313
1314        let resp = answer(&view, &buf).expect("answers");
1315        let (_, rcode, _) = parse_header(&resp);
1316        assert_eq!(
1317            rcode, 3,
1318            "off-tailnet, no upstream -> NXDOMAIN (forwardable, not Refused)"
1319        );
1320    }
1321
1322    #[test]
1323    fn malformed_query_is_dropped() {
1324        // A response (QR bit set) is not a query; we drop it (no answer).
1325        let mut buf = build_query(0x1, &["host"], 1, 1);
1326        buf[2] = 0x80; // set QR bit
1327        assert!(answer(&view_with_peer(), &buf).is_none());
1328    }
1329
1330    #[test]
1331    fn ptr_for_known_ip_answers_fqdn() {
1332        let view = view_with_peer();
1333        // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1334        let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1335
1336        let resp = answer(&view, &buf).expect("answers");
1337        let (_, rcode, ancount) = parse_header(&resp);
1338        assert_eq!(rcode, 0, "NoError");
1339        assert_eq!(ancount, 1);
1340
1341        // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1342        let expected = {
1343            let mut out = Vec::new();
1344            for label in ["host", "user", "ts", "net"] {
1345                out.push(label.len() as u8);
1346                out.extend_from_slice(label.as_bytes());
1347            }
1348            out.push(0);
1349            out
1350        };
1351        let tail = &resp[resp.len() - expected.len()..];
1352        assert_eq!(tail, expected.as_slice());
1353    }
1354
1355    #[test]
1356    fn ptr_for_unknown_ip_is_nxdomain() {
1357        let view = view_with_peer();
1358        // 9.9.9.9 is not a known tailnet IP.
1359        let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1360
1361        let resp = answer(&view, &buf).expect("answers");
1362        let (_, rcode, _) = parse_header(&resp);
1363        assert_eq!(rcode, 3, "NxDomain");
1364    }
1365
1366    #[test]
1367    fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
1368        // A view WITH an upstream resolver: an off-tailnet reverse query would forward, but a
1369        // reverse query for an unmatched IP in the CGNAT range (100.64.0.0/10) must fail closed to
1370        // NXDOMAIN — the probed tailnet IP must never leak upstream.
1371        let mut db = PeerDb::default();
1372        db.upsert(&test_node());
1373        let view = DnsView {
1374            cfg: DnsConfig {
1375                magic_dns: true,
1376                search_domains: vec!["user.ts.net".to_string()],
1377                fallback_resolvers: vec![DnsResolver {
1378                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1379                    use_with_exit_node: false,
1380                }],
1381                ..Default::default()
1382            },
1383            peers: Some(Arc::new(db)),
1384            self_node: None,
1385            exit_doh: None,
1386            enable_ipv6: false,
1387            accept_dns: true,
1388        };
1389
1390        // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
1391        let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1392        match decide(&view, &buf).expect("decides") {
1393            Decision::Reply(resp) => {
1394                let (_, rcode, _) = parse_header(&resp);
1395                assert_eq!(rcode, 3, "NxDomain");
1396            }
1397            Decision::Forward { .. } => {
1398                panic!("tailnet CGNAT PTR must never be forwarded upstream")
1399            }
1400        }
1401    }
1402
1403    /// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
1404    /// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
1405    /// never forward. The PTR arm guards this, but the `QType::Other` path routes through
1406    /// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
1407    #[test]
1408    fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
1409        let mut db = PeerDb::default();
1410        db.upsert(&test_node());
1411        let view = DnsView {
1412            cfg: DnsConfig {
1413                magic_dns: true,
1414                search_domains: vec!["user.ts.net".to_string()],
1415                fallback_resolvers: vec![DnsResolver {
1416                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1417                    use_with_exit_node: false,
1418                }],
1419                ..Default::default()
1420            },
1421            peers: Some(Arc::new(db)),
1422            self_node: None,
1423            exit_doh: None,
1424            enable_ipv6: false,
1425            accept_dns: true,
1426        };
1427
1428        // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
1429        let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
1430        match decide(&view, &buf).expect("decides") {
1431            Decision::Reply(resp) => {
1432                let (_, rcode, _) = parse_header(&resp);
1433                assert_eq!(rcode, 3, "NxDomain");
1434            }
1435            Decision::Forward { .. } => {
1436                panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
1437            }
1438        }
1439    }
1440
1441    /// Same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype: must NXDOMAIN, not
1442    /// forward (revealing a tailnet ULA was probed).
1443    #[test]
1444    fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
1445        let view = view_with_routes(
1446            std::collections::BTreeMap::new(),
1447            vec![udp("9.9.9.9:53")],
1448            vec![],
1449        );
1450        // An ip6.arpa reverse name with a TXT (16) qtype must fail closed.
1451        let buf = build_query(
1452            0x37,
1453            &[
1454                "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
1455                "a", "7", "d", "f", "ip6", "arpa",
1456            ],
1457            16,
1458            1,
1459        );
1460        match decide(&view, &buf).expect("decides") {
1461            Decision::Reply(resp) => {
1462                let (_, rcode, _) = parse_header(&resp);
1463                assert_eq!(rcode, 3, "NxDomain");
1464            }
1465            Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
1466        }
1467    }
1468
1469    #[test]
1470    fn is_tailnet_cgnat_classifies_range() {
1471        assert!(is_tailnet_cgnat("100.64.0.0".parse().unwrap()));
1472        assert!(is_tailnet_cgnat("100.64.0.1".parse().unwrap()));
1473        assert!(is_tailnet_cgnat("100.127.255.255".parse().unwrap()));
1474        // Outside the /10:
1475        assert!(!is_tailnet_cgnat("100.63.255.255".parse().unwrap()));
1476        assert!(!is_tailnet_cgnat("100.128.0.0".parse().unwrap()));
1477        assert!(!is_tailnet_cgnat("9.9.9.9".parse().unwrap()));
1478        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
1479        assert!(is_tailnet_cgnat("100.100.100.100".parse().unwrap()));
1480    }
1481
1482    #[test]
1483    fn response_matches_query_validates_id_and_qr() {
1484        // query id 0x1234, QR=0
1485        let query = build_query(0x1234, &["a", "com"], 1, 1);
1486
1487        // A well-formed response: same id, QR=1.
1488        let mut good = query.clone();
1489        good[2] |= 0x80;
1490        assert!(response_matches_query(&query, &good));
1491
1492        // Same id but QR still 0 (not a response): rejected.
1493        assert!(!response_matches_query(&query, &query));
1494
1495        // QR=1 but a different transaction id: rejected (off-path forgery).
1496        let mut wrong_id = good.clone();
1497        wrong_id[0] ^= 0xFF;
1498        assert!(!response_matches_query(&query, &wrong_id));
1499
1500        // Too-short buffers: rejected.
1501        assert!(!response_matches_query(&query, &[0u8; 2]));
1502        assert!(!response_matches_query(&[0u8; 3], &good));
1503    }
1504
1505    #[test]
1506    fn self_node_resolves_when_no_peer_match() {
1507        // With the peer db empty but a self node set, the self node answers for its own name.
1508        let view = DnsView {
1509            cfg: DnsConfig {
1510                magic_dns: true,
1511                search_domains: vec![],
1512                ..Default::default()
1513            },
1514            peers: None,
1515            self_node: Some(test_node()),
1516            exit_doh: None,
1517            enable_ipv6: false,
1518            accept_dns: true,
1519        };
1520        let buf = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
1521
1522        let resp = answer(&view, &buf).expect("answers");
1523        let (_, rcode, ancount) = parse_header(&resp);
1524        assert_eq!(rcode, 0);
1525        assert_eq!(ancount, 1);
1526        let tail = &resp[resp.len() - 4..];
1527        assert_eq!(tail, &[100, 64, 0, 1]);
1528    }
1529
1530    #[test]
1531    fn partially_qualified_name_resolves_via_search_domain() {
1532        // "host.user" is not indexed directly, but the "user.ts.net" search domain qualifies it
1533        // to "host.user.user.ts.net"... which does NOT match. The realistic case is "host" (bare,
1534        // already indexed) and "host.user.ts.net" (fqdn). Verify a name needing suffix expansion:
1535        // with search domain "ts.net" the partially-qualified "host.user" => "host.user.ts.net".
1536        let mut view = view_with_peer();
1537        view.cfg.search_domains = vec!["ts.net".to_string()];
1538        let buf = build_query(0x55, &["host", "user"], 1, 1);
1539
1540        let resp = answer(&view, &buf).expect("answers");
1541        let (_, rcode, ancount) = parse_header(&resp);
1542        assert_eq!(rcode, 0, "NoError via search-domain expansion");
1543        assert_eq!(ancount, 1);
1544        let tail = &resp[resp.len() - 4..];
1545        assert_eq!(tail, &[100, 64, 0, 1]);
1546    }
1547
1548    #[test]
1549    fn extra_record_a_answers_when_no_peer_match() {
1550        // A control-pushed static A record answers for a non-peer name, fail-closed otherwise.
1551        let mut view = view_with_peer();
1552        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1553            name: "static.user.ts.net".to_string(),
1554            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1555        }];
1556        let buf = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
1557
1558        let resp = answer(&view, &buf).expect("answers");
1559        let (_, rcode, ancount) = parse_header(&resp);
1560        assert_eq!(rcode, 0, "NoError from extra record");
1561        assert_eq!(ancount, 1);
1562        let tail = &resp[resp.len() - 4..];
1563        assert_eq!(tail, &[100, 64, 0, 9]);
1564    }
1565
1566    #[test]
1567    fn extra_record_matches_query_case_insensitively() {
1568        // The query name is canonicalized (lowercased) at decode time, so a mixed-case query
1569        // matches a lowercase extra record.
1570        let mut view = view_with_peer();
1571        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1572            name: "static.user.ts.net".to_string(),
1573            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1574        }];
1575        let buf = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
1576
1577        let resp = answer(&view, &buf).expect("answers");
1578        let (_, rcode, ancount) = parse_header(&resp);
1579        assert_eq!(rcode, 0, "NoError: case-insensitive match");
1580        assert_eq!(ancount, 1);
1581        let tail = &resp[resp.len() - 4..];
1582        assert_eq!(tail, &[100, 64, 0, 9]);
1583    }
1584
1585    #[test]
1586    fn extra_record_not_expanded_by_search_domain() {
1587        // Unlike peer names, an extra record is matched as an FQDN only: a bare query that would
1588        // need search-domain expansion to reach the record name must NOT resolve.
1589        let mut view = view_with_peer();
1590        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1591            name: "static.user.ts.net".to_string(),
1592            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1593        }];
1594        // "static" would only reach "static.user.ts.net" via the "user.ts.net" search domain.
1595        let buf = build_query(0x7B, &["static"], 1, 1);
1596
1597        let resp = answer(&view, &buf).expect("answers");
1598        let (_, rcode, _) = parse_header(&resp);
1599        assert_eq!(rcode, 3, "NxDomain: extra records are not search-expanded");
1600    }
1601
1602    #[test]
1603    fn extra_record_aaaa_family_is_isolated() {
1604        // An A-only extra record must NOT answer an AAAA query for the same name (NxDomain).
1605        let mut view = view_with_peer();
1606        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1607            name: "v4only.user.ts.net".to_string(),
1608            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1609        }];
1610        let buf = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
1611
1612        let resp = answer(&view, &buf).expect("answers");
1613        let (_, rcode, _) = parse_header(&resp);
1614        assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
1615    }
1616
1617    #[test]
1618    fn extra_record_ignored_when_magic_dns_off() {
1619        // Fail closed: extra records are never served while MagicDNS is disabled.
1620        let mut view = view_with_peer();
1621        view.cfg.magic_dns = false;
1622        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1623            name: "static.user.ts.net".to_string(),
1624            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1625        }];
1626        let buf = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
1627
1628        let resp = answer(&view, &buf).expect("answers");
1629        let (_, rcode, _) = parse_header(&resp);
1630        assert_eq!(rcode, 5, "Refused");
1631    }
1632
1633    #[test]
1634    fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
1635        // A CHAOS-class (3) query for a tailnet name must NOT be answered as IN (no overlay A), and
1636        // must NOT be REFUSED (Go does no class check on the local path). It's an unsupported
1637        // authoritative class -> NODATA (empty NOERROR), and never forwarded (tailnet name).
1638        let view = view_with_peer();
1639        let buf = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
1640
1641        let resp = answer(&view, &buf).expect("answers");
1642        let (_, rcode, ancount) = parse_header(&resp);
1643        assert_eq!(
1644            rcode, 0,
1645            "NoError (NODATA), not Refused and not an IN answer"
1646        );
1647        assert_eq!(
1648            ancount, 0,
1649            "must not hand out the overlay A for a non-IN class"
1650        );
1651    }
1652
1653    #[test]
1654    fn non_in_class_off_tailnet_forwards_or_nxdomains() {
1655        // A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
1656        // No upstream here -> NXDOMAIN, proving the class gate no longer short-circuits to Refused.
1657        let view = view_with_peer();
1658        let buf = build_query(0x66, &["example", "com"], 1, 3);
1659
1660        let resp = answer(&view, &buf).expect("answers");
1661        let (_, rcode, _) = parse_header(&resp);
1662        assert_eq!(
1663            rcode, 3,
1664            "off-tailnet non-IN class, no upstream -> NXDOMAIN, not Refused"
1665        );
1666    }
1667
1668    /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1669    /// + global resolvers.
1670    fn view_with_routes(
1671        routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1672        resolvers: Vec<DnsResolver>,
1673        fallback: Vec<DnsResolver>,
1674    ) -> DnsView {
1675        DnsView {
1676            cfg: DnsConfig {
1677                magic_dns: true,
1678                search_domains: vec!["user.ts.net".to_string()],
1679                routes,
1680                resolvers,
1681                fallback_resolvers: fallback,
1682                ..Default::default()
1683            },
1684            peers: None,
1685            self_node: None,
1686            exit_doh: None,
1687            enable_ipv6: false,
1688            accept_dns: true,
1689        }
1690    }
1691
1692    fn udp(addr: &str) -> DnsResolver {
1693        DnsResolver {
1694            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1695            use_with_exit_node: false,
1696        }
1697    }
1698
1699    #[test]
1700    fn split_dns_route_forwards_to_matching_upstream() {
1701        let mut routes = std::collections::BTreeMap::new();
1702        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1703        let view = view_with_routes(routes, vec![], vec![]);
1704        let buf = build_query(0x100, &["api", "corp", "example"], 1, 1);
1705
1706        match decide(&view, &buf).expect("decides") {
1707            Decision::Forward { upstreams, .. } => {
1708                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1709            }
1710            Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
1711        }
1712    }
1713
1714    #[test]
1715    fn exotic_qtype_off_tailnet_forwards_to_upstream() {
1716        // The core of the fix: an HTTPS/SVCB (type 65) query for an off-tailnet name with a matching
1717        // route must FORWARD to the upstream (verbatim), exactly like an A query would — not REFUSE
1718        // and not NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
1719        let mut routes = std::collections::BTreeMap::new();
1720        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1721        let view = view_with_routes(routes, vec![], vec![]);
1722        let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
1723
1724        match decide(&view, &buf).expect("decides") {
1725            Decision::Forward {
1726                upstreams, query, ..
1727            } => {
1728                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1729                assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
1730            }
1731            Decision::Reply(_) => {
1732                panic!("an off-tailnet HTTPS-record query must forward, not reply")
1733            }
1734        }
1735    }
1736
1737    #[test]
1738    fn non_in_class_off_tailnet_forwards_to_upstream() {
1739        // A non-IN class for an off-tailnet routed name forwards too (Go does no class check on the
1740        // local path). Proves the class gate no longer short-circuits to REFUSED before routing.
1741        let mut routes = std::collections::BTreeMap::new();
1742        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1743        let view = view_with_routes(routes, vec![], vec![]);
1744        let buf = build_query(0x103, &["api", "corp", "example"], 1, 3);
1745
1746        match decide(&view, &buf).expect("decides") {
1747            Decision::Forward { upstreams, .. } => {
1748                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1749            }
1750            Decision::Reply(_) => {
1751                panic!("an off-tailnet non-IN-class query must forward, not reply")
1752            }
1753        }
1754    }
1755
1756    /// The local responder bounds concurrent in-flight forwards: `serve` acquires one
1757    /// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the query fail-closed when
1758    /// the pool is exhausted (a client spraying forwardable names can't open unbounded overlay
1759    /// sockets). This pins the gating semantics `serve` relies on — drained pool refuses a new
1760    /// permit; releasing one restores capacity — and the cap constant itself. (The async `serve`
1761    /// loop has no netstack-free test seam, so the semaphore behavior is exercised directly here, the
1762    /// same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
1763    #[test]
1764    fn forward_inflight_cap_fails_closed_when_saturated() {
1765        use std::sync::Arc;
1766
1767        use tokio::sync::Semaphore;
1768
1769        let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
1770
1771        // Drain every permit (one per concurrently in-flight forward).
1772        let mut held = Vec::with_capacity(MAX_INFLIGHT_FORWARDS);
1773        for _ in 0..MAX_INFLIGHT_FORWARDS {
1774            held.push(
1775                inflight
1776                    .clone()
1777                    .try_acquire_owned()
1778                    .expect("permits available below the cap"),
1779            );
1780        }
1781
1782        // At the cap, the next forward is refused — `serve` would drop the query, not spawn.
1783        assert!(
1784            inflight.clone().try_acquire_owned().is_err(),
1785            "a saturated forward pool must refuse a new permit (fail closed)"
1786        );
1787
1788        // Completing an in-flight forward releases its permit and restores capacity.
1789        drop(held.pop());
1790        assert!(
1791            inflight.clone().try_acquire_owned().is_ok(),
1792            "releasing a permit must let the next forward proceed"
1793        );
1794    }
1795
1796    /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1797    /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1798    /// only when the task completes. This guards the regression the saturation test above can't see:
1799    /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1800    /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1801    /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1802    /// while the task runs and refill once it finishes.
1803    #[tokio::test]
1804    async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1805        use std::sync::Arc;
1806
1807        use tokio::sync::Semaphore;
1808
1809        let inflight = Arc::new(Semaphore::new(1));
1810        let permit = inflight
1811            .clone()
1812            .try_acquire_owned()
1813            .expect("the sole permit is available");
1814
1815        let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1816        let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1817        let task = tokio::spawn(async move {
1818            // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1819            // task, so it lives until the body ends — not dropped at the `let`.
1820            let _permit = permit;
1821            started_tx.send(()).unwrap();
1822            // Stand in for the `.await` on the upstream forward.
1823            release_rx.await.unwrap();
1824        });
1825
1826        started_rx.await.unwrap();
1827        // While the task runs, the permit it moved in is still held — the pool is empty.
1828        assert!(
1829            inflight.clone().try_acquire_owned().is_err(),
1830            "a permit moved into a running task must stay held across its await"
1831        );
1832
1833        // Let the task finish; its permit drops with the body and capacity returns.
1834        release_tx.send(()).unwrap();
1835        task.await.unwrap();
1836        assert!(
1837            inflight.clone().try_acquire_owned().is_ok(),
1838            "the permit must be released once the task body completes"
1839        );
1840    }
1841
1842    #[test]
1843    fn longest_suffix_route_wins() {
1844        let mut routes = std::collections::BTreeMap::new();
1845        routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
1846        routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
1847        let view = view_with_routes(routes, vec![], vec![]);
1848        let buf = build_query(0x101, &["api", "corp", "example"], 1, 1);
1849
1850        match decide(&view, &buf).expect("decides") {
1851            Decision::Forward { upstreams, .. } => {
1852                assert_eq!(
1853                    upstreams,
1854                    vec!["10.0.0.2:53".parse().unwrap()],
1855                    "longer suffix wins"
1856                );
1857            }
1858            Decision::Reply(_) => panic!("expected forward"),
1859        }
1860    }
1861
1862    #[test]
1863    fn negative_route_is_nxdomain_not_forwarded() {
1864        // An empty upstream list is a negative route: fail closed, never forward.
1865        let mut routes = std::collections::BTreeMap::new();
1866        routes.insert("blocked.example".to_string(), vec![]);
1867        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
1868        let buf = build_query(0x102, &["x", "blocked", "example"], 1, 1);
1869
1870        match decide(&view, &buf).expect("decides") {
1871            Decision::Reply(resp) => {
1872                let (_, rcode, _) = parse_header(&resp);
1873                assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
1874            }
1875            Decision::Forward { .. } => panic!("negative route must not forward"),
1876        }
1877    }
1878
1879    #[test]
1880    fn unrouted_name_forwards_to_fallback_then_global() {
1881        // No route matches: fallback resolvers are preferred over global resolvers.
1882        let view = view_with_routes(
1883            std::collections::BTreeMap::new(),
1884            vec![udp("8.8.8.8:53")],
1885            vec![udp("1.1.1.1:53")],
1886        );
1887        let buf = build_query(0x103, &["example", "com"], 1, 1);
1888
1889        match decide(&view, &buf).expect("decides") {
1890            Decision::Forward { upstreams, .. } => {
1891                assert_eq!(
1892                    upstreams,
1893                    vec!["1.1.1.1:53".parse().unwrap()],
1894                    "fallback preferred"
1895                );
1896            }
1897            Decision::Reply(_) => panic!("expected forward to fallback"),
1898        }
1899    }
1900
1901    #[test]
1902    fn unrouted_name_forwards_to_global_when_no_fallback() {
1903        let view = view_with_routes(
1904            std::collections::BTreeMap::new(),
1905            vec![udp("8.8.8.8:53")],
1906            vec![],
1907        );
1908        let buf = build_query(0x104, &["example", "com"], 1, 1);
1909
1910        match decide(&view, &buf).expect("decides") {
1911            Decision::Forward { upstreams, .. } => {
1912                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
1913            }
1914            Decision::Reply(_) => panic!("expected forward to global resolver"),
1915        }
1916    }
1917
1918    #[test]
1919    fn tailnet_name_is_never_forwarded() {
1920        // Anti-leak: a name under a tailnet search domain that has no overlay match must fail
1921        // closed to NXDOMAIN, never leak to an upstream resolver, even with resolvers configured.
1922        let view = view_with_routes(
1923            std::collections::BTreeMap::new(),
1924            vec![udp("8.8.8.8:53")],
1925            vec![udp("1.1.1.1:53")],
1926        );
1927        // "ghost.user.ts.net" is under the tailnet suffix but matches no peer.
1928        let buf = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
1929
1930        match decide(&view, &buf).expect("decides") {
1931            Decision::Reply(resp) => {
1932                let (_, rcode, _) = parse_header(&resp);
1933                assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
1934            }
1935            Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
1936        }
1937    }
1938
1939    #[test]
1940    fn no_resolvers_fails_closed() {
1941        // No route, no resolvers: an unknown name is NXDOMAIN, not forwarded.
1942        let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);
1943        let buf = build_query(0x106, &["example", "com"], 1, 1);
1944
1945        match decide(&view, &buf).expect("decides") {
1946            Decision::Reply(resp) => {
1947                let (_, rcode, _) = parse_header(&resp);
1948                assert_eq!(rcode, 3, "NxDomain");
1949            }
1950            Decision::Forward { .. } => panic!("must not forward with no resolvers"),
1951        }
1952    }
1953
#[test]
fn overlay_match_wins_over_forwarding() {
    // The peer database holds the test node, so a query for a known peer name is answered
    // authoritatively even though an upstream resolver is configured.
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    // MagicDNS on, one tailnet search domain, one upstream resolver.
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec!["user.ts.net".to_string()],
        resolvers: vec![udp("8.8.8.8:53")],
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };
    let query = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);

    let reply = match decide(&view, &query).expect("decides") {
        Decision::Forward { .. } => panic!("overlay match must not forward"),
        Decision::Reply(bytes) => bytes,
    };
    // Expect rcode 0 (NoError) carrying exactly one answer record.
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "authoritative answer wins");
    assert_eq!(ancount, 1);
}
1983
#[test]
fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
    // Leak guard: a reverse PTR lookup under `ip6.arpa` for a tailnet ULA (fd7a:…) must be
    // answered with NXDOMAIN and never forwarded, even though upstream resolvers are
    // configured. This fork is IPv4-only on the tailnet, so forwarding would disclose that a
    // v6 address was probed.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![udp("1.1.1.1:53")]);

    // Nibble-reversed name for fd7a::1: the low nibble "1", 27 zero nibbles, then "a", "7",
    // "d", "f" (32 nibbles total), followed by the ip6.arpa suffix. The exact nibbles are
    // irrelevant to the guard; any name ending in ip6.arpa must fail closed.
    let mut labels = vec!["1"];
    labels.extend(std::iter::repeat("0").take(27));
    labels.extend_from_slice(&["a", "7", "d", "f", "ip6", "arpa"]);

    // QTYPE 12 is PTR.
    let query = build_query(0x200, &labels, 12, 1);

    let reply = match decide(&view, &query).expect("decides") {
        Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
        Decision::Reply(bytes) => bytes,
    };
    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(
        rcode, 3,
        "NxDomain: ip6.arpa reverse must not leak upstream"
    );
}
2014
#[test]
fn cap_response_sets_tc_when_truncated() {
    // Build a message, flag it as a response, then resize it to 500 bytes past the cap so
    // `cap_response` is forced to cut it down.
    let mut oversized = build_query(0x300, &["example", "com"], 1, 1);
    oversized[2] |= 0x80; // QR=1: this is a response
    let padded_len = MAX_UPSTREAM_RESPONSE + 500;
    oversized.resize(padded_len, 0xAB);

    let capped = cap_response(oversized);

    // The output must be exactly one datagram long and carry the TC bit (0x02 in the third
    // header byte), so the stub resolver retries over TCP instead of trusting a chopped
    // message.
    assert_eq!(capped.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
    assert_ne!(capped[2] & 0x02, 0, "TC bit set on truncation");
}
2027
#[test]
fn cap_response_leaves_small_response_untouched() {
    // A message that already fits must pass through `cap_response` byte-for-byte, and the TC
    // bit must not be switched on.
    let mut response = build_query(0x301, &["example", "com"], 1, 1);
    response[2] |= 0x80; // QR=1
    let expected = response.clone();

    let capped = cap_response(response);

    assert_eq!(capped, expected, "small response unchanged");
    assert_eq!(capped[2] & 0x02, 0, "TC bit not set when no truncation");
}
2039
#[test]
fn response_matches_query_rejects_mismatched_question() {
    // Every candidate below shares the query's id and has QR=1; only the echoed question
    // varies. This models an off-path injector that guessed the id but answers something else.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Same id, different QNAME ("b.com" instead of "a.com").
    let wrong_question = {
        let mut msg = build_query(0x1234, &["b", "com"], 1, 1);
        msg[2] |= 0x80;
        msg
    };
    assert!(
        !response_matches_query(&query, &wrong_question),
        "different QNAME must be rejected"
    );

    // Same id and name, different QTYPE (28 instead of 1).
    let wrong_qtype = {
        let mut msg = build_query(0x1234, &["a", "com"], 28, 1);
        msg[2] |= 0x80;
        msg
    };
    assert!(
        !response_matches_query(&query, &wrong_qtype),
        "different QTYPE must be rejected"
    );

    // The query itself with only the QR bit flipped is the one acceptable response.
    let good = {
        let mut msg = query.clone();
        msg[2] |= 0x80;
        msg
    };
    assert!(
        response_matches_query(&query, &good),
        "matching question accepted"
    );
}
2069
#[test]
fn suffix_matches_handles_boundaries_and_empty() {
    // The suffix itself and anything ending in ".corp" on a label boundary must match.
    for &name in &["corp", "a.corp", "a.b.corp"] {
        assert!(suffix_matches(name, "corp"));
    }

    // "acorp" ends with the bytes "corp" but not at a label boundary.
    assert!(!suffix_matches("acorp", "corp"));

    // An empty suffix matches nothing, not even the empty name (defense-in-depth against
    // `ends_with("")` being trivially true).
    for &name in &["anything.example", ""] {
        assert!(!suffix_matches(name, ""));
    }
}
2082
#[test]
fn empty_search_domain_does_not_capture_everything() {
    // Defense-in-depth: an empty string among the search domains must not make every name
    // look like a tailnet name (which would fail-close or mis-route legitimate recursive
    // queries). An off-tailnet name must still be forwarded to the configured resolver.
    let no_routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], Vec::new());
    view.cfg.search_domains = vec![String::new()];

    let query = build_query(0x400, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Reply(_) => {
            panic!("empty search domain must not treat every name as tailnet")
        }
        Decision::Forward { upstreams, .. } => upstreams,
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
2105
#[test]
fn empty_route_suffix_does_not_capture_everything() {
    // Defense-in-depth: a route keyed by the empty string must not swallow every query into
    // its own upstreams. An unrelated name has to skip it and reach the global resolver.
    let mut routes = std::collections::BTreeMap::new();
    let empty_suffix = String::new();
    routes.insert(empty_suffix, vec![udp("10.9.9.9:53")]);

    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());
    let query = build_query(0x401, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Reply(_) => panic!("expected forward to global resolver"),
        Decision::Forward { upstreams, .. } => upstreams,
    };
    assert_eq!(
        upstreams,
        vec!["8.8.8.8:53".parse().unwrap()],
        "empty route suffix must not capture; falls through to global"
    );
}
2127
2128    fn udp_exit(addr: &str) -> DnsResolver {
2129        DnsResolver {
2130            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2131            use_with_exit_node: true,
2132        }
2133    }
2134
#[test]
fn recursive_forward_is_flagged_route_forward_is_not() {
    // The `recursive` flag on a forward separates the two cases: a global/fallback forward
    // reports `true` (eligible for DoH delegation), while a deliberately configured split-DNS
    // route reports `false`.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    // A name under the "corp.example" route.
    let routed = build_query(0x500, &["api", "corp", "example"], 1, 1);
    let routed_flag = match decide(&view, &routed).expect("decides") {
        Decision::Reply(_) => panic!("expected route forward"),
        Decision::Forward { recursive, .. } => recursive,
    };
    assert!(!routed_flag, "split-DNS route is not a recursive forward");

    // A name that matches no route.
    let global = build_query(0x501, &["example", "com"], 1, 1);
    let global_flag = match decide(&view, &global).expect("decides") {
        Decision::Reply(_) => panic!("expected recursive forward"),
        Decision::Forward { recursive, .. } => recursive,
    };
    assert!(global_flag, "unrouted name is a recursive forward");
}
2159
#[test]
fn recursive_plan_keeps_udp_without_exit_node() {
    // `exit_doh` is left as the helper built it (no exit node active), so the plan must hand
    // back the caller's default UDP upstreams unchanged.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], Vec::new());

    let upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, upstreams.clone());

    assert_eq!(plan, RecursivePlan::Udp(upstreams));
}
2174
2175    #[test]
2176    fn recursive_plan_delegates_to_doh_with_exit_node() {
2177        // Exit node active, no kept-local resolvers: recursive queries delegate to the exit node's
2178        // DoH endpoint so resolution egresses from the exit node, not this host.
2179        let mut view = view_with_routes(
2180            std::collections::BTreeMap::new(),
2181            vec![udp("8.8.8.8:53")],
2182            vec![],
2183        );
2184        let doh: SocketAddr = "100.64.0.5:8080".parse().unwrap();
2185        view.exit_doh = Some(doh);
2186        assert_eq!(
2187            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2188            RecursivePlan::Doh(doh)
2189        );
2190    }
2191
#[test]
fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
    // Resolvers flagged `use_with_exit_node` stay local even while an exit node is active (Go
    // keeps UseWithExitNode resolvers): the plan forwards to them over UDP and never
    // delegates to DoH.
    let resolvers = vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")];
    let mut view = view_with_routes(std::collections::BTreeMap::new(), resolvers, Vec::new());
    view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());

    // The caller-supplied default (8.8.8.8) is deliberately not the flagged resolver; the
    // plan has to ignore it and use the kept-local one.
    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);

    assert_eq!(
        plan,
        RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
    );
}
2209}