Skip to main content

ts_runtime/
magic_dns.rs

1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it falls back to
6//! tsnet-style split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//!   to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//!   negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//!   us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//!   no route are forwarded to the fallback resolvers, else the global resolvers.
14//! - **Fail closed**: if no route and no resolver is configured, an unknown name is `NXDOMAIN`.
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//!   the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//!   off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//!   `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//!   HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//!   forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//!   `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//!   zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//!   (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37    net::{IpAddr, Ipv4Addr, SocketAddr},
38    sync::Arc,
39    time::Duration,
40};
41
42use kameo::{
43    actor::ActorRef,
44    message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48    sync::{Semaphore, watch},
49    task::JoinSet,
50    time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56    Error,
57    env::Env,
58    peer_tracker::{PeerDb, PeerState},
59};
60
/// How long to wait for a single upstream resolver to answer a forwarded query before giving up
/// on it. The bound is applied per upstream (see `forward_query`), not to the forward as a whole.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Cap on concurrent in-flight forwarded queries on the local `100.100.100.100:53` responder.
///
/// Each forward is spawned onto a task that holds an overlay UDP socket until the upstream answers
/// or [`UPSTREAM_TIMEOUT`] elapses. Without a cap, a local/tailnet client spraying distinct
/// forwardable names opens unbounded concurrent overlay sockets + tasks (a resource-exhaustion DoS
/// on a slow/black-holed upstream, since each lingers for the full timeout). Bound it the same way
/// the peerAPI DoH server bounds its request handlers ([`crate::peerapi`]'s `MAX_INFLIGHT`): acquire
/// a permit before spawning and drop the query fail-closed when saturated. A dropped DNS query is a
/// benign outcome — the stub resolver simply retries or times out — and Go's resolver likewise
/// bounds outstanding forwards rather than spawning without limit.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Cap, in bytes, on a forwarded upstream response we read into memory (a single UDP datagram).
///
/// Matches Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). The client's query is forwarded
/// verbatim, so a client advertising a large EDNS UDP size can elicit a legitimately large
/// (1300–4096 byte) UDP answer (big TXT sets, DNSSEC, many-record round-robins). Capping at the old
/// 1232 truncated those and set TC, forcing a TCP retry this fork's UDP-only forwarder can't serve —
/// so the large answer became unreachable. 4096 relays them intact. Anything still larger is
/// truncated and flagged `TC` by [`cap_response`].
const MAX_UPSTREAM_RESPONSE: usize = 4096;

/// The MagicDNS service IP (`100.100.100.100`). The netstack interface owns this address, so a
/// `udp_bind` here receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The DNS service port the responder binds on [`MAGIC_DNS_IP`] (the standard DNS port, 53).
const MAGIC_DNS_PORT: u16 = 53;
88
/// The latest view the answer loop resolves queries against.
///
/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
/// updates) and read fresh by the answer loop for every packet.
///
/// Note on `Default`: a derived-default view has `accept_dns == false`, so [`decide`] answers
/// `REFUSED` to every query until a real view is installed.
#[derive(Clone, Default)]
pub(crate) struct DnsView {
    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing.
    pub(crate) cfg: DnsConfig,
    /// The current peer database, if we've seen a peer update. Shared behind an [`Arc`], so
    /// cloning the view does not copy the database.
    pub(crate) peers: Option<Arc<PeerDb>>,
    /// This node, if we've seen a self-node update. Consulted as a fallback after the peer
    /// database by the name and IP lookups.
    pub(crate) self_node: Option<Node>,
    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
    /// recursive resolution to this address over the overlay instead of forwarding to the locally
    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
    ///
    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
    pub(crate) exit_doh: Option<SocketAddr>,
    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
    ///
    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
    pub(crate) enable_ipv6: bool,
    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
    /// join for connectivity without taking over DNS.
    ///
    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
    pub(crate) accept_dns: bool,
}
129
130impl DnsView {
131    /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132    fn node_by_name(&self, name: &str) -> Option<Node> {
133        if let Some(node) = self
134            .peers
135            .as_ref()
136            .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137        {
138            return Some(node);
139        }
140
141        self.self_node
142            .as_ref()
143            .filter(|n| n.matches_name(name))
144            .cloned()
145    }
146
147    /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148    /// wins first — tried as written and then qualified by each tailnet search domain (so a
149    /// short/partially-qualified name like `host` or `host.user` still resolves to
150    /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151    /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152    /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153    /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154    /// control-pushed static record — never anything else.
155    fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156        let addr_of = |node: Node| -> IpAddr {
157            if want_v4 {
158                IpAddr::from(node.tailnet_address.ipv4.addr())
159            } else {
160                IpAddr::from(node.tailnet_address.ipv6.addr())
161            }
162        };
163
164        if let Some(node) = self.node_by_name(canon) {
165            return Some(addr_of(node));
166        }
167        for suffix in &self.cfg.search_domains {
168            if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169                return Some(addr_of(node));
170            }
171        }
172
173        // Control-pushed static records match the fully-qualified query name only.
174        self.cfg.extra_records.iter().find_map(|rec| {
175            let family_ok = matches!(
176                (rec.addr, want_v4),
177                (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178            );
179            (rec.name == canon && family_ok).then_some(rec.addr)
180        })
181    }
182
183    /// Find the node (peer or self) that owns the tailnet IP `ip`.
184    fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185        if let Some(node) = self
186            .peers
187            .as_ref()
188            .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189        {
190            return Some(node);
191        }
192
193        self.self_node
194            .as_ref()
195            .filter(|n| {
196                IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197                    || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198            })
199            .cloned()
200    }
201
202    /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203    /// resolvers, returning the upstreams to forward to.
204    ///
205    /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206    /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207    /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208    /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209    /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210    /// ([`Upstreams::None`] => NXDOMAIN).
211    fn route_for(&self, name: &str) -> Upstreams<'_> {
212        let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213        for (suffix, upstreams) in &self.cfg.routes {
214            if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215                best = Some((suffix.as_str(), upstreams));
216            }
217        }
218
219        if let Some((_, upstreams)) = best {
220            return if upstreams.is_empty() {
221                Upstreams::Block
222            } else {
223                // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224                // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225                // route) must keep receiving the query directly.
226                Upstreams::Route(upstreams)
227            };
228        }
229
230        if !self.cfg.fallback_resolvers.is_empty() {
231            return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232        }
233        if !self.cfg.resolvers.is_empty() {
234            return Upstreams::Recursive(&self.cfg.resolvers);
235        }
236        Upstreams::None
237    }
238}
239
/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
///
/// Produced by [`DnsView::route_for`] and consumed by [`forward_or_nxdomain`], which turns
/// `Route` / `Recursive` into a [`Decision::Forward`] (with `recursive` set to `false` / `true`
/// respectively, provided at least one IPv4 upstream remains) and `Block` / `None` into an
/// NXDOMAIN reply. The borrowed slices point into the [`DnsConfig`] held by the view.
enum Upstreams<'a> {
    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
    Route(&'a [DnsResolver]),
    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
    /// exit-node DoH delegation in the client serve loop.
    Recursive(&'a [DnsResolver]),
    /// A negative split-DNS route (one with an empty upstream list) matched: do not resolve
    /// (NXDOMAIN).
    Block,
    /// No route and no resolver configured: fail closed (NXDOMAIN).
    None,
}
252
/// What the (sync) decision step concluded for a query: either a complete response to send back,
/// or a request to forward the original query to an upstream resolver.
///
/// Returned by [`decide`] wrapped in an `Option`; a `None` there (not a variant here) means the
/// packet could not be parsed and is dropped without any response.
pub(crate) enum Decision {
    /// A fully-formed, already-encoded response is ready to send.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of these upstream UDP resolvers; on success
    /// relay the upstream answer, on failure/timeout answer NXDOMAIN with the given id+question.
    Forward {
        /// UDP upstreams to try, in order. As built by [`forward_or_nxdomain`] this list is
        /// IPv4-only and never empty.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes to forward verbatim.
        query: Vec<u8>,
        /// Pre-encoded fallback NXDOMAIN response to send if every upstream fails.
        nxdomain: Vec<u8>,
        /// Whether this is a *recursive* (catch-all fallback/global resolver) forward, as opposed
        /// to a deliberately-configured split-DNS route. Only recursive forwards are eligible for
        /// exit-node DoH delegation in the client serve loop (see [`DnsView::exit_doh`]); split-DNS
        /// routes always stay on their configured upstreams (typically subnet-reachable internal
        /// resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
275
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or ends with `suffix`
/// immediately preceded by a `.` — so `"a.corp"` is under `"corp"` but `"acorp"` is not.
///
/// An **empty** `suffix` never matches. This is defense-in-depth: every string ends with `""`, so
/// without the guard an empty suffix could over-route or classify every name as a tailnet name —
/// both leak-prone.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    match name.strip_suffix(suffix) {
        // `head` is whatever precedes the suffix: nothing at all (exact match) or a parent
        // prefix that must stop on a label separator.
        Some(head) => head.is_empty() || head.ends_with('.'),
        None => false,
    }
}
289
290/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
291/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
292/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
293fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
294    view.cfg
295        .search_domains
296        .iter()
297        .any(|suffix| suffix_matches(name, suffix))
298}
299
300/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
301/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
302/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
303/// such queries fail closed to NXDOMAIN.
304fn is_ip6_arpa(name: &str) -> bool {
305    suffix_matches(name, "ip6.arpa")
306}
307
/// Whether `ip` lies in `100.64.0.0/10`, the RFC 6598 shared address space Tailscale uses for
/// tailnet IPv4 addresses.
///
/// Reverse (`PTR`) queries for these addresses are authoritative to MagicDNS: when no peer owns
/// the IP the caller fails closed to NXDOMAIN instead of forwarding the probe to a third-party
/// resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    /// `100.64.0.0` as a big-endian integer.
    const CGNAT_NETWORK: u32 = 0x6440_0000;
    /// A `/10` netmask: the top ten bits.
    const CGNAT_MASK: u32 = 0xFFC0_0000;

    (u32::from(ip) & CGNAT_MASK) == CGNAT_NETWORK
}
315
316/// Decide what to do with a single DNS query against `view`: either a complete response is ready
317/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
318/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
319///
320/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
321/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
322/// NXDOMAIN rather than leaking to an upstream resolver.
323pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
324    // Malformed / non-query input is dropped: we never answer something we can't parse.
325    let query = decode_query(buf).ok()?;
326    let q = &query.question;
327    let id = query.id;
328
329    let reply = |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rcode, answers));
330
331    // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
332    // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
333    // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
334    // config and refuses every query. This one read site covers the netstack responder, the peerAPI
335    // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
336    if !view.cfg.magic_dns || !view.accept_dns {
337        return Some(reply(Rcode::Refused, &[]));
338    }
339
340    let canon = q.name.to_canon();
341
342    // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
343    // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
344    // forwards such a query like any other name. Treat it as an unsupported authoritative type —
345    // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
346    // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
347    const CLASS_IN: u16 = 1;
348    if q.qclass != CLASS_IN {
349        return Some(forward_or_nodata(view, &canon, buf, id, q));
350    }
351
352    Some(match &q.qtype {
353        QType::A => match view.resolve_addr(&canon, true) {
354            Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
355            // No overlay/extra-record answer: try split-DNS / recursive upstreams.
356            _ => forward_or_nxdomain(view, &canon, buf, id, q),
357        },
358        QType::Aaaa => match view.resolve_addr(&canon, false) {
359            // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
360            // (default) the client is IPv4-only, so answering with the overlay v6 address would
361            // only hand out an unroutable address — dead connections plus a fingerprint. Return
362            // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
363            // data as historically. We never forward this name to a recursive upstream either way:
364            // a positive overlay match is authoritative.
365            Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
366                reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
367            }
368            Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
369            // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
370            // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
371            _ => forward_or_nxdomain(view, &canon, buf, id, q),
372        },
373        QType::Ptr => match q.name.ptr_to_ipv4() {
374            Some(octets) => {
375                let v4: Ipv4Addr = octets.into();
376                let ip = IpAddr::V4(v4);
377                match view.node_by_ip(ip) {
378                    Some(node) => {
379                        let fqdn = node.fqdn(false);
380                        let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
381                        reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
382                    }
383                    // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
384                    // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
385                    // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
386                    // genuinely off-tailnet reverse queries are forwarded.
387                    None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
388                    None => forward_or_nxdomain(view, &canon, buf, id, q),
389                }
390            }
391            // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
392            // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
393            // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
394            None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
395            None => forward_or_nxdomain(view, &canon, buf, id, q),
396        },
397        // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
398        // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
399        // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
400        // REFUSED as "this server won't serve me" and abandons the resolver, which would break
401        // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
402        // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
403        QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q),
404    })
405}
406
407/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
408/// either forward (to UDP upstreams) or fail closed with NXDOMAIN.
409///
410/// Anti-leak: a name under a tailnet search domain is authoritative and is never forwarded — it
411/// fails closed to NXDOMAIN so neither the name nor the query leaks to a third-party resolver.
412fn forward_or_nxdomain(
413    view: &DnsView,
414    canon: &str,
415    buf: &[u8],
416    id: u16,
417    q: &ts_dns_wire::Question,
418) -> Decision {
419    let nxdomain = encode_response(id, q, Rcode::NxDomain, &[]);
420
421    if is_tailnet_name(view, canon) {
422        return Decision::Reply(nxdomain);
423    }
424
425    let (resolvers, recursive) = match view.route_for(canon) {
426        Upstreams::Route(resolvers) => (resolvers, false),
427        Upstreams::Recursive(resolvers) => (resolvers, true),
428        // Negative route or nothing configured: fail closed.
429        Upstreams::Block | Upstreams::None => return Decision::Reply(nxdomain),
430    };
431
432    let upstreams: Vec<SocketAddr> = resolvers
433        .iter()
434        .map(DnsResolver::udp_addr)
435        // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
436        .filter(SocketAddr::is_ipv4)
437        .collect();
438    if upstreams.is_empty() {
439        Decision::Reply(nxdomain)
440    } else {
441        Decision::Forward {
442            upstreams,
443            query: buf.to_vec(),
444            nxdomain,
445            recursive,
446        }
447    }
448}
449
450/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
451/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
452/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
453/// NXDOMAIN.
454///
455/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
456/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
457/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
458/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
459/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
460/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
461/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
462/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
463///
464/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
465/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
466fn forward_or_nodata(
467    view: &DnsView,
468    canon: &str,
469    buf: &[u8],
470    id: u16,
471    q: &ts_dns_wire::Question,
472) -> Decision {
473    // Authoritative tailnet name: NODATA (empty NOERROR), not NXDOMAIN — the name exists.
474    if is_tailnet_name(view, canon) {
475        return Decision::Reply(encode_response(id, q, Rcode::NoError, &[]));
476    }
477    // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
478    // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
479    // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
480    // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
481    // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
482    // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
483    if is_ip6_arpa(canon) {
484        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
485    }
486    if let Some(octets) = q.name.ptr_to_ipv4()
487        && is_tailnet_cgnat(octets.into())
488    {
489        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
490    }
491    // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
492    // non-tailnet names and fails closed (NXDOMAIN) when no upstream is configured/routable; reuse it
493    // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN path is unreachable here).
494    forward_or_nxdomain(view, canon, buf, id, q)
495}
496
/// Client-side plan for a *recursive* forward: keep resolving over local UDP upstreams, or delegate
/// the query to the active exit node's peerAPI DoH endpoint over the overlay.
///
/// Computed by [`recursive_plan`]. Derives `Debug`/`PartialEq`/`Eq`, so plans can be printed and
/// compared directly.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Used when no exit node is active, or when the config
    /// has `use_with_exit_node` resolvers (kept local even with an exit node selected).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address (taken
    /// from [`DnsView::exit_doh`]).
    Doh(SocketAddr),
}
507
508/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
509/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
510///
511/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
512/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
513///   resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
514///   is selected); forward to them over UDP, do NOT delegate.
515/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
516///   then egresses from the exit node, not this host (the whole point of routing through an exit
517///   node: this node's real IP is never used to resolve the peer's public names).
518pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
519    let Some(doh) = view.exit_doh else {
520        return RecursivePlan::Udp(default_upstreams);
521    };
522    let kept: Vec<SocketAddr> = view
523        .cfg
524        .resolvers_with_exit_node()
525        .map(DnsResolver::udp_addr)
526        // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
527        .filter(SocketAddr::is_ipv4)
528        .collect();
529    if kept.is_empty() {
530        RecursivePlan::Doh(doh)
531    } else {
532        RecursivePlan::Udp(kept)
533    }
534}
535
536/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
537/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
538/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
539/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
540/// truncation actually occurs.
541fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
542    if resp.len() > MAX_UPSTREAM_RESPONSE {
543        resp.truncate(MAX_UPSTREAM_RESPONSE);
544        // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
545        // capped datagram is always >= the header length, but guard anyway to never panic.
546        if let Some(flags_hi) = resp.get_mut(2) {
547            *flags_hi |= 0x02;
548        }
549    }
550    resp
551}
552
/// The byte length of a fixed DNS header.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question (QNAME + QTYPE + QCLASS) inside the DNS message `msg`.
///
/// The returned range starts right after the 12-byte header and ends after QCLASS. `None` is
/// returned when the name is malformed, contains a label-length byte with either of its top two
/// bits set (a compression pointer or reserved label type — neither is valid in a question), or
/// when the name or the four trailing QTYPE/QCLASS bytes run past the end of `msg`.
///
/// Used to byte-compare a forwarded query's question against the upstream response's question.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;

    // Hop from label to label until the zero-length root label closes the QNAME.
    let name_end = loop {
        match *msg.get(cursor)? {
            0 => break cursor + 1,
            len if len & 0xC0 != 0 => return None,
            len => {
                // Skip the length byte plus the label itself; the label must fit in the buffer.
                cursor = (cursor + 1)
                    .checked_add(usize::from(len))
                    .filter(|&next| next <= msg.len())?;
            }
        }
    };

    // QTYPE (2) + QCLASS (2) follow the name.
    let end = name_end.checked_add(4).filter(|&end| end <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
585
586/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
587/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
588/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
589/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
590/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
591/// answer for a different question, so we also require the echoed question to match.
592fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
593    if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
594        return false;
595    }
596    let id_matches = query[0..2] == resp[0..2];
597    let is_response = resp[2] & 0x80 != 0;
598    if !id_matches || !is_response {
599        return false;
600    }
601    // The response must echo the exact question we asked. Parse both question sections and compare
602    // their bytes; a parse failure on either side is treated as a non-match (fail closed).
603    match (question_range(query), question_range(resp)) {
604        (Some(q), Some(r)) => query[q] == resp[r],
605        _ => false,
606    }
607}
608
609/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
610/// well-formed response, or `nxdomain` if every upstream times out or errors.
611///
612/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
613/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
614/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
615/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
616pub(crate) async fn forward_query(
617    channel: &Channel,
618    upstreams: &[SocketAddr],
619    query: &[u8],
620    nxdomain: Vec<u8>,
621) -> Vec<u8> {
622    for upstream in upstreams {
623        let socket = match channel
624            .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
625            .await
626        {
627            Ok(s) => s,
628            Err(e) => {
629                tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
630                continue;
631            }
632        };
633
634        if let Err(e) = socket.send_to(*upstream, query).await {
635            tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
636            continue;
637        }
638
639        match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
640            Ok(Ok((from, resp))) if !resp.is_empty() => {
641                // Anti-poisoning: only accept a datagram that came from the upstream we queried
642                // and whose DNS header matches this query (same transaction id, QR=response bit
643                // set). An off-path injector racing the real answer is otherwise relayed straight
644                // back to the stub resolver as authoritative.
645                if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
646                    tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
647                    continue;
648                }
649                return cap_response(resp.to_vec());
650            }
651            Ok(Ok(_)) => continue,
652            Ok(Err(e)) => {
653                tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
654                continue;
655            }
656            Err(_) => {
657                tracing::debug!(%upstream, "magic dns upstream timed out");
658                continue;
659            }
660        }
661    }
662    nxdomain
663}
664
665/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
666///
667/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
668/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
669async fn serve(
670    socket: netstack::netsock::UdpSocket,
671    rx: watch::Receiver<Arc<DnsView>>,
672    channel: Channel,
673) {
674    let socket = Arc::new(socket);
675    let mut forwards = JoinSet::new();
676    // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
677    // lifetime of each spawned forward task and released on completion.
678    let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
679    loop {
680        let (src, buf) = match socket.recv_from_bytes().await {
681            Ok(pkt) => pkt,
682            Err(e) => {
683                tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
684                return;
685            }
686        };
687
688        // Read the freshest view per packet.
689        let view = rx.borrow().clone();
690
691        match decide(&view, &buf) {
692            // Malformed query: drop silently.
693            None => continue,
694            Some(Decision::Reply(resp)) => {
695                if let Err(e) = socket.send_to(src, &resp).await {
696                    tracing::warn!(error = %e, %src, "magic dns response send failed");
697                }
698            }
699            Some(Decision::Forward {
700                upstreams,
701                query,
702                nxdomain,
703                recursive,
704            }) => {
705                // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
706                // always stays on its configured upstreams. Decide the plan against the current
707                // view so a query routed while an exit node is active egresses from that exit node.
708                let plan = if recursive {
709                    recursive_plan(&view, upstreams)
710                } else {
711                    RecursivePlan::Udp(upstreams)
712                };
713                // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
714                // times out) rather than spawn an unbounded task that pins an overlay socket for up
715                // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
716                // (NOT `let _ =`, which would drop it immediately) so it is released only when the
717                // task body completes.
718                let Ok(permit) = inflight.clone().try_acquire_owned() else {
719                    tracing::warn!(
720                        %src,
721                        max = MAX_INFLIGHT_FORWARDS,
722                        "magic dns drop: at max in-flight forwarded queries"
723                    );
724                    continue;
725                };
726                let socket = socket.clone();
727                let channel = channel.clone();
728                forwards.spawn(async move {
729                    let _permit = permit;
730                    let resp = match plan {
731                        RecursivePlan::Udp(upstreams) => {
732                            forward_query(&channel, &upstreams, &query, nxdomain).await
733                        }
734                        RecursivePlan::Doh(doh_addr) => {
735                            crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, nxdomain)
736                                .await
737                        }
738                    };
739                    if let Err(e) = socket.send_to(src, &resp).await {
740                        tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
741                    }
742                });
743            }
744        }
745
746        // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
747        // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
748        // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
749        while forwards.try_join_next().is_some() {}
750    }
751}
752
/// The MagicDNS responder actor.
///
/// Subscribes to control state (for the DNS config + self node), peer state (for the peer
/// database), and the active exit node (for its DoH endpoint), keeping a [`DnsView`] that the
/// spawned answer loop reads for every query.
pub struct MagicDnsActor {
    /// Keeps the tasks spawned at start alive for the lifetime of the actor: the socket-serving
    /// task and, when a peerAPI port is configured, the peerAPI server task.
    _joinset: JoinSet<()>,
    /// The latest view, shared with the answer loop. Each update handler publishes a fresh
    /// `Arc<DnsView>` built from a clone of the previous one.
    view_tx: watch::Sender<Arc<DnsView>>,
    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
    /// rebuild time — not captured once — for a toggle to reach the served view.
    env: Env,
    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
    /// forward path the serve loop uses ([`forward_query`] / [`forward_doh`], both binding
    /// `0.0.0.0:0` on this channel — never a host socket).
    channel: Channel,
}
772
/// A programmatic DNS query routed through the live MagicDNS responder (the `100.100.100.100` path),
/// for [`Device::query_dns`](crate::Device::query_dns). The handler synthesizes a query packet and
/// drives it through the exact same [`decide`]/forward logic as an on-the-wire query, so the result
/// (and its anti-leak posture) matches what a tailnet client would observe.
pub struct Query {
    /// The canonical name to resolve (e.g. `example.com`, no trailing dot). The handler tolerates a
    /// single trailing dot and drops empty labels before encoding the name.
    pub name: String,
    /// The DNS query type (`1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE). Values other
    /// than those three are passed through unchanged as an "other" query type.
    pub qtype: u16,
}
783
/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
/// rather than parsed records — this fork's wire codec has no answer-record decoder.
///
/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
#[derive(Debug, Clone, kameo::Reply)]
pub struct DnsQueryResult {
    /// The raw DNS response datagram (header + question + any answer records).
    pub response: Vec<u8>,
    /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
    /// `5`=Refused, …). The query handler reads it from byte 3 of `response` and reports `0` when
    /// `response` is too short to contain that byte.
    pub rcode: u8,
    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
    /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
    /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
    /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
    pub resolvers_consulted: Vec<SocketAddr>,
}
803
804impl kameo::Actor for MagicDnsActor {
805    type Args = (Env, Channel);
806    type Error = Error;
807
808    async fn on_start(
809        (env, channel): Self::Args,
810        slf: ActorRef<Self>,
811    ) -> Result<Self, Self::Error> {
812        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
813        env.subscribe::<Arc<PeerState>>(&slf).await?;
814        env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
815            .await?;
816
817        // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
818        // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
819        // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
820        // runtime-settable). The seed value is moot — no query is served before the first
821        // StateUpdate — but seeding it keeps the pre-update view internally consistent.
822        let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
823            enable_ipv6: env.enable_ipv6,
824            accept_dns: env.accept_dns(),
825            ..DnsView::default()
826        }));
827
828        let mut joinset = JoinSet::new();
829
830        // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
831        // never answers anything) so a transient bind error doesn't take down the runtime.
832        let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
833        match channel.udp_bind(addr).await {
834            Ok(socket) => {
835                tracing::debug!(%addr, "magic dns responder bound");
836                joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
837            }
838            Err(e) => {
839                tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
840            }
841        }
842
843        // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
844        // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
845        // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
846        // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
847        if let Some(port) = env.peerapi_port {
848            let channel = channel.clone();
849            let view_rx = view_rx.clone();
850            let forward_exit_egress = env.forward_exit_egress;
851            let taildrop = env.taildrop_store.clone();
852            let funnel_ingress = env.funnel_ingress.clone();
853            joinset.spawn(crate::peerapi::serve(
854                channel,
855                port,
856                view_rx,
857                forward_exit_egress,
858                taildrop,
859                funnel_ingress,
860            ));
861        }
862
863        Ok(Self {
864            _joinset: joinset,
865            view_tx,
866            env,
867            channel,
868        })
869    }
870}
871
/// Build a header-only SERVFAIL response for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name longer than 255 bytes on the wire).
///
/// The result is exactly 12 bytes: QR=1 (it is a response) and RCODE=2 (server failure), with every
/// other bit and all section counts zero — there is no question or answer section because no
/// parseable question was ever produced. This gives `query_dns` a definite, honest RCODE instead of
/// an empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    /// QR bit: the high bit of header byte 2.
    const QR_RESPONSE: u8 = 0x80;
    /// RCODE 2 (server failure): the low nibble of header byte 3.
    const RCODE_SERVFAIL: u8 = 0x02;

    let mut header = [0u8; 12];
    header[2] = QR_RESPONSE;
    header[3] = RCODE_SERVFAIL;
    header.to_vec()
}
884
885impl Message<Query> for MagicDnsActor {
886    type Reply = DnsQueryResult;
887
888    async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
889        // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
890        // uses, against the freshest view — so the result and its anti-leak posture exactly match an
891        // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
892        // and `response_matches_query` validates the echoed id against this same buffer.
893        //
894        // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
895        // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
896        // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
897        // corrupting the QTYPE/QCLASS that follow.
898        let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
899        let labels: Vec<String> = trimmed
900            .split('.')
901            .filter(|label| !label.is_empty())
902            .map(str::to_owned)
903            .collect();
904        let qtype = match query.qtype {
905            1 => ts_dns_wire::QType::A,
906            28 => ts_dns_wire::QType::Aaaa,
907            12 => ts_dns_wire::QType::Ptr,
908            other => ts_dns_wire::QType::Other(other),
909        };
910        // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
911        // forwards via `forward_or_nodata`, matching the on-the-wire path).
912        let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
913
914        let view = self.view_tx.borrow().clone();
915
916        let (response, resolvers_consulted) = match decide(&view, &buf) {
917            // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
918            // the name normalized above that can still happen for a name `encode_query` accepts but
919            // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
920            // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
921            // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
922            // silently drops here (the on-wire client times out); a programmatic caller gets a
923            // definite, honest error instead.
924            None => (servfail_response(), Vec::new()),
925            Some(Decision::Reply(resp)) => (resp, Vec::new()),
926            Some(Decision::Forward {
927                upstreams,
928                query,
929                nxdomain,
930                recursive,
931            }) => {
932                let plan = if recursive {
933                    recursive_plan(&view, upstreams)
934                } else {
935                    RecursivePlan::Udp(upstreams)
936                };
937                match plan {
938                    RecursivePlan::Udp(upstreams) => {
939                        let resp = forward_query(&self.channel, &upstreams, &query, nxdomain).await;
940                        (resp, upstreams)
941                    }
942                    RecursivePlan::Doh(doh_addr) => {
943                        let resp = crate::peerapi_doh::forward_doh(
944                            &self.channel,
945                            doh_addr,
946                            &query,
947                            nxdomain,
948                        )
949                        .await;
950                        // The query egressed via the exit node's DoH endpoint, not a local UDP
951                        // upstream — report the DoH address as the resolver consulted.
952                        (resp, vec![doh_addr])
953                    }
954                }
955            }
956        };
957
958        // RCODE is the low 4 bits of the second flags byte (header byte 3).
959        let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
960
961        DnsQueryResult {
962            response,
963            rcode,
964            resolvers_consulted,
965        }
966    }
967}
968
969impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
970    type Reply = ();
971
972    async fn handle(
973        &mut self,
974        update: Arc<ts_control::StateUpdate>,
975        _ctx: &mut Context<Self, Self::Reply>,
976    ) {
977        // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
978        // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
979        let accept_dns = self.env.accept_dns();
980        self.view_tx.send_modify(|view| {
981            let mut next = (**view).clone();
982            next.cfg = update.dns_config.clone().unwrap_or_default();
983            next.self_node = update.node.clone();
984            next.accept_dns = accept_dns;
985            *view = Arc::new(next);
986        });
987    }
988}
989
990impl Message<Arc<PeerState>> for MagicDnsActor {
991    type Reply = ();
992
993    async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
994        // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
995        // `RepublishState` that lands here, so this is the path that re-applies the gate after a
996        // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
997        let accept_dns = self.env.accept_dns();
998        self.view_tx.send_modify(|view| {
999            let mut next = (**view).clone();
1000            next.peers = Some(state.peers.clone());
1001            next.accept_dns = accept_dns;
1002            *view = Arc::new(next);
1003        });
1004    }
1005}
1006
1007impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1008    type Reply = ();
1009
1010    async fn handle(
1011        &mut self,
1012        active: crate::route_updater::ActiveExitNode,
1013        _ctx: &mut Context<Self, Self::Reply>,
1014    ) {
1015        // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1016        // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1017        // the address here — once, from the route updater's authoritative selection — means the
1018        // serve loop never re-resolves the selector.
1019        let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1020        self.view_tx.send_modify(|view| {
1021            let mut next = (**view).clone();
1022            next.exit_doh = exit_doh;
1023            *view = Arc::new(next);
1024        });
1025    }
1026}
1027
1028#[cfg(test)]
1029mod tests {
1030    use ts_control::{StableNodeId, TailnetAddress};
1031
1032    use super::*;
1033
1034    /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1035    /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1036    /// `Forward`; a `Forward` here is a bug and panics.
1037    fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1038        match decide(view, buf)? {
1039            Decision::Reply(resp) => Some(resp),
1040            Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1041        }
1042    }
1043
1044    /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1045    fn test_node() -> Node {
1046        Node {
1047            id: 1,
1048            stable_id: StableNodeId("n1".to_string()),
1049            hostname: "host".to_string(),
1050            user_id: 0,
1051            tailnet: Some("user.ts.net".to_string()),
1052            tags: vec![],
1053            tailnet_address: TailnetAddress {
1054                ipv4: "100.64.0.1/32".parse().unwrap(),
1055                ipv6: "fd7a::1/128".parse().unwrap(),
1056            },
1057            node_key: [0u8; 32].into(),
1058            node_key_expiry: None,
1059            online: None,
1060            last_seen: None,
1061            key_signature: vec![],
1062            machine_key: None,
1063            disco_key: None,
1064            accepted_routes: vec![],
1065            underlay_addresses: vec![],
1066            derp_region: None,
1067            cap: Default::default(),
1068            cap_map: Default::default(),
1069            peerapi_port: None,
1070            peerapi_dns_proxy: false,
1071            is_wireguard_only: false,
1072            exit_node_dns_resolvers: vec![],
1073            peer_relay: false,
1074            service_vips: Default::default(),
1075        }
1076    }
1077
1078    /// A view with MagicDNS on and a single peer in the db.
1079    fn view_with_peer() -> DnsView {
1080        let mut db = PeerDb::default();
1081        db.upsert(&test_node());
1082
1083        DnsView {
1084            cfg: DnsConfig {
1085                magic_dns: true,
1086                search_domains: vec!["user.ts.net".to_string()],
1087                ..Default::default()
1088            },
1089            peers: Some(Arc::new(db)),
1090            self_node: None,
1091            exit_doh: None,
1092            enable_ipv6: false,
1093            accept_dns: true,
1094        }
1095    }
1096
1097    /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1098    fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1099        let mut buf: Vec<u8> = Vec::new();
1100        buf.extend_from_slice(&id.to_be_bytes());
1101        buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1102        buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1103        buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1104        buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1105        buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1106        for label in labels {
1107            buf.push(label.len() as u8);
1108            buf.extend_from_slice(label.as_bytes());
1109        }
1110        buf.push(0); // root label
1111        buf.extend_from_slice(&qtype.to_be_bytes());
1112        buf.extend_from_slice(&qclass.to_be_bytes());
1113        buf
1114    }
1115
1116    /// Parse a response header: returns `(id, rcode, ancount)`.
1117    fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1118        let id = u16::from_be_bytes([resp[0], resp[1]]);
1119        let flags = u16::from_be_bytes([resp[2], resp[3]]);
1120        let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1121        (id, (flags & 0x000F) as u8, ancount)
1122    }
1123
1124    #[test]
1125    fn a_query_for_known_peer_answers_v4() {
1126        let view = view_with_peer();
1127        let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1128
1129        let resp = answer(&view, &buf).expect("answers");
1130        let (id, rcode, ancount) = parse_header(&resp);
1131        assert_eq!(id, 0x1234);
1132        assert_eq!(rcode, 0, "NoError");
1133        assert_eq!(ancount, 1);
1134
1135        // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1136        let tail = &resp[resp.len() - 4..];
1137        assert_eq!(tail, &[100, 64, 0, 1]);
1138    }
1139
1140    #[test]
1141    fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1142        // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1143        // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1144        // route. This is the anti-fingerprint / no-dead-connections posture.
1145        let view = view_with_peer();
1146        assert!(!view.enable_ipv6, "default gate is off");
1147        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1148
1149        let resp = answer(&view, &buf).expect("answers");
1150        let (_, rcode, ancount) = parse_header(&resp);
1151        assert_eq!(rcode, 0, "NoError (NODATA)");
1152        assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1153    }
1154
1155    #[test]
1156    fn a_query_still_resolves_when_ipv6_off() {
1157        // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1158        let view = view_with_peer();
1159        let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1160
1161        let resp = answer(&view, &buf).expect("answers");
1162        let (_, rcode, ancount) = parse_header(&resp);
1163        assert_eq!(rcode, 0, "NoError");
1164        assert_eq!(ancount, 1);
1165        let tail = &resp[resp.len() - 4..];
1166        assert_eq!(tail, &[100, 64, 0, 1]);
1167    }
1168
1169    #[test]
1170    fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1171        // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1172        let mut view = view_with_peer();
1173        view.enable_ipv6 = true;
1174        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1175
1176        let resp = answer(&view, &buf).expect("answers");
1177        let (_, rcode, ancount) = parse_header(&resp);
1178        assert_eq!(rcode, 0, "NoError");
1179        assert_eq!(ancount, 1);
1180
1181        let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1182        let tail = &resp[resp.len() - 16..];
1183        assert_eq!(tail, expected);
1184    }
1185
1186    #[test]
1187    fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1188        // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1189        // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1190        // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1191        // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1192        let mut db = PeerDb::default();
1193        db.upsert(&test_node());
1194        let view = DnsView {
1195            cfg: DnsConfig {
1196                magic_dns: true,
1197                search_domains: vec!["user.ts.net".to_string()],
1198                fallback_resolvers: vec![DnsResolver {
1199                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1200                    use_with_exit_node: false,
1201                }],
1202                ..Default::default()
1203            },
1204            peers: Some(Arc::new(db)),
1205            self_node: None,
1206            exit_doh: None,
1207            enable_ipv6: false,
1208            accept_dns: true,
1209        };
1210        let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1211
1212        match decide(&view, &buf).expect("decides") {
1213            Decision::Reply(resp) => {
1214                let (_, rcode, _) = parse_header(&resp);
1215                assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1216            }
1217            Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1218        }
1219    }
1220
1221    #[test]
1222    fn bare_hostname_resolves() {
1223        // The name index also stores the bare hostname.
1224        let view = view_with_peer();
1225        let buf = build_query(0x7, &["host"], 1, 1);
1226
1227        let resp = answer(&view, &buf).expect("answers");
1228        let (_, rcode, ancount) = parse_header(&resp);
1229        assert_eq!(rcode, 0);
1230        assert_eq!(ancount, 1);
1231    }
1232
1233    #[test]
1234    fn unknown_name_is_nxdomain() {
1235        let view = view_with_peer();
1236        let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1237
1238        let resp = answer(&view, &buf).expect("answers");
1239        let (_, rcode, ancount) = parse_header(&resp);
1240        assert_eq!(rcode, 3, "NxDomain");
1241        assert_eq!(ancount, 0);
1242    }
1243
1244    #[test]
1245    fn magic_dns_off_is_refused() {
1246        // Fail closed: with MagicDNS disabled, even a known name is refused.
1247        let mut view = view_with_peer();
1248        view.cfg.magic_dns = false;
1249        let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1250
1251        let resp = answer(&view, &buf).expect("answers");
1252        let (_, rcode, ancount) = parse_header(&resp);
1253        assert_eq!(rcode, 5, "Refused");
1254        assert_eq!(ancount, 0);
1255    }
1256
1257    #[test]
1258    fn accept_dns_false_refuses_otherwise_answerable_query() {
1259        // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1260        // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1261        // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1262        let mut view = view_with_peer();
1263        assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1264        view.accept_dns = false;
1265        let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1266
1267        let resp = answer(&view, &buf).expect("answers");
1268        let (_, rcode, ancount) = parse_header(&resp);
1269        assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1270        assert_eq!(ancount, 0);
1271
1272        // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1273        // answers authoritatively — proving the OFF→ON restore is automatic.
1274        view.accept_dns = true;
1275        let resp = answer(&view, &buf).expect("answers");
1276        let (_, rcode, ancount) = parse_header(&resp);
1277        assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1278        assert_eq!(ancount, 1);
1279        let tail = &resp[resp.len() - 4..];
1280        assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1281    }
1282
1283    #[test]
1284    fn default_view_serves_nothing() {
1285        // The default (no dns_config seen) has magic_dns == false: fail closed.
1286        let view = DnsView::default();
1287        let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1288
1289        let resp = answer(&view, &buf).expect("answers");
1290        let (_, rcode, _) = parse_header(&resp);
1291        assert_eq!(rcode, 5, "Refused");
1292    }
1293
1294    #[test]
1295    fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1296        // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1297        // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1298        // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1299        let view = view_with_peer();
1300        let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1301
1302        let resp = answer(&view, &buf).expect("answers");
1303        let (_, rcode, ancount) = parse_header(&resp);
1304        assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1305        assert_eq!(ancount, 0, "no answer records (NODATA)");
1306    }
1307
1308    #[test]
1309    fn unsupported_qtype_off_tailnet_forwards_or_nxdomains() {
1310        // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1311        // REFUSED. With no upstream configured in this view it fails closed to NXDOMAIN (the same
1312        // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1313        // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1314        let view = view_with_peer();
1315        let buf = build_query(0x1, &["example", "com"], 65, 1);
1316
1317        let resp = answer(&view, &buf).expect("answers");
1318        let (_, rcode, _) = parse_header(&resp);
1319        assert_eq!(
1320            rcode, 3,
1321            "off-tailnet, no upstream -> NXDOMAIN (forwardable, not Refused)"
1322        );
1323    }
1324
1325    #[test]
1326    fn malformed_query_is_dropped() {
1327        // A response (QR bit set) is not a query; we drop it (no answer).
1328        let mut buf = build_query(0x1, &["host"], 1, 1);
1329        buf[2] = 0x80; // set QR bit
1330        assert!(answer(&view_with_peer(), &buf).is_none());
1331    }
1332
1333    #[test]
1334    fn ptr_for_known_ip_answers_fqdn() {
1335        let view = view_with_peer();
1336        // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1337        let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1338
1339        let resp = answer(&view, &buf).expect("answers");
1340        let (_, rcode, ancount) = parse_header(&resp);
1341        assert_eq!(rcode, 0, "NoError");
1342        assert_eq!(ancount, 1);
1343
1344        // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1345        let expected = {
1346            let mut out = Vec::new();
1347            for label in ["host", "user", "ts", "net"] {
1348                out.push(label.len() as u8);
1349                out.extend_from_slice(label.as_bytes());
1350            }
1351            out.push(0);
1352            out
1353        };
1354        let tail = &resp[resp.len() - expected.len()..];
1355        assert_eq!(tail, expected.as_slice());
1356    }
1357
1358    #[test]
1359    fn ptr_for_unknown_ip_is_nxdomain() {
1360        let view = view_with_peer();
1361        // 9.9.9.9 is not a known tailnet IP.
1362        let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1363
1364        let resp = answer(&view, &buf).expect("answers");
1365        let (_, rcode, _) = parse_header(&resp);
1366        assert_eq!(rcode, 3, "NxDomain");
1367    }
1368
1369    #[test]
1370    fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
1371        // A view WITH an upstream resolver: an off-tailnet reverse query would forward, but a
1372        // reverse query for an unmatched IP in the CGNAT range (100.64.0.0/10) must fail closed to
1373        // NXDOMAIN — the probed tailnet IP must never leak upstream.
1374        let mut db = PeerDb::default();
1375        db.upsert(&test_node());
1376        let view = DnsView {
1377            cfg: DnsConfig {
1378                magic_dns: true,
1379                search_domains: vec!["user.ts.net".to_string()],
1380                fallback_resolvers: vec![DnsResolver {
1381                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1382                    use_with_exit_node: false,
1383                }],
1384                ..Default::default()
1385            },
1386            peers: Some(Arc::new(db)),
1387            self_node: None,
1388            exit_doh: None,
1389            enable_ipv6: false,
1390            accept_dns: true,
1391        };
1392
1393        // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
1394        let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1395        match decide(&view, &buf).expect("decides") {
1396            Decision::Reply(resp) => {
1397                let (_, rcode, _) = parse_header(&resp);
1398                assert_eq!(rcode, 3, "NxDomain");
1399            }
1400            Decision::Forward { .. } => {
1401                panic!("tailnet CGNAT PTR must never be forwarded upstream")
1402            }
1403        }
1404    }
1405
1406    /// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
1407    /// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
1408    /// never forward. The PTR arm guards this, but the `QType::Other` path routes through
1409    /// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
1410    #[test]
1411    fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
1412        let mut db = PeerDb::default();
1413        db.upsert(&test_node());
1414        let view = DnsView {
1415            cfg: DnsConfig {
1416                magic_dns: true,
1417                search_domains: vec!["user.ts.net".to_string()],
1418                fallback_resolvers: vec![DnsResolver {
1419                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1420                    use_with_exit_node: false,
1421                }],
1422                ..Default::default()
1423            },
1424            peers: Some(Arc::new(db)),
1425            self_node: None,
1426            exit_doh: None,
1427            enable_ipv6: false,
1428            accept_dns: true,
1429        };
1430
1431        // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
1432        let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
1433        match decide(&view, &buf).expect("decides") {
1434            Decision::Reply(resp) => {
1435                let (_, rcode, _) = parse_header(&resp);
1436                assert_eq!(rcode, 3, "NxDomain");
1437            }
1438            Decision::Forward { .. } => {
1439                panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
1440            }
1441        }
1442    }
1443
1444    /// Same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype: must NXDOMAIN, not
1445    /// forward (revealing a tailnet ULA was probed).
1446    #[test]
1447    fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
1448        let view = view_with_routes(
1449            std::collections::BTreeMap::new(),
1450            vec![udp("9.9.9.9:53")],
1451            vec![],
1452        );
1453        // An ip6.arpa reverse name with a TXT (16) qtype must fail closed.
1454        let buf = build_query(
1455            0x37,
1456            &[
1457                "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
1458                "a", "7", "d", "f", "ip6", "arpa",
1459            ],
1460            16,
1461            1,
1462        );
1463        match decide(&view, &buf).expect("decides") {
1464            Decision::Reply(resp) => {
1465                let (_, rcode, _) = parse_header(&resp);
1466                assert_eq!(rcode, 3, "NxDomain");
1467            }
1468            Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
1469        }
1470    }
1471
1472    #[test]
1473    fn is_tailnet_cgnat_classifies_range() {
1474        assert!(is_tailnet_cgnat("100.64.0.0".parse().unwrap()));
1475        assert!(is_tailnet_cgnat("100.64.0.1".parse().unwrap()));
1476        assert!(is_tailnet_cgnat("100.127.255.255".parse().unwrap()));
1477        // Outside the /10:
1478        assert!(!is_tailnet_cgnat("100.63.255.255".parse().unwrap()));
1479        assert!(!is_tailnet_cgnat("100.128.0.0".parse().unwrap()));
1480        assert!(!is_tailnet_cgnat("9.9.9.9".parse().unwrap()));
1481        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
1482        assert!(is_tailnet_cgnat("100.100.100.100".parse().unwrap()));
1483    }
1484
1485    #[test]
1486    fn response_matches_query_validates_id_and_qr() {
1487        // query id 0x1234, QR=0
1488        let query = build_query(0x1234, &["a", "com"], 1, 1);
1489
1490        // A well-formed response: same id, QR=1.
1491        let mut good = query.clone();
1492        good[2] |= 0x80;
1493        assert!(response_matches_query(&query, &good));
1494
1495        // Same id but QR still 0 (not a response): rejected.
1496        assert!(!response_matches_query(&query, &query));
1497
1498        // QR=1 but a different transaction id: rejected (off-path forgery).
1499        let mut wrong_id = good.clone();
1500        wrong_id[0] ^= 0xFF;
1501        assert!(!response_matches_query(&query, &wrong_id));
1502
1503        // Too-short buffers: rejected.
1504        assert!(!response_matches_query(&query, &[0u8; 2]));
1505        assert!(!response_matches_query(&[0u8; 3], &good));
1506    }
1507
1508    #[test]
1509    fn self_node_resolves_when_no_peer_match() {
1510        // With the peer db empty but a self node set, the self node answers for its own name.
1511        let view = DnsView {
1512            cfg: DnsConfig {
1513                magic_dns: true,
1514                search_domains: vec![],
1515                ..Default::default()
1516            },
1517            peers: None,
1518            self_node: Some(test_node()),
1519            exit_doh: None,
1520            enable_ipv6: false,
1521            accept_dns: true,
1522        };
1523        let buf = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
1524
1525        let resp = answer(&view, &buf).expect("answers");
1526        let (_, rcode, ancount) = parse_header(&resp);
1527        assert_eq!(rcode, 0);
1528        assert_eq!(ancount, 1);
1529        let tail = &resp[resp.len() - 4..];
1530        assert_eq!(tail, &[100, 64, 0, 1]);
1531    }
1532
1533    #[test]
1534    fn partially_qualified_name_resolves_via_search_domain() {
1535        // "host.user" is not indexed directly, but the "user.ts.net" search domain qualifies it
1536        // to "host.user.user.ts.net"... which does NOT match. The realistic case is "host" (bare,
1537        // already indexed) and "host.user.ts.net" (fqdn). Verify a name needing suffix expansion:
1538        // with search domain "ts.net" the partially-qualified "host.user" => "host.user.ts.net".
1539        let mut view = view_with_peer();
1540        view.cfg.search_domains = vec!["ts.net".to_string()];
1541        let buf = build_query(0x55, &["host", "user"], 1, 1);
1542
1543        let resp = answer(&view, &buf).expect("answers");
1544        let (_, rcode, ancount) = parse_header(&resp);
1545        assert_eq!(rcode, 0, "NoError via search-domain expansion");
1546        assert_eq!(ancount, 1);
1547        let tail = &resp[resp.len() - 4..];
1548        assert_eq!(tail, &[100, 64, 0, 1]);
1549    }
1550
1551    #[test]
1552    fn extra_record_a_answers_when_no_peer_match() {
1553        // A control-pushed static A record answers for a non-peer name, fail-closed otherwise.
1554        let mut view = view_with_peer();
1555        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1556            name: "static.user.ts.net".to_string(),
1557            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1558        }];
1559        let buf = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
1560
1561        let resp = answer(&view, &buf).expect("answers");
1562        let (_, rcode, ancount) = parse_header(&resp);
1563        assert_eq!(rcode, 0, "NoError from extra record");
1564        assert_eq!(ancount, 1);
1565        let tail = &resp[resp.len() - 4..];
1566        assert_eq!(tail, &[100, 64, 0, 9]);
1567    }
1568
1569    #[test]
1570    fn extra_record_matches_query_case_insensitively() {
1571        // The query name is canonicalized (lowercased) at decode time, so a mixed-case query
1572        // matches a lowercase extra record.
1573        let mut view = view_with_peer();
1574        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1575            name: "static.user.ts.net".to_string(),
1576            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1577        }];
1578        let buf = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
1579
1580        let resp = answer(&view, &buf).expect("answers");
1581        let (_, rcode, ancount) = parse_header(&resp);
1582        assert_eq!(rcode, 0, "NoError: case-insensitive match");
1583        assert_eq!(ancount, 1);
1584        let tail = &resp[resp.len() - 4..];
1585        assert_eq!(tail, &[100, 64, 0, 9]);
1586    }
1587
1588    #[test]
1589    fn extra_record_not_expanded_by_search_domain() {
1590        // Unlike peer names, an extra record is matched as an FQDN only: a bare query that would
1591        // need search-domain expansion to reach the record name must NOT resolve.
1592        let mut view = view_with_peer();
1593        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1594            name: "static.user.ts.net".to_string(),
1595            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1596        }];
1597        // "static" would only reach "static.user.ts.net" via the "user.ts.net" search domain.
1598        let buf = build_query(0x7B, &["static"], 1, 1);
1599
1600        let resp = answer(&view, &buf).expect("answers");
1601        let (_, rcode, _) = parse_header(&resp);
1602        assert_eq!(rcode, 3, "NxDomain: extra records are not search-expanded");
1603    }
1604
1605    #[test]
1606    fn extra_record_aaaa_family_is_isolated() {
1607        // An A-only extra record must NOT answer an AAAA query for the same name (NxDomain).
1608        let mut view = view_with_peer();
1609        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1610            name: "v4only.user.ts.net".to_string(),
1611            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1612        }];
1613        let buf = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
1614
1615        let resp = answer(&view, &buf).expect("answers");
1616        let (_, rcode, _) = parse_header(&resp);
1617        assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
1618    }
1619
1620    #[test]
1621    fn extra_record_ignored_when_magic_dns_off() {
1622        // Fail closed: extra records are never served while MagicDNS is disabled.
1623        let mut view = view_with_peer();
1624        view.cfg.magic_dns = false;
1625        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1626            name: "static.user.ts.net".to_string(),
1627            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1628        }];
1629        let buf = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
1630
1631        let resp = answer(&view, &buf).expect("answers");
1632        let (_, rcode, _) = parse_header(&resp);
1633        assert_eq!(rcode, 5, "Refused");
1634    }
1635
1636    #[test]
1637    fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
1638        // A CHAOS-class (3) query for a tailnet name must NOT be answered as IN (no overlay A), and
1639        // must NOT be REFUSED (Go does no class check on the local path). It's an unsupported
1640        // authoritative class -> NODATA (empty NOERROR), and never forwarded (tailnet name).
1641        let view = view_with_peer();
1642        let buf = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
1643
1644        let resp = answer(&view, &buf).expect("answers");
1645        let (_, rcode, ancount) = parse_header(&resp);
1646        assert_eq!(
1647            rcode, 0,
1648            "NoError (NODATA), not Refused and not an IN answer"
1649        );
1650        assert_eq!(
1651            ancount, 0,
1652            "must not hand out the overlay A for a non-IN class"
1653        );
1654    }
1655
1656    #[test]
1657    fn non_in_class_off_tailnet_forwards_or_nxdomains() {
1658        // A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
1659        // No upstream here -> NXDOMAIN, proving the class gate no longer short-circuits to Refused.
1660        let view = view_with_peer();
1661        let buf = build_query(0x66, &["example", "com"], 1, 3);
1662
1663        let resp = answer(&view, &buf).expect("answers");
1664        let (_, rcode, _) = parse_header(&resp);
1665        assert_eq!(
1666            rcode, 3,
1667            "off-tailnet non-IN class, no upstream -> NXDOMAIN, not Refused"
1668        );
1669    }
1670
1671    /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1672    /// + global resolvers.
1673    fn view_with_routes(
1674        routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1675        resolvers: Vec<DnsResolver>,
1676        fallback: Vec<DnsResolver>,
1677    ) -> DnsView {
1678        DnsView {
1679            cfg: DnsConfig {
1680                magic_dns: true,
1681                search_domains: vec!["user.ts.net".to_string()],
1682                routes,
1683                resolvers,
1684                fallback_resolvers: fallback,
1685                ..Default::default()
1686            },
1687            peers: None,
1688            self_node: None,
1689            exit_doh: None,
1690            enable_ipv6: false,
1691            accept_dns: true,
1692        }
1693    }
1694
1695    fn udp(addr: &str) -> DnsResolver {
1696        DnsResolver {
1697            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1698            use_with_exit_node: false,
1699        }
1700    }
1701
1702    #[test]
1703    fn split_dns_route_forwards_to_matching_upstream() {
1704        let mut routes = std::collections::BTreeMap::new();
1705        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1706        let view = view_with_routes(routes, vec![], vec![]);
1707        let buf = build_query(0x100, &["api", "corp", "example"], 1, 1);
1708
1709        match decide(&view, &buf).expect("decides") {
1710            Decision::Forward { upstreams, .. } => {
1711                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1712            }
1713            Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
1714        }
1715    }
1716
1717    #[test]
1718    fn exotic_qtype_off_tailnet_forwards_to_upstream() {
1719        // The core of the fix: an HTTPS/SVCB (type 65) query for an off-tailnet name with a matching
1720        // route must FORWARD to the upstream (verbatim), exactly like an A query would — not REFUSE
1721        // and not NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
1722        let mut routes = std::collections::BTreeMap::new();
1723        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1724        let view = view_with_routes(routes, vec![], vec![]);
1725        let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
1726
1727        match decide(&view, &buf).expect("decides") {
1728            Decision::Forward {
1729                upstreams, query, ..
1730            } => {
1731                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1732                assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
1733            }
1734            Decision::Reply(_) => {
1735                panic!("an off-tailnet HTTPS-record query must forward, not reply")
1736            }
1737        }
1738    }
1739
1740    #[test]
1741    fn non_in_class_off_tailnet_forwards_to_upstream() {
1742        // A non-IN class for an off-tailnet routed name forwards too (Go does no class check on the
1743        // local path). Proves the class gate no longer short-circuits to REFUSED before routing.
1744        let mut routes = std::collections::BTreeMap::new();
1745        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1746        let view = view_with_routes(routes, vec![], vec![]);
1747        let buf = build_query(0x103, &["api", "corp", "example"], 1, 3);
1748
1749        match decide(&view, &buf).expect("decides") {
1750            Decision::Forward { upstreams, .. } => {
1751                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1752            }
1753            Decision::Reply(_) => {
1754                panic!("an off-tailnet non-IN-class query must forward, not reply")
1755            }
1756        }
1757    }
1758
1759    /// The local responder bounds concurrent in-flight forwards: `serve` acquires one
1760    /// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the query fail-closed when
1761    /// the pool is exhausted (a client spraying forwardable names can't open unbounded overlay
1762    /// sockets). This pins the gating semantics `serve` relies on — drained pool refuses a new
1763    /// permit; releasing one restores capacity — and the cap constant itself. (The async `serve`
1764    /// loop has no netstack-free test seam, so the semaphore behavior is exercised directly here, the
1765    /// same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
1766    #[test]
1767    fn forward_inflight_cap_fails_closed_when_saturated() {
1768        use std::sync::Arc;
1769
1770        use tokio::sync::Semaphore;
1771
1772        let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
1773
1774        // Drain every permit (one per concurrently in-flight forward).
1775        let mut held = Vec::with_capacity(MAX_INFLIGHT_FORWARDS);
1776        for _ in 0..MAX_INFLIGHT_FORWARDS {
1777            held.push(
1778                inflight
1779                    .clone()
1780                    .try_acquire_owned()
1781                    .expect("permits available below the cap"),
1782            );
1783        }
1784
1785        // At the cap, the next forward is refused — `serve` would drop the query, not spawn.
1786        assert!(
1787            inflight.clone().try_acquire_owned().is_err(),
1788            "a saturated forward pool must refuse a new permit (fail closed)"
1789        );
1790
1791        // Completing an in-flight forward releases its permit and restores capacity.
1792        drop(held.pop());
1793        assert!(
1794            inflight.clone().try_acquire_owned().is_ok(),
1795            "releasing a permit must let the next forward proceed"
1796        );
1797    }
1798
1799    /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1800    /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1801    /// only when the task completes. This guards the regression the saturation test above can't see:
1802    /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1803    /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1804    /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1805    /// while the task runs and refill once it finishes.
1806    #[tokio::test]
1807    async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1808        use std::sync::Arc;
1809
1810        use tokio::sync::Semaphore;
1811
1812        let inflight = Arc::new(Semaphore::new(1));
1813        let permit = inflight
1814            .clone()
1815            .try_acquire_owned()
1816            .expect("the sole permit is available");
1817
1818        let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1819        let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1820        let task = tokio::spawn(async move {
1821            // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1822            // task, so it lives until the body ends — not dropped at the `let`.
1823            let _permit = permit;
1824            started_tx.send(()).unwrap();
1825            // Stand in for the `.await` on the upstream forward.
1826            release_rx.await.unwrap();
1827        });
1828
1829        started_rx.await.unwrap();
1830        // While the task runs, the permit it moved in is still held — the pool is empty.
1831        assert!(
1832            inflight.clone().try_acquire_owned().is_err(),
1833            "a permit moved into a running task must stay held across its await"
1834        );
1835
1836        // Let the task finish; its permit drops with the body and capacity returns.
1837        release_tx.send(()).unwrap();
1838        task.await.unwrap();
1839        assert!(
1840            inflight.clone().try_acquire_owned().is_ok(),
1841            "the permit must be released once the task body completes"
1842        );
1843    }
1844
1845    #[test]
1846    fn longest_suffix_route_wins() {
1847        let mut routes = std::collections::BTreeMap::new();
1848        routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
1849        routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
1850        let view = view_with_routes(routes, vec![], vec![]);
1851        let buf = build_query(0x101, &["api", "corp", "example"], 1, 1);
1852
1853        match decide(&view, &buf).expect("decides") {
1854            Decision::Forward { upstreams, .. } => {
1855                assert_eq!(
1856                    upstreams,
1857                    vec!["10.0.0.2:53".parse().unwrap()],
1858                    "longer suffix wins"
1859                );
1860            }
1861            Decision::Reply(_) => panic!("expected forward"),
1862        }
1863    }
1864
1865    #[test]
1866    fn negative_route_is_nxdomain_not_forwarded() {
1867        // An empty upstream list is a negative route: fail closed, never forward.
1868        let mut routes = std::collections::BTreeMap::new();
1869        routes.insert("blocked.example".to_string(), vec![]);
1870        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
1871        let buf = build_query(0x102, &["x", "blocked", "example"], 1, 1);
1872
1873        match decide(&view, &buf).expect("decides") {
1874            Decision::Reply(resp) => {
1875                let (_, rcode, _) = parse_header(&resp);
1876                assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
1877            }
1878            Decision::Forward { .. } => panic!("negative route must not forward"),
1879        }
1880    }
1881
1882    #[test]
1883    fn unrouted_name_forwards_to_fallback_then_global() {
1884        // No route matches: fallback resolvers are preferred over global resolvers.
1885        let view = view_with_routes(
1886            std::collections::BTreeMap::new(),
1887            vec![udp("8.8.8.8:53")],
1888            vec![udp("1.1.1.1:53")],
1889        );
1890        let buf = build_query(0x103, &["example", "com"], 1, 1);
1891
1892        match decide(&view, &buf).expect("decides") {
1893            Decision::Forward { upstreams, .. } => {
1894                assert_eq!(
1895                    upstreams,
1896                    vec!["1.1.1.1:53".parse().unwrap()],
1897                    "fallback preferred"
1898                );
1899            }
1900            Decision::Reply(_) => panic!("expected forward to fallback"),
1901        }
1902    }
1903
1904    #[test]
1905    fn unrouted_name_forwards_to_global_when_no_fallback() {
1906        let view = view_with_routes(
1907            std::collections::BTreeMap::new(),
1908            vec![udp("8.8.8.8:53")],
1909            vec![],
1910        );
1911        let buf = build_query(0x104, &["example", "com"], 1, 1);
1912
1913        match decide(&view, &buf).expect("decides") {
1914            Decision::Forward { upstreams, .. } => {
1915                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
1916            }
1917            Decision::Reply(_) => panic!("expected forward to global resolver"),
1918        }
1919    }
1920
1921    #[test]
1922    fn tailnet_name_is_never_forwarded() {
1923        // Anti-leak: a name under a tailnet search domain that has no overlay match must fail
1924        // closed to NXDOMAIN, never leak to an upstream resolver, even with resolvers configured.
1925        let view = view_with_routes(
1926            std::collections::BTreeMap::new(),
1927            vec![udp("8.8.8.8:53")],
1928            vec![udp("1.1.1.1:53")],
1929        );
1930        // "ghost.user.ts.net" is under the tailnet suffix but matches no peer.
1931        let buf = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
1932
1933        match decide(&view, &buf).expect("decides") {
1934            Decision::Reply(resp) => {
1935                let (_, rcode, _) = parse_header(&resp);
1936                assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
1937            }
1938            Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
1939        }
1940    }
1941
1942    #[test]
1943    fn no_resolvers_fails_closed() {
1944        // No route, no resolvers: an unknown name is NXDOMAIN, not forwarded.
1945        let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);
1946        let buf = build_query(0x106, &["example", "com"], 1, 1);
1947
1948        match decide(&view, &buf).expect("decides") {
1949            Decision::Reply(resp) => {
1950                let (_, rcode, _) = parse_header(&resp);
1951                assert_eq!(rcode, 3, "NxDomain");
1952            }
1953            Decision::Forward { .. } => panic!("must not forward with no resolvers"),
1954        }
1955    }
1956
#[test]
fn overlay_match_wins_over_forwarding() {
    // Precedence check: a name that belongs to a known peer is answered authoritatively, and
    // the presence of an upstream resolver must not turn that into a forward.
    let mut peers = PeerDb::default();
    peers.upsert(&test_node());

    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec![String::from("user.ts.net")],
        resolvers: vec![udp("8.8.8.8:53")],
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: Some(Arc::new(peers)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    let query = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);

    let reply = match decide(&view, &query).expect("decides") {
        Decision::Forward { .. } => panic!("overlay match must not forward"),
        Decision::Reply(bytes) => bytes,
    };

    // Header tuple: element 1 is the response code, element 2 the answer count.
    let header = parse_header(&reply);
    assert_eq!(header.1, 0, "authoritative answer wins");
    assert_eq!(header.2, 1);
}
1986
#[test]
fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
    // Anti-leak guard for reverse lookups: a PTR query under `ip6.arpa` for a tailnet ULA
    // (fd7a:…) has to be answered NXDOMAIN locally. This fork is IPv4-only on the tailnet, so
    // forwarding it — resolvers are configured here — would reveal that a v6 address was probed.
    let routes = std::collections::BTreeMap::new();
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![udp("1.1.1.1:53")]);

    // fd7a::1 written as nibble-reversed labels under ip6.arpa. The guard keys on the
    // `ip6.arpa` ending only, so the precise nibbles are not significant.
    let labels = [
        "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
        "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
        "arpa",
    ];
    // QTYPE 12 is PTR.
    let query = build_query(0x200, &labels, 12, 1);

    let reply = match decide(&view, &query).expect("decides") {
        Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
        Decision::Reply(bytes) => bytes,
    };

    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 3,
        "NxDomain: ip6.arpa reverse must not leak upstream"
    );
}
2017
#[test]
fn cap_response_sets_tc_when_truncated() {
    // When an upstream answer exceeds the single-datagram limit, `cap_response` must both cut
    // it down to that limit and raise the TC flag, so a stub resolver knows to retry over TCP
    // instead of trusting the chopped message.
    const QR_FLAG: u8 = 0x80;
    const TC_FLAG: u8 = 0x02;

    let mut oversized = build_query(0x300, &["example", "com"], 1, 1);
    oversized[2] |= QR_FLAG; // turn the query into a response (QR=1)
    oversized.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);

    let capped = cap_response(oversized);

    assert_eq!(capped.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
    assert_ne!(capped[2] & TC_FLAG, 0, "TC bit set on truncation");
}
2030
#[test]
fn cap_response_leaves_small_response_untouched() {
    // A message that already fits must come back byte-for-byte, and in particular must not
    // have the TC flag switched on.
    const QR_FLAG: u8 = 0x80;
    const TC_FLAG: u8 = 0x02;

    let mut response = build_query(0x301, &["example", "com"], 1, 1);
    response[2] |= QR_FLAG;
    let expected = response.clone();

    let capped = cap_response(response);

    assert_eq!(capped, expected, "small response unchanged");
    assert_eq!(capped[2] & TC_FLAG, 0, "TC bit not set when no truncation");
}
2042
#[test]
fn response_matches_query_rejects_mismatched_question() {
    // Guards against an off-path injector that guesses the transaction id but answers some
    // other question: a candidate with the right id and QR=1 is still rejected unless the
    // echoed question is the one that was asked.
    const QR_FLAG: u8 = 0x80;
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Same id, QR=1, but the QNAME is "b.com" instead of "a.com".
    let mut other_name = build_query(0x1234, &["b", "com"], 1, 1);
    other_name[2] |= QR_FLAG;

    // Same id and name, QR=1, but QTYPE 28 instead of 1.
    let mut other_type = build_query(0x1234, &["a", "com"], 28, 1);
    other_type[2] |= QR_FLAG;

    // The original question echoed verbatim with QR=1.
    let mut echoed = query.clone();
    echoed[2] |= QR_FLAG;

    assert!(
        !response_matches_query(&query, &other_name),
        "different QNAME must be rejected"
    );
    assert!(
        !response_matches_query(&query, &other_type),
        "different QTYPE must be rejected"
    );
    assert!(
        response_matches_query(&query, &echoed),
        "matching question accepted"
    );
}
2072
#[test]
fn suffix_matches_handles_boundaries_and_empty() {
    // (name, suffix) pairs that must match: the suffix itself, and names that end with the
    // suffix at a label boundary.
    let accepted = [("corp", "corp"), ("a.corp", "corp"), ("a.b.corp", "corp")];
    for &(name, suffix) in accepted.iter() {
        assert!(
            suffix_matches(name, suffix),
            "{name:?} should match suffix {suffix:?}"
        );
    }

    // Pairs that must not match: a textual suffix that is not on a label boundary, and the
    // empty suffix (defense-in-depth against `ends_with("")`), even for an empty name.
    let rejected = [("acorp", "corp"), ("anything.example", ""), ("", "")];
    for &(name, suffix) in rejected.iter() {
        assert!(
            !suffix_matches(name, suffix),
            "{name:?} should not match suffix {suffix:?}"
        );
    }
}
2085
#[test]
fn empty_search_domain_does_not_capture_everything() {
    // Defense-in-depth: an empty string in the search-domain list must not classify every
    // name as a tailnet name (that would fail-close or mis-route ordinary recursive queries).
    // With such an entry plus a real resolver, an off-tailnet name is still forwarded.
    let routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());
    view.cfg.search_domains = vec![String::new()];

    let query = build_query(0x400, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Reply(_) => {
            panic!("empty search domain must not treat every name as tailnet")
        }
        Decision::Forward { upstreams, .. } => upstreams,
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
2108
#[test]
fn empty_route_suffix_does_not_capture_everything() {
    // Defense-in-depth: a route keyed by the empty string must not match every name, which
    // would send all queries to that route's upstreams. An unrelated name has to skip it and
    // end up at the global resolver instead.
    let routes: std::collections::BTreeMap<_, _> =
        std::iter::once((String::new(), vec![udp("10.9.9.9:53")])).collect();
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    let query = build_query(0x401, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Reply(_) => panic!("expected forward to global resolver"),
        Decision::Forward { upstreams, .. } => upstreams,
    };
    assert_eq!(
        upstreams,
        vec!["8.8.8.8:53".parse().unwrap()],
        "empty route suffix must not capture; falls through to global"
    );
}
2130
2131    fn udp_exit(addr: &str) -> DnsResolver {
2132        DnsResolver {
2133            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2134            use_with_exit_node: true,
2135        }
2136    }
2137
#[test]
fn recursive_forward_is_flagged_route_forward_is_not() {
    // The `recursive` flag on a forward decision separates the two forwarding paths: it is
    // `true` for a global/fallback forward (eligible for DoH delegation) and `false` for a
    // deliberately configured split-DNS route.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert(String::from("corp.example"), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    // A name under the configured route suffix.
    let routed_query = build_query(0x500, &["api", "corp", "example"], 1, 1);
    let routed_flag = match decide(&view, &routed_query).expect("decides") {
        Decision::Reply(_) => panic!("expected route forward"),
        Decision::Forward { recursive, .. } => recursive,
    };
    assert!(!routed_flag, "split-DNS route is not a recursive forward");

    // A name that matches no route.
    let unrouted_query = build_query(0x501, &["example", "com"], 1, 1);
    let unrouted_flag = match decide(&view, &unrouted_query).expect("decides") {
        Decision::Reply(_) => panic!("expected recursive forward"),
        Decision::Forward { recursive, .. } => recursive,
    };
    assert!(unrouted_flag, "unrouted name is a recursive forward");
}
2162
#[test]
fn recursive_plan_keeps_udp_without_exit_node() {
    // With no exit node active (`exit_doh` is left as the helper built it), the plan for a
    // recursive forward is simply the default UDP upstreams that were passed in.
    let routes = std::collections::BTreeMap::new();
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    let upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, upstreams.clone());

    assert_eq!(plan, RecursivePlan::Udp(upstreams));
}
2177
#[test]
fn recursive_plan_delegates_to_doh_with_exit_node() {
    // With an exit node active and no resolver marked to stay local, recursive queries are
    // handed to the exit node's DoH endpoint, so resolution egresses from the exit node rather
    // than from this host.
    let routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    let doh = "100.64.0.5:8080".parse::<SocketAddr>().unwrap();
    view.exit_doh = Some(doh);

    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);
    assert_eq!(plan, RecursivePlan::Doh(doh));
}
2194
#[test]
fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
    // Resolvers flagged `use_with_exit_node` stay local even while an exit node is active (Go
    // keeps UseWithExitNode resolvers). The plan must forward to those over UDP and must not
    // delegate to DoH.
    let routes = std::collections::BTreeMap::new();
    let resolvers = vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")];
    let mut view = view_with_routes(routes, resolvers, Vec::new());
    view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());

    // The caller-computed default upstreams are deliberately different from the kept-local
    // resolver: when kept-local resolvers exist, the plan has to use them instead.
    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);
    assert_eq!(
        plan,
        RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
    );
}
2212}