Skip to main content

ts_runtime/
magic_dns.rs

1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//!   to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//!   negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//!   us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//!   no route are forwarded to the fallback resolvers, else the global resolvers.
14//! - **Fail closed**: if no route and no resolver is configured, an unknown name is `NXDOMAIN`.
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//!   the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//!   off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//!   `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//!   HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//!   forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//!   `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//!   zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//!   (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37    net::{IpAddr, Ipv4Addr, SocketAddr},
38    sync::Arc,
39    time::Duration,
40};
41
42use kameo::{
43    actor::ActorRef,
44    message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48    sync::{Semaphore, watch},
49    task::JoinSet,
50    time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56    Error,
57    env::Env,
58    peer_tracker::{PeerDb, PeerState},
59};
60
/// Deadline for an upstream resolver to answer a forwarded query; once it passes, the forward is
/// given up on.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Upper bound on forwarded queries that may be in flight at once on the local
/// `100.100.100.100:53` responder.
///
/// Every forward runs on its own task and keeps an overlay UDP socket open until the upstream
/// answers or [`UPSTREAM_TIMEOUT`] expires. Left unbounded, a local/tailnet client spraying
/// distinct forwardable names could pile up sockets and tasks without limit — a
/// resource-exhaustion DoS against a slow or black-holed upstream, because each one lingers for
/// the whole timeout. The bound works like the peerAPI DoH server's request-handler limit
/// ([`crate::peerapi`]'s `MAX_INFLIGHT`): take a permit before spawning, and drop the query
/// (fail closed) when none is available. Dropping a DNS query is benign — the stub resolver
/// retries or times out — and Go's resolver also bounds its outstanding forwards.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Largest forwarded upstream response (one UDP datagram) that is read into memory: 4 KiB.
///
/// Same size as Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). Queries are forwarded
/// verbatim, so a client that advertises a large EDNS UDP size can legitimately draw a
/// 1300–4096 byte UDP answer (big TXT sets, DNSSEC, many-record round-robins). The earlier cap
/// of 1232 truncated such answers and set TC, demanding a TCP retry that this fork's UDP-only
/// forwarder cannot serve — making the large answer unreachable. At 4096 they are relayed intact.
const MAX_UPSTREAM_RESPONSE: usize = 4 * 1024;

/// The MagicDNS service address. It belongs to the netstack interface, so a `udp_bind` on it
/// receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The UDP port the DNS service listens on.
const MAGIC_DNS_PORT: u16 = 53;
88
89/// The latest view the answer loop resolves queries against.
90///
91/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
92/// updates) and read fresh by the answer loop for every packet.
93#[derive(Clone, Default)]
94pub(crate) struct DnsView {
95    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing.
96    pub(crate) cfg: DnsConfig,
97    /// The current peer database, if we've seen a peer update.
98    pub(crate) peers: Option<Arc<PeerDb>>,
99    /// This node, if we've seen a self-node update.
100    pub(crate) self_node: Option<Node>,
101    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
102    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
103    /// recursive resolution to this address over the overlay instead of forwarding to the locally
104    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
105    ///
106    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
107    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
108    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
109    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
110    pub(crate) exit_doh: Option<SocketAddr>,
111    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
112    ///
113    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
114    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
115    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
116    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
117    pub(crate) enable_ipv6: bool,
118    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
119    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
120    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
121    /// join for connectivity without taking over DNS.
122    ///
123    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
124    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
125    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
126    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
127    pub(crate) accept_dns: bool,
128}
129
130impl DnsView {
131    /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132    fn node_by_name(&self, name: &str) -> Option<Node> {
133        if let Some(node) = self
134            .peers
135            .as_ref()
136            .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137        {
138            return Some(node);
139        }
140
141        self.self_node
142            .as_ref()
143            .filter(|n| n.matches_name(name))
144            .cloned()
145    }
146
147    /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148    /// wins first — tried as written and then qualified by each tailnet search domain (so a
149    /// short/partially-qualified name like `host` or `host.user` still resolves to
150    /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151    /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152    /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153    /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154    /// control-pushed static record — never anything else.
155    fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156        let addr_of = |node: Node| -> IpAddr {
157            if want_v4 {
158                IpAddr::from(node.tailnet_address.ipv4.addr())
159            } else {
160                IpAddr::from(node.tailnet_address.ipv6.addr())
161            }
162        };
163
164        if let Some(node) = self.node_by_name(canon) {
165            return Some(addr_of(node));
166        }
167        for suffix in &self.cfg.search_domains {
168            if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169                return Some(addr_of(node));
170            }
171        }
172
173        // Control-pushed static records match the fully-qualified query name only.
174        self.cfg.extra_records.iter().find_map(|rec| {
175            let family_ok = matches!(
176                (rec.addr, want_v4),
177                (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178            );
179            (rec.name == canon && family_ok).then_some(rec.addr)
180        })
181    }
182
183    /// Find the node (peer or self) that owns the tailnet IP `ip`.
184    fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185        if let Some(node) = self
186            .peers
187            .as_ref()
188            .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189        {
190            return Some(node);
191        }
192
193        self.self_node
194            .as_ref()
195            .filter(|n| {
196                IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197                    || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198            })
199            .cloned()
200    }
201
202    /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203    /// resolvers, returning the upstreams to forward to.
204    ///
205    /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206    /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207    /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208    /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209    /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210    /// ([`Upstreams::None`] => NXDOMAIN).
211    fn route_for(&self, name: &str) -> Upstreams<'_> {
212        let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213        for (suffix, upstreams) in &self.cfg.routes {
214            if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215                best = Some((suffix.as_str(), upstreams));
216            }
217        }
218
219        if let Some((_, upstreams)) = best {
220            return if upstreams.is_empty() {
221                Upstreams::Block
222            } else {
223                // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224                // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225                // route) must keep receiving the query directly.
226                Upstreams::Route(upstreams)
227            };
228        }
229
230        if !self.cfg.fallback_resolvers.is_empty() {
231            return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232        }
233        if !self.cfg.resolvers.is_empty() {
234            return Upstreams::Recursive(&self.cfg.resolvers);
235        }
236        Upstreams::None
237    }
238}
239
240/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
241enum Upstreams<'a> {
242    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
243    Route(&'a [DnsResolver]),
244    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
245    /// exit-node DoH delegation in the client serve loop.
246    Recursive(&'a [DnsResolver]),
247    /// A negative split-DNS route matched: do not resolve (NXDOMAIN).
248    Block,
249    /// No route and no resolver configured: fail closed (NXDOMAIN).
250    None,
251}
252
/// Result of the synchronous decision step for one query: a finished response, or an instruction
/// to forward the original query to an upstream resolver.
pub(crate) enum Decision {
    /// The encoded response, ready to be sent back as-is.
    Reply(Vec<u8>),
    /// Send the original query datagram to these upstream UDP resolvers. A successful upstream
    /// answer is relayed; if every upstream fails or times out, `nxdomain` is sent instead.
    Forward {
        /// Upstream UDP resolvers, tried in this order.
        upstreams: Vec<SocketAddr>,
        /// Original query bytes, forwarded verbatim.
        query: Vec<u8>,
        /// Pre-encoded NXDOMAIN response (same id + question) used when all upstreams fail.
        nxdomain: Vec<u8>,
        /// `true` for a *recursive* forward (catch-all fallback/global resolvers), `false` for a
        /// deliberately-configured split-DNS route. The client serve loop may delegate only
        /// recursive forwards to the exit node's DoH endpoint (see [`DnsView::exit_doh`]);
        /// split-DNS routes always stay on their configured upstreams (typically
        /// subnet-reachable internal resolvers). The peerAPI DoH *server* disregards this flag.
        recursive: bool,
    },
}
275
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or when `name` ends with
/// `suffix` and the character just before that tail is a `.` — so `"a.corp"` matches `"corp"`
/// while `"acorp"` does not.
///
/// An **empty** `suffix` never matches. This is defense-in-depth: every name ends with `""`, so
/// accepting it would either over-route or classify everything as a tailnet name, and both
/// outcomes are leak-prone.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    if name == suffix {
        return true;
    }
    // What remains in front of the suffix must end on a label separator.
    name.strip_suffix(suffix)
        .is_some_and(|head| head.ends_with('.'))
}
289
290/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
291/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
292/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
293fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
294    view.cfg
295        .search_domains
296        .iter()
297        .any(|suffix| suffix_matches(name, suffix))
298}
299
300/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
301/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
302/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
303/// such queries fail closed to NXDOMAIN.
304fn is_ip6_arpa(name: &str) -> bool {
305    suffix_matches(name, "ip6.arpa")
306}
307
/// Whether `ip` belongs to the Tailscale CGNAT block `100.64.0.0/10` (RFC 6598), the tailnet's
/// IPv4 space.
///
/// MagicDNS is authoritative for reverse (`PTR`) queries on these addresses: when no peer owns
/// the IP the query fails closed to NXDOMAIN instead of being forwarded to a third-party
/// resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    // A /10 fixes the first octet and the top two bits of the second: 100.64.x.x – 100.127.x.x.
    matches!(ip.octets(), [100, 64..=127, _, _])
}
315
316/// Decide what to do with a single DNS query against `view`: either a complete response is ready
317/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
318/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
319///
320/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
321/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
322/// NXDOMAIN rather than leaking to an upstream resolver.
323pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
324    // Malformed / non-query input is dropped: we never answer something we can't parse.
325    let query = decode_query(buf).ok()?;
326    let q = &query.question;
327    let id = query.id;
328
329    let reply = |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rcode, answers));
330
331    // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
332    // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
333    // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
334    // config and refuses every query. This one read site covers the netstack responder, the peerAPI
335    // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
336    if !view.cfg.magic_dns || !view.accept_dns {
337        return Some(reply(Rcode::Refused, &[]));
338    }
339
340    let canon = q.name.to_canon();
341
342    // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
343    // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
344    // forwards such a query like any other name. Treat it as an unsupported authoritative type —
345    // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
346    // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
347    const CLASS_IN: u16 = 1;
348    if q.qclass != CLASS_IN {
349        return Some(forward_or_nodata(view, &canon, buf, id, q));
350    }
351
352    Some(match &q.qtype {
353        QType::A => match view.resolve_addr(&canon, true) {
354            Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
355            // No overlay/extra-record answer: try split-DNS / recursive upstreams.
356            _ => forward_or_nxdomain(view, &canon, buf, id, q),
357        },
358        QType::Aaaa => match view.resolve_addr(&canon, false) {
359            // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
360            // (default) the client is IPv4-only, so answering with the overlay v6 address would
361            // only hand out an unroutable address — dead connections plus a fingerprint. Return
362            // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
363            // data as historically. We never forward this name to a recursive upstream either way:
364            // a positive overlay match is authoritative.
365            Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
366                reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
367            }
368            Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
369            // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
370            // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
371            _ => forward_or_nxdomain(view, &canon, buf, id, q),
372        },
373        QType::Ptr => match q.name.ptr_to_ipv4() {
374            Some(octets) => {
375                let v4: Ipv4Addr = octets.into();
376                let ip = IpAddr::V4(v4);
377                match view.node_by_ip(ip) {
378                    Some(node) => {
379                        let fqdn = node.fqdn(false);
380                        let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
381                        reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
382                    }
383                    // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
384                    // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
385                    // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
386                    // genuinely off-tailnet reverse queries are forwarded.
387                    None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
388                    None => forward_or_nxdomain(view, &canon, buf, id, q),
389                }
390            }
391            // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
392            // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
393            // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
394            None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
395            None => forward_or_nxdomain(view, &canon, buf, id, q),
396        },
397        // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
398        // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
399        // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
400        // REFUSED as "this server won't serve me" and abandons the resolver, which would break
401        // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
402        // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
403        QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q),
404    })
405}
406
407/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
408/// either forward (to UDP upstreams) or fail closed with NXDOMAIN.
409///
410/// Anti-leak: a name under a tailnet search domain is authoritative and is never forwarded — it
411/// fails closed to NXDOMAIN so neither the name nor the query leaks to a third-party resolver.
412fn forward_or_nxdomain(
413    view: &DnsView,
414    canon: &str,
415    buf: &[u8],
416    id: u16,
417    q: &ts_dns_wire::Question,
418) -> Decision {
419    let nxdomain = encode_response(id, q, Rcode::NxDomain, &[]);
420
421    if is_tailnet_name(view, canon) {
422        return Decision::Reply(nxdomain);
423    }
424
425    let (resolvers, recursive) = match view.route_for(canon) {
426        Upstreams::Route(resolvers) => (resolvers, false),
427        Upstreams::Recursive(resolvers) => (resolvers, true),
428        // Negative route or nothing configured: fail closed.
429        Upstreams::Block | Upstreams::None => return Decision::Reply(nxdomain),
430    };
431
432    let upstreams: Vec<SocketAddr> = resolvers
433        .iter()
434        .map(DnsResolver::udp_addr)
435        // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
436        .filter(SocketAddr::is_ipv4)
437        .collect();
438    if upstreams.is_empty() {
439        Decision::Reply(nxdomain)
440    } else {
441        Decision::Forward {
442            upstreams,
443            query: buf.to_vec(),
444            nxdomain,
445            recursive,
446        }
447    }
448}
449
450/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
451/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
452/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
453/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
454fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
455    matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
456}
457
458/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
459/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
460/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
461/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
462/// ([`is_unimplemented_tailnet_qtype`]).
463///
464/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
465/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
466/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
467/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
468/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
469/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
470/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
471/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
472///
473/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
474/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
475fn forward_or_nodata(
476    view: &DnsView,
477    canon: &str,
478    buf: &[u8],
479    id: u16,
480    q: &ts_dns_wire::Question,
481) -> Decision {
482    // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
483    // name exists, we just hold no record of that type. But a small set of types Go's resolver
484    // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
485    // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
486    // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
487    // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
488    // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
489    // authoritative for.
490    if is_tailnet_name(view, canon) {
491        let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
492            Rcode::NotImpl
493        } else {
494            Rcode::NoError
495        };
496        return Decision::Reply(encode_response(id, q, rcode, &[]));
497    }
498    // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
499    // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
500    // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
501    // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
502    // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
503    // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
504    if is_ip6_arpa(canon) {
505        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
506    }
507    if let Some(octets) = q.name.ptr_to_ipv4()
508        && is_tailnet_cgnat(octets.into())
509    {
510        return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
511    }
512    // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
513    // non-tailnet names and fails closed (NXDOMAIN) when no upstream is configured/routable; reuse it
514    // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN path is unreachable here).
515    forward_or_nxdomain(view, canon, buf, id, q)
516}
517
/// How the client side carries out a *recursive* forward: resolve over local UDP upstreams, or
/// hand the query to the active exit node's peerAPI DoH endpoint over the overlay.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Resolve over UDP against these upstreams. Chosen when no exit node is active, or when
    /// the config carries `use_with_exit_node` resolvers (which stay local even with an exit
    /// node selected).
    Udp(Vec<SocketAddr>),
    /// Hand the query to the exit node's peerAPI DoH server at this overlay address.
    Doh(SocketAddr),
}
528
529/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
530/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
531///
532/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
533/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
534///   resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
535///   is selected); forward to them over UDP, do NOT delegate.
536/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
537///   then egresses from the exit node, not this host (the whole point of routing through an exit
538///   node: this node's real IP is never used to resolve the peer's public names).
539pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
540    let Some(doh) = view.exit_doh else {
541        return RecursivePlan::Udp(default_upstreams);
542    };
543    let kept: Vec<SocketAddr> = view
544        .cfg
545        .resolvers_with_exit_node()
546        .map(DnsResolver::udp_addr)
547        // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
548        .filter(SocketAddr::is_ipv4)
549        .collect();
550    if kept.is_empty() {
551        RecursivePlan::Doh(doh)
552    } else {
553        RecursivePlan::Udp(kept)
554    }
555}
556
557/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
558/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
559/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
560/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
561/// truncation actually occurs.
562fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
563    if resp.len() > MAX_UPSTREAM_RESPONSE {
564        resp.truncate(MAX_UPSTREAM_RESPONSE);
565        // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
566        // capped datagram is always >= the header length, but guard anyway to never panic.
567        if let Some(flags_hi) = resp.get_mut(2) {
568            *flags_hi |= 0x02;
569        }
570    }
571    resp
572}
573
/// Size in bytes of the fixed DNS message header.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question section (QNAME + QTYPE + QCLASS) of `msg`, which begins right
/// after the 12-byte header, and return its byte range.
///
/// Yields [`None`] when the name is malformed, contains a compression pointer (not allowed in
/// a question), or extends beyond the end of the buffer. The range is used to byte-compare the
/// question of a forwarded query with the question echoed in the upstream response.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;

    // Step over the QNAME one length-prefixed label at a time until the root label (0x00).
    loop {
        match usize::from(*msg.get(cursor)?) {
            // Either of the top two bits set: a compression pointer, invalid in a question.
            len if len & 0xC0 != 0 => return None,
            // Root label: consume it, the QNAME is complete.
            0 => {
                cursor += 1;
                break;
            }
            // Ordinary label: skip the length byte plus `len` bytes of label data.
            len => {
                cursor = cursor.checked_add(len + 1)?;
                if cursor > msg.len() {
                    return None;
                }
            }
        }
    }

    // The name is followed by QTYPE (2 bytes) and QCLASS (2 bytes).
    let end = cursor.checked_add(4).filter(|&end| end <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
606
607/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
608/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
609/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
610/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
611/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
612/// answer for a different question, so we also require the echoed question to match.
613fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
614    if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
615        return false;
616    }
617    let id_matches = query[0..2] == resp[0..2];
618    let is_response = resp[2] & 0x80 != 0;
619    if !id_matches || !is_response {
620        return false;
621    }
622    // The response must echo the exact question we asked. Parse both question sections and compare
623    // their bytes; a parse failure on either side is treated as a non-match (fail closed).
624    match (question_range(query), question_range(resp)) {
625        (Some(q), Some(r)) => query[q] == resp[r],
626        _ => false,
627    }
628}
629
630/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
631/// well-formed response, or `nxdomain` if every upstream times out or errors.
632///
633/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
634/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
635/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
636/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
637pub(crate) async fn forward_query(
638    channel: &Channel,
639    upstreams: &[SocketAddr],
640    query: &[u8],
641    nxdomain: Vec<u8>,
642) -> Vec<u8> {
643    for upstream in upstreams {
644        let socket = match channel
645            .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
646            .await
647        {
648            Ok(s) => s,
649            Err(e) => {
650                tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
651                continue;
652            }
653        };
654
655        if let Err(e) = socket.send_to(*upstream, query).await {
656            tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
657            continue;
658        }
659
660        match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
661            Ok(Ok((from, resp))) if !resp.is_empty() => {
662                // Anti-poisoning: only accept a datagram that came from the upstream we queried
663                // and whose DNS header matches this query (same transaction id, QR=response bit
664                // set). An off-path injector racing the real answer is otherwise relayed straight
665                // back to the stub resolver as authoritative.
666                if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
667                    tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
668                    continue;
669                }
670                return cap_response(resp.to_vec());
671            }
672            Ok(Ok(_)) => continue,
673            Ok(Err(e)) => {
674                tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
675                continue;
676            }
677            Err(_) => {
678                tracing::debug!(%upstream, "magic dns upstream timed out");
679                continue;
680            }
681        }
682    }
683    nxdomain
684}
685
686/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
687///
688/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
689/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
690async fn serve(
691    socket: netstack::netsock::UdpSocket,
692    rx: watch::Receiver<Arc<DnsView>>,
693    channel: Channel,
694) {
695    let socket = Arc::new(socket);
696    let mut forwards = JoinSet::new();
697    // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
698    // lifetime of each spawned forward task and released on completion.
699    let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
700    loop {
701        let (src, buf) = match socket.recv_from_bytes().await {
702            Ok(pkt) => pkt,
703            Err(e) => {
704                tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
705                return;
706            }
707        };
708
709        // Read the freshest view per packet.
710        let view = rx.borrow().clone();
711
712        match decide(&view, &buf) {
713            // Malformed query: drop silently.
714            None => continue,
715            Some(Decision::Reply(resp)) => {
716                if let Err(e) = socket.send_to(src, &resp).await {
717                    tracing::warn!(error = %e, %src, "magic dns response send failed");
718                }
719            }
720            Some(Decision::Forward {
721                upstreams,
722                query,
723                nxdomain,
724                recursive,
725            }) => {
726                // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
727                // always stays on its configured upstreams. Decide the plan against the current
728                // view so a query routed while an exit node is active egresses from that exit node.
729                let plan = if recursive {
730                    recursive_plan(&view, upstreams)
731                } else {
732                    RecursivePlan::Udp(upstreams)
733                };
734                // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
735                // times out) rather than spawn an unbounded task that pins an overlay socket for up
736                // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
737                // (NOT `let _ =`, which would drop it immediately) so it is released only when the
738                // task body completes.
739                let Ok(permit) = inflight.clone().try_acquire_owned() else {
740                    tracing::warn!(
741                        %src,
742                        max = MAX_INFLIGHT_FORWARDS,
743                        "magic dns drop: at max in-flight forwarded queries"
744                    );
745                    continue;
746                };
747                let socket = socket.clone();
748                let channel = channel.clone();
749                forwards.spawn(async move {
750                    let _permit = permit;
751                    let resp = match plan {
752                        RecursivePlan::Udp(upstreams) => {
753                            forward_query(&channel, &upstreams, &query, nxdomain).await
754                        }
755                        RecursivePlan::Doh(doh_addr) => {
756                            crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, nxdomain)
757                                .await
758                        }
759                    };
760                    if let Err(e) = socket.send_to(src, &resp).await {
761                        tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
762                    }
763                });
764            }
765        }
766
767        // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
768        // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
769        // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
770        while forwards.try_join_next().is_some() {}
771    }
772}
773
/// The MagicDNS responder actor.
///
/// Subscribes to control state (for the DNS config + self node), peer state (for the peer
/// database), and the active exit node selection (for the exit node's DoH endpoint), keeping a
/// [`DnsView`] that the spawned answer loop reads for every query.
pub struct MagicDnsActor {
    /// Owns the background tasks spawned at start — the socket-serving loop and, when a peerAPI
    /// port is advertised, the peerAPI server — keeping them alive for the lifetime of the actor.
    _joinset: JoinSet<()>,
    /// The latest view, shared with the answer loop.
    view_tx: watch::Sender<Arc<DnsView>>,
    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
    /// rebuild time — not captured once — for a toggle to reach the served view.
    env: Env,
    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
    /// forward path the serve loop uses ([`forward_query`] /
    /// [`forward_doh`](crate::peerapi_doh::forward_doh), both binding `0.0.0.0:0` on this channel —
    /// never a host socket).
    channel: Channel,
}
793
/// A programmatic DNS query routed through the live MagicDNS responder (the `100.100.100.100` path),
/// for [`Device::query_dns`](crate::Device::query_dns). The handler synthesizes a query packet and
/// drives it through the exact same [`decide`]/forward logic as an on-the-wire query, so the result
/// (and its anti-leak posture) matches what a tailnet client would observe.
///
/// The actor answers this message with a [`DnsQueryResult`].
pub struct Query {
    /// The canonical name to resolve (e.g. `example.com`, no trailing dot). The handler tolerates a
    /// single trailing dot (it is stripped) and drops empty labels before encoding the name.
    pub name: String,
    /// The DNS query type (`1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE). The query is
    /// always sent with class IN (`1`).
    pub qtype: u16,
}
804
/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
/// rather than parsed records — this fork's wire codec has no answer-record decoder.
///
/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
#[derive(Debug, Clone, kameo::Reply)]
pub struct DnsQueryResult {
    /// The raw DNS response datagram (header + question + any answer records).
    pub response: Vec<u8>,
    /// The RCODE, taken from the low 4 bits of `response`'s header byte 3, i.e. the second flags
    /// byte (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN, `5`=Refused, …).
    pub rcode: u8,
    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
    /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
    /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
    /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
    pub resolvers_consulted: Vec<SocketAddr>,
}
824
825impl kameo::Actor for MagicDnsActor {
826    type Args = (Env, Channel);
827    type Error = Error;
828
829    async fn on_start(
830        (env, channel): Self::Args,
831        slf: ActorRef<Self>,
832    ) -> Result<Self, Self::Error> {
833        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
834        env.subscribe::<Arc<PeerState>>(&slf).await?;
835        env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
836            .await?;
837
838        // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
839        // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
840        // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
841        // runtime-settable). The seed value is moot — no query is served before the first
842        // StateUpdate — but seeding it keeps the pre-update view internally consistent.
843        let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
844            enable_ipv6: env.enable_ipv6,
845            accept_dns: env.accept_dns(),
846            ..DnsView::default()
847        }));
848
849        let mut joinset = JoinSet::new();
850
851        // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
852        // never answers anything) so a transient bind error doesn't take down the runtime.
853        let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
854        match channel.udp_bind(addr).await {
855            Ok(socket) => {
856                tracing::debug!(%addr, "magic dns responder bound");
857                joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
858            }
859            Err(e) => {
860                tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
861            }
862        }
863
864        // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
865        // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
866        // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
867        // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
868        if let Some(port) = env.peerapi_port {
869            let channel = channel.clone();
870            let view_rx = view_rx.clone();
871            let forward_exit_egress = env.forward_exit_egress;
872            let taildrop = env.taildrop_store.clone();
873            let funnel_ingress = env.funnel_ingress.clone();
874            joinset.spawn(crate::peerapi::serve(
875                channel,
876                port,
877                view_rx,
878                forward_exit_egress,
879                taildrop,
880                funnel_ingress,
881            ));
882        }
883
884        Ok(Self {
885            _joinset: joinset,
886            view_tx,
887            env,
888            channel,
889        })
890    }
891}
892
/// Build a bare SERVFAIL response header for a [`Query`] whose name could not be encoded into a
/// well-formed query (a non-ASCII label or an over-255-byte name).
///
/// The result is exactly one 12-byte DNS header with QR=1 (this is a response) and RCODE=2 (server
/// failure); every other bit is clear and there is no question or answer section (no parseable
/// question was ever produced). It lets `query_dns` return a definite, honest RCODE instead of an
/// empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    // QR is the high bit of header byte 2; RCODE is the low nibble of header byte 3.
    const QR_RESPONSE: u8 = 0x80;
    const RCODE_SERVFAIL: u8 = 0x02;

    let mut header = [0u8; 12];
    header[2] = QR_RESPONSE;
    header[3] = RCODE_SERVFAIL;
    header.to_vec()
}
905
906impl Message<Query> for MagicDnsActor {
907    type Reply = DnsQueryResult;
908
909    async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
910        // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
911        // uses, against the freshest view — so the result and its anti-leak posture exactly match an
912        // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
913        // and `response_matches_query` validates the echoed id against this same buffer.
914        //
915        // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
916        // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
917        // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
918        // corrupting the QTYPE/QCLASS that follow.
919        let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
920        let labels: Vec<String> = trimmed
921            .split('.')
922            .filter(|label| !label.is_empty())
923            .map(str::to_owned)
924            .collect();
925        let qtype = match query.qtype {
926            1 => ts_dns_wire::QType::A,
927            28 => ts_dns_wire::QType::Aaaa,
928            12 => ts_dns_wire::QType::Ptr,
929            other => ts_dns_wire::QType::Other(other),
930        };
931        // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
932        // forwards via `forward_or_nodata`, matching the on-the-wire path).
933        let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
934
935        let view = self.view_tx.borrow().clone();
936
937        let (response, resolvers_consulted) = match decide(&view, &buf) {
938            // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
939            // the name normalized above that can still happen for a name `encode_query` accepts but
940            // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
941            // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
942            // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
943            // silently drops here (the on-wire client times out); a programmatic caller gets a
944            // definite, honest error instead.
945            None => (servfail_response(), Vec::new()),
946            Some(Decision::Reply(resp)) => (resp, Vec::new()),
947            Some(Decision::Forward {
948                upstreams,
949                query,
950                nxdomain,
951                recursive,
952            }) => {
953                let plan = if recursive {
954                    recursive_plan(&view, upstreams)
955                } else {
956                    RecursivePlan::Udp(upstreams)
957                };
958                match plan {
959                    RecursivePlan::Udp(upstreams) => {
960                        let resp = forward_query(&self.channel, &upstreams, &query, nxdomain).await;
961                        (resp, upstreams)
962                    }
963                    RecursivePlan::Doh(doh_addr) => {
964                        let resp = crate::peerapi_doh::forward_doh(
965                            &self.channel,
966                            doh_addr,
967                            &query,
968                            nxdomain,
969                        )
970                        .await;
971                        // The query egressed via the exit node's DoH endpoint, not a local UDP
972                        // upstream — report the DoH address as the resolver consulted.
973                        (resp, vec![doh_addr])
974                    }
975                }
976            }
977        };
978
979        // RCODE is the low 4 bits of the second flags byte (header byte 3).
980        let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
981
982        DnsQueryResult {
983            response,
984            rcode,
985            resolvers_consulted,
986        }
987    }
988}
989
990impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
991    type Reply = ();
992
993    async fn handle(
994        &mut self,
995        update: Arc<ts_control::StateUpdate>,
996        _ctx: &mut Context<Self, Self::Reply>,
997    ) {
998        // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
999        // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1000        let accept_dns = self.env.accept_dns();
1001        self.view_tx.send_modify(|view| {
1002            let mut next = (**view).clone();
1003            next.cfg = update.dns_config.clone().unwrap_or_default();
1004            next.self_node = update.node.clone();
1005            next.accept_dns = accept_dns;
1006            *view = Arc::new(next);
1007        });
1008    }
1009}
1010
1011impl Message<Arc<PeerState>> for MagicDnsActor {
1012    type Reply = ();
1013
1014    async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1015        // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1016        // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1017        // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1018        let accept_dns = self.env.accept_dns();
1019        self.view_tx.send_modify(|view| {
1020            let mut next = (**view).clone();
1021            next.peers = Some(state.peers.clone());
1022            next.accept_dns = accept_dns;
1023            *view = Arc::new(next);
1024        });
1025    }
1026}
1027
1028impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1029    type Reply = ();
1030
1031    async fn handle(
1032        &mut self,
1033        active: crate::route_updater::ActiveExitNode,
1034        _ctx: &mut Context<Self, Self::Reply>,
1035    ) {
1036        // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1037        // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1038        // the address here — once, from the route updater's authoritative selection — means the
1039        // serve loop never re-resolves the selector.
1040        let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1041        self.view_tx.send_modify(|view| {
1042            let mut next = (**view).clone();
1043            next.exit_doh = exit_doh;
1044            *view = Arc::new(next);
1045        });
1046    }
1047}
1048
1049#[cfg(test)]
1050mod tests {
1051    use ts_control::{StableNodeId, TailnetAddress};
1052
1053    use super::*;
1054
1055    /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1056    /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1057    /// `Forward`; a `Forward` here is a bug and panics.
1058    fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1059        match decide(view, buf)? {
1060            Decision::Reply(resp) => Some(resp),
1061            Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1062        }
1063    }
1064
1065    /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1066    fn test_node() -> Node {
1067        Node {
1068            id: 1,
1069            stable_id: StableNodeId("n1".to_string()),
1070            hostname: "host".to_string(),
1071            user_id: 0,
1072            tailnet: Some("user.ts.net".to_string()),
1073            tags: vec![],
1074            tailnet_address: TailnetAddress {
1075                ipv4: "100.64.0.1/32".parse().unwrap(),
1076                ipv6: "fd7a::1/128".parse().unwrap(),
1077            },
1078            node_key: [0u8; 32].into(),
1079            node_key_expiry: None,
1080            online: None,
1081            last_seen: None,
1082            key_signature: vec![],
1083            machine_key: None,
1084            disco_key: None,
1085            accepted_routes: vec![],
1086            underlay_addresses: vec![],
1087            derp_region: None,
1088            cap: Default::default(),
1089            cap_map: Default::default(),
1090            peerapi_port: None,
1091            peerapi_dns_proxy: false,
1092            is_wireguard_only: false,
1093            exit_node_dns_resolvers: vec![],
1094            peer_relay: false,
1095            service_vips: Default::default(),
1096        }
1097    }
1098
1099    /// A view with MagicDNS on and a single peer in the db.
1100    fn view_with_peer() -> DnsView {
1101        let mut db = PeerDb::default();
1102        db.upsert(&test_node());
1103
1104        DnsView {
1105            cfg: DnsConfig {
1106                magic_dns: true,
1107                search_domains: vec!["user.ts.net".to_string()],
1108                ..Default::default()
1109            },
1110            peers: Some(Arc::new(db)),
1111            self_node: None,
1112            exit_doh: None,
1113            enable_ipv6: false,
1114            accept_dns: true,
1115        }
1116    }
1117
1118    /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1119    fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1120        let mut buf: Vec<u8> = Vec::new();
1121        buf.extend_from_slice(&id.to_be_bytes());
1122        buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1123        buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1124        buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1125        buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1126        buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1127        for label in labels {
1128            buf.push(label.len() as u8);
1129            buf.extend_from_slice(label.as_bytes());
1130        }
1131        buf.push(0); // root label
1132        buf.extend_from_slice(&qtype.to_be_bytes());
1133        buf.extend_from_slice(&qclass.to_be_bytes());
1134        buf
1135    }
1136
1137    /// Parse a response header: returns `(id, rcode, ancount)`.
1138    fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1139        let id = u16::from_be_bytes([resp[0], resp[1]]);
1140        let flags = u16::from_be_bytes([resp[2], resp[3]]);
1141        let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1142        (id, (flags & 0x000F) as u8, ancount)
1143    }
1144
1145    #[test]
1146    fn a_query_for_known_peer_answers_v4() {
1147        let view = view_with_peer();
1148        let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1149
1150        let resp = answer(&view, &buf).expect("answers");
1151        let (id, rcode, ancount) = parse_header(&resp);
1152        assert_eq!(id, 0x1234);
1153        assert_eq!(rcode, 0, "NoError");
1154        assert_eq!(ancount, 1);
1155
1156        // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1157        let tail = &resp[resp.len() - 4..];
1158        assert_eq!(tail, &[100, 64, 0, 1]);
1159    }
1160
1161    #[test]
1162    fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1163        // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1164        // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1165        // route. This is the anti-fingerprint / no-dead-connections posture.
1166        let view = view_with_peer();
1167        assert!(!view.enable_ipv6, "default gate is off");
1168        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1169
1170        let resp = answer(&view, &buf).expect("answers");
1171        let (_, rcode, ancount) = parse_header(&resp);
1172        assert_eq!(rcode, 0, "NoError (NODATA)");
1173        assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1174    }
1175
1176    #[test]
1177    fn a_query_still_resolves_when_ipv6_off() {
1178        // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1179        let view = view_with_peer();
1180        let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1181
1182        let resp = answer(&view, &buf).expect("answers");
1183        let (_, rcode, ancount) = parse_header(&resp);
1184        assert_eq!(rcode, 0, "NoError");
1185        assert_eq!(ancount, 1);
1186        let tail = &resp[resp.len() - 4..];
1187        assert_eq!(tail, &[100, 64, 0, 1]);
1188    }
1189
1190    #[test]
1191    fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1192        // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1193        let mut view = view_with_peer();
1194        view.enable_ipv6 = true;
1195        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1196
1197        let resp = answer(&view, &buf).expect("answers");
1198        let (_, rcode, ancount) = parse_header(&resp);
1199        assert_eq!(rcode, 0, "NoError");
1200        assert_eq!(ancount, 1);
1201
1202        let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1203        let tail = &resp[resp.len() - 16..];
1204        assert_eq!(tail, expected);
1205    }
1206
1207    #[test]
1208    fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1209        // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1210        // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1211        // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1212        // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1213        let mut db = PeerDb::default();
1214        db.upsert(&test_node());
1215        let view = DnsView {
1216            cfg: DnsConfig {
1217                magic_dns: true,
1218                search_domains: vec!["user.ts.net".to_string()],
1219                fallback_resolvers: vec![DnsResolver {
1220                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1221                    use_with_exit_node: false,
1222                }],
1223                ..Default::default()
1224            },
1225            peers: Some(Arc::new(db)),
1226            self_node: None,
1227            exit_doh: None,
1228            enable_ipv6: false,
1229            accept_dns: true,
1230        };
1231        let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1232
1233        match decide(&view, &buf).expect("decides") {
1234            Decision::Reply(resp) => {
1235                let (_, rcode, _) = parse_header(&resp);
1236                assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1237            }
1238            Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1239        }
1240    }
1241
1242    #[test]
1243    fn bare_hostname_resolves() {
1244        // The name index also stores the bare hostname.
1245        let view = view_with_peer();
1246        let buf = build_query(0x7, &["host"], 1, 1);
1247
1248        let resp = answer(&view, &buf).expect("answers");
1249        let (_, rcode, ancount) = parse_header(&resp);
1250        assert_eq!(rcode, 0);
1251        assert_eq!(ancount, 1);
1252    }
1253
1254    #[test]
1255    fn unknown_name_is_nxdomain() {
1256        let view = view_with_peer();
1257        let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1258
1259        let resp = answer(&view, &buf).expect("answers");
1260        let (_, rcode, ancount) = parse_header(&resp);
1261        assert_eq!(rcode, 3, "NxDomain");
1262        assert_eq!(ancount, 0);
1263    }
1264
1265    #[test]
1266    fn magic_dns_off_is_refused() {
1267        // Fail closed: with MagicDNS disabled, even a known name is refused.
1268        let mut view = view_with_peer();
1269        view.cfg.magic_dns = false;
1270        let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1271
1272        let resp = answer(&view, &buf).expect("answers");
1273        let (_, rcode, ancount) = parse_header(&resp);
1274        assert_eq!(rcode, 5, "Refused");
1275        assert_eq!(ancount, 0);
1276    }
1277
1278    #[test]
1279    fn accept_dns_false_refuses_otherwise_answerable_query() {
1280        // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1281        // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1282        // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1283        let mut view = view_with_peer();
1284        assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1285        view.accept_dns = false;
1286        let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1287
1288        let resp = answer(&view, &buf).expect("answers");
1289        let (_, rcode, ancount) = parse_header(&resp);
1290        assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1291        assert_eq!(ancount, 0);
1292
1293        // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1294        // answers authoritatively — proving the OFF→ON restore is automatic.
1295        view.accept_dns = true;
1296        let resp = answer(&view, &buf).expect("answers");
1297        let (_, rcode, ancount) = parse_header(&resp);
1298        assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1299        assert_eq!(ancount, 1);
1300        let tail = &resp[resp.len() - 4..];
1301        assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1302    }
1303
1304    #[test]
1305    fn default_view_serves_nothing() {
1306        // The default (no dns_config seen) has magic_dns == false: fail closed.
1307        let view = DnsView::default();
1308        let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1309
1310        let resp = answer(&view, &buf).expect("answers");
1311        let (_, rcode, _) = parse_header(&resp);
1312        assert_eq!(rcode, 5, "Refused");
1313    }
1314
1315    #[test]
1316    fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1317        // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1318        // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1319        // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1320        let view = view_with_peer();
1321        let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1322
1323        let resp = answer(&view, &buf).expect("answers");
1324        let (_, rcode, ancount) = parse_header(&resp);
1325        assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1326        assert_eq!(ancount, 0, "no answer records (NODATA)");
1327    }
1328
1329    #[test]
1330    fn unsupported_qtype_off_tailnet_forwards_or_nxdomains() {
1331        // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1332        // REFUSED. With no upstream configured in this view it fails closed to NXDOMAIN (the same
1333        // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1334        // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1335        let view = view_with_peer();
1336        let buf = build_query(0x1, &["example", "com"], 65, 1);
1337
1338        let resp = answer(&view, &buf).expect("answers");
1339        let (_, rcode, _) = parse_header(&resp);
1340        assert_eq!(
1341            rcode, 3,
1342            "off-tailnet, no upstream -> NXDOMAIN (forwardable, not Refused)"
1343        );
1344    }
1345
1346    #[test]
1347    fn unimplemented_qtype_on_tailnet_name_is_notimp() {
1348        // NS (2), SOA (6), HINFO (13), AXFR (252) for a tailnet-authoritative name must answer NOTIMP
1349        // (rcode 4), matching Go `resolveLocal`'s `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR,
1350        // dns.TypeHINFO: return RCodeNotImplemented`. Returning NODATA (rcode 0) here was a clean
1351        // fingerprint (a `dig SOA user.ts.net` answer differs from real tailscaled). The name is
1352        // still never forwarded (anti-leak).
1353        let view = view_with_peer();
1354        for qtype in [2u16, 6, 13, 252] {
1355            let buf = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
1356            let resp = answer(&view, &buf).expect("answers");
1357            let (_, rcode, ancount) = parse_header(&resp);
1358            assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
1359            assert_eq!(ancount, 0, "NOTIMP carries no answer records");
1360        }
1361    }
1362
1363    #[test]
1364    fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
1365        // The NOTIMP disposition is ONLY for a name we are authoritative for. An NS query for an
1366        // off-tailnet name must still forward (here: NXDOMAIN, no upstream) — NOT NOTIMP — exactly
1367        // like the off-tailnet HTTPS/SVCB case above. Guards the NOTIMP change against over-reach.
1368        let view = view_with_peer();
1369        let buf = build_query(0x1, &["example", "com"], 2, 1); // NS, off-tailnet
1370        let resp = answer(&view, &buf).expect("answers");
1371        let (_, rcode, _) = parse_header(&resp);
1372        assert_eq!(
1373            rcode, 3,
1374            "off-tailnet NS -> NXDOMAIN (forwardable), not NOTIMP"
1375        );
1376    }
1377
1378    #[test]
1379    fn malformed_query_is_dropped() {
1380        // A response (QR bit set) is not a query; we drop it (no answer).
1381        let mut buf = build_query(0x1, &["host"], 1, 1);
1382        buf[2] = 0x80; // set QR bit
1383        assert!(answer(&view_with_peer(), &buf).is_none());
1384    }
1385
1386    #[test]
1387    fn ptr_for_known_ip_answers_fqdn() {
1388        let view = view_with_peer();
1389        // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1390        let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1391
1392        let resp = answer(&view, &buf).expect("answers");
1393        let (_, rcode, ancount) = parse_header(&resp);
1394        assert_eq!(rcode, 0, "NoError");
1395        assert_eq!(ancount, 1);
1396
1397        // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1398        let expected = {
1399            let mut out = Vec::new();
1400            for label in ["host", "user", "ts", "net"] {
1401                out.push(label.len() as u8);
1402                out.extend_from_slice(label.as_bytes());
1403            }
1404            out.push(0);
1405            out
1406        };
1407        let tail = &resp[resp.len() - expected.len()..];
1408        assert_eq!(tail, expected.as_slice());
1409    }
1410
1411    #[test]
1412    fn ptr_for_unknown_ip_is_nxdomain() {
1413        let view = view_with_peer();
1414        // 9.9.9.9 is not a known tailnet IP.
1415        let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1416
1417        let resp = answer(&view, &buf).expect("answers");
1418        let (_, rcode, _) = parse_header(&resp);
1419        assert_eq!(rcode, 3, "NxDomain");
1420    }
1421
1422    #[test]
1423    fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
1424        // A view WITH an upstream resolver: an off-tailnet reverse query would forward, but a
1425        // reverse query for an unmatched IP in the CGNAT range (100.64.0.0/10) must fail closed to
1426        // NXDOMAIN — the probed tailnet IP must never leak upstream.
1427        let mut db = PeerDb::default();
1428        db.upsert(&test_node());
1429        let view = DnsView {
1430            cfg: DnsConfig {
1431                magic_dns: true,
1432                search_domains: vec!["user.ts.net".to_string()],
1433                fallback_resolvers: vec![DnsResolver {
1434                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1435                    use_with_exit_node: false,
1436                }],
1437                ..Default::default()
1438            },
1439            peers: Some(Arc::new(db)),
1440            self_node: None,
1441            exit_doh: None,
1442            enable_ipv6: false,
1443            accept_dns: true,
1444        };
1445
1446        // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
1447        let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1448        match decide(&view, &buf).expect("decides") {
1449            Decision::Reply(resp) => {
1450                let (_, rcode, _) = parse_header(&resp);
1451                assert_eq!(rcode, 3, "NxDomain");
1452            }
1453            Decision::Forward { .. } => {
1454                panic!("tailnet CGNAT PTR must never be forwarded upstream")
1455            }
1456        }
1457    }
1458
1459    /// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
1460    /// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
1461    /// never forward. The PTR arm guards this, but the `QType::Other` path routes through
1462    /// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
1463    #[test]
1464    fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
1465        let mut db = PeerDb::default();
1466        db.upsert(&test_node());
1467        let view = DnsView {
1468            cfg: DnsConfig {
1469                magic_dns: true,
1470                search_domains: vec!["user.ts.net".to_string()],
1471                fallback_resolvers: vec![DnsResolver {
1472                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1473                    use_with_exit_node: false,
1474                }],
1475                ..Default::default()
1476            },
1477            peers: Some(Arc::new(db)),
1478            self_node: None,
1479            exit_doh: None,
1480            enable_ipv6: false,
1481            accept_dns: true,
1482        };
1483
1484        // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
1485        let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
1486        match decide(&view, &buf).expect("decides") {
1487            Decision::Reply(resp) => {
1488                let (_, rcode, _) = parse_header(&resp);
1489                assert_eq!(rcode, 3, "NxDomain");
1490            }
1491            Decision::Forward { .. } => {
1492                panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
1493            }
1494        }
1495    }
1496
1497    /// Same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype: must NXDOMAIN, not
1498    /// forward (revealing a tailnet ULA was probed).
1499    #[test]
1500    fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
1501        let view = view_with_routes(
1502            std::collections::BTreeMap::new(),
1503            vec![udp("9.9.9.9:53")],
1504            vec![],
1505        );
1506        // An ip6.arpa reverse name with a TXT (16) qtype must fail closed.
1507        let buf = build_query(
1508            0x37,
1509            &[
1510                "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
1511                "a", "7", "d", "f", "ip6", "arpa",
1512            ],
1513            16,
1514            1,
1515        );
1516        match decide(&view, &buf).expect("decides") {
1517            Decision::Reply(resp) => {
1518                let (_, rcode, _) = parse_header(&resp);
1519                assert_eq!(rcode, 3, "NxDomain");
1520            }
1521            Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
1522        }
1523    }
1524
1525    #[test]
1526    fn is_tailnet_cgnat_classifies_range() {
1527        assert!(is_tailnet_cgnat("100.64.0.0".parse().unwrap()));
1528        assert!(is_tailnet_cgnat("100.64.0.1".parse().unwrap()));
1529        assert!(is_tailnet_cgnat("100.127.255.255".parse().unwrap()));
1530        // Outside the /10:
1531        assert!(!is_tailnet_cgnat("100.63.255.255".parse().unwrap()));
1532        assert!(!is_tailnet_cgnat("100.128.0.0".parse().unwrap()));
1533        assert!(!is_tailnet_cgnat("9.9.9.9".parse().unwrap()));
1534        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
1535        assert!(is_tailnet_cgnat("100.100.100.100".parse().unwrap()));
1536    }
1537
1538    #[test]
1539    fn response_matches_query_validates_id_and_qr() {
1540        // query id 0x1234, QR=0
1541        let query = build_query(0x1234, &["a", "com"], 1, 1);
1542
1543        // A well-formed response: same id, QR=1.
1544        let mut good = query.clone();
1545        good[2] |= 0x80;
1546        assert!(response_matches_query(&query, &good));
1547
1548        // Same id but QR still 0 (not a response): rejected.
1549        assert!(!response_matches_query(&query, &query));
1550
1551        // QR=1 but a different transaction id: rejected (off-path forgery).
1552        let mut wrong_id = good.clone();
1553        wrong_id[0] ^= 0xFF;
1554        assert!(!response_matches_query(&query, &wrong_id));
1555
1556        // Too-short buffers: rejected.
1557        assert!(!response_matches_query(&query, &[0u8; 2]));
1558        assert!(!response_matches_query(&[0u8; 3], &good));
1559    }
1560
1561    #[test]
1562    fn self_node_resolves_when_no_peer_match() {
1563        // With the peer db empty but a self node set, the self node answers for its own name.
1564        let view = DnsView {
1565            cfg: DnsConfig {
1566                magic_dns: true,
1567                search_domains: vec![],
1568                ..Default::default()
1569            },
1570            peers: None,
1571            self_node: Some(test_node()),
1572            exit_doh: None,
1573            enable_ipv6: false,
1574            accept_dns: true,
1575        };
1576        let buf = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
1577
1578        let resp = answer(&view, &buf).expect("answers");
1579        let (_, rcode, ancount) = parse_header(&resp);
1580        assert_eq!(rcode, 0);
1581        assert_eq!(ancount, 1);
1582        let tail = &resp[resp.len() - 4..];
1583        assert_eq!(tail, &[100, 64, 0, 1]);
1584    }
1585
1586    #[test]
1587    fn partially_qualified_name_resolves_via_search_domain() {
1588        // "host.user" is not indexed directly, but the "user.ts.net" search domain qualifies it
1589        // to "host.user.user.ts.net"... which does NOT match. The realistic case is "host" (bare,
1590        // already indexed) and "host.user.ts.net" (fqdn). Verify a name needing suffix expansion:
1591        // with search domain "ts.net" the partially-qualified "host.user" => "host.user.ts.net".
1592        let mut view = view_with_peer();
1593        view.cfg.search_domains = vec!["ts.net".to_string()];
1594        let buf = build_query(0x55, &["host", "user"], 1, 1);
1595
1596        let resp = answer(&view, &buf).expect("answers");
1597        let (_, rcode, ancount) = parse_header(&resp);
1598        assert_eq!(rcode, 0, "NoError via search-domain expansion");
1599        assert_eq!(ancount, 1);
1600        let tail = &resp[resp.len() - 4..];
1601        assert_eq!(tail, &[100, 64, 0, 1]);
1602    }
1603
1604    #[test]
1605    fn extra_record_a_answers_when_no_peer_match() {
1606        // A control-pushed static A record answers for a non-peer name, fail-closed otherwise.
1607        let mut view = view_with_peer();
1608        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1609            name: "static.user.ts.net".to_string(),
1610            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1611        }];
1612        let buf = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
1613
1614        let resp = answer(&view, &buf).expect("answers");
1615        let (_, rcode, ancount) = parse_header(&resp);
1616        assert_eq!(rcode, 0, "NoError from extra record");
1617        assert_eq!(ancount, 1);
1618        let tail = &resp[resp.len() - 4..];
1619        assert_eq!(tail, &[100, 64, 0, 9]);
1620    }
1621
1622    #[test]
1623    fn extra_record_matches_query_case_insensitively() {
1624        // The query name is canonicalized (lowercased) at decode time, so a mixed-case query
1625        // matches a lowercase extra record.
1626        let mut view = view_with_peer();
1627        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1628            name: "static.user.ts.net".to_string(),
1629            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1630        }];
1631        let buf = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
1632
1633        let resp = answer(&view, &buf).expect("answers");
1634        let (_, rcode, ancount) = parse_header(&resp);
1635        assert_eq!(rcode, 0, "NoError: case-insensitive match");
1636        assert_eq!(ancount, 1);
1637        let tail = &resp[resp.len() - 4..];
1638        assert_eq!(tail, &[100, 64, 0, 9]);
1639    }
1640
1641    #[test]
1642    fn extra_record_not_expanded_by_search_domain() {
1643        // Unlike peer names, an extra record is matched as an FQDN only: a bare query that would
1644        // need search-domain expansion to reach the record name must NOT resolve.
1645        let mut view = view_with_peer();
1646        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1647            name: "static.user.ts.net".to_string(),
1648            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1649        }];
1650        // "static" would only reach "static.user.ts.net" via the "user.ts.net" search domain.
1651        let buf = build_query(0x7B, &["static"], 1, 1);
1652
1653        let resp = answer(&view, &buf).expect("answers");
1654        let (_, rcode, _) = parse_header(&resp);
1655        assert_eq!(rcode, 3, "NxDomain: extra records are not search-expanded");
1656    }
1657
1658    #[test]
1659    fn extra_record_aaaa_family_is_isolated() {
1660        // An A-only extra record must NOT answer an AAAA query for the same name (NxDomain).
1661        let mut view = view_with_peer();
1662        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1663            name: "v4only.user.ts.net".to_string(),
1664            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1665        }];
1666        let buf = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
1667
1668        let resp = answer(&view, &buf).expect("answers");
1669        let (_, rcode, _) = parse_header(&resp);
1670        assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
1671    }
1672
1673    #[test]
1674    fn extra_record_ignored_when_magic_dns_off() {
1675        // Fail closed: extra records are never served while MagicDNS is disabled.
1676        let mut view = view_with_peer();
1677        view.cfg.magic_dns = false;
1678        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1679            name: "static.user.ts.net".to_string(),
1680            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1681        }];
1682        let buf = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
1683
1684        let resp = answer(&view, &buf).expect("answers");
1685        let (_, rcode, _) = parse_header(&resp);
1686        assert_eq!(rcode, 5, "Refused");
1687    }
1688
1689    #[test]
1690    fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
1691        // A CHAOS-class (3) query for a tailnet name must NOT be answered as IN (no overlay A), and
1692        // must NOT be REFUSED (Go does no class check on the local path). It's an unsupported
1693        // authoritative class -> NODATA (empty NOERROR), and never forwarded (tailnet name).
1694        let view = view_with_peer();
1695        let buf = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
1696
1697        let resp = answer(&view, &buf).expect("answers");
1698        let (_, rcode, ancount) = parse_header(&resp);
1699        assert_eq!(
1700            rcode, 0,
1701            "NoError (NODATA), not Refused and not an IN answer"
1702        );
1703        assert_eq!(
1704            ancount, 0,
1705            "must not hand out the overlay A for a non-IN class"
1706        );
1707    }
1708
1709    #[test]
1710    fn non_in_class_off_tailnet_forwards_or_nxdomains() {
1711        // A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
1712        // No upstream here -> NXDOMAIN, proving the class gate no longer short-circuits to Refused.
1713        let view = view_with_peer();
1714        let buf = build_query(0x66, &["example", "com"], 1, 3);
1715
1716        let resp = answer(&view, &buf).expect("answers");
1717        let (_, rcode, _) = parse_header(&resp);
1718        assert_eq!(
1719            rcode, 3,
1720            "off-tailnet non-IN class, no upstream -> NXDOMAIN, not Refused"
1721        );
1722    }
1723
1724    /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1725    /// + global resolvers.
1726    fn view_with_routes(
1727        routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1728        resolvers: Vec<DnsResolver>,
1729        fallback: Vec<DnsResolver>,
1730    ) -> DnsView {
1731        DnsView {
1732            cfg: DnsConfig {
1733                magic_dns: true,
1734                search_domains: vec!["user.ts.net".to_string()],
1735                routes,
1736                resolvers,
1737                fallback_resolvers: fallback,
1738                ..Default::default()
1739            },
1740            peers: None,
1741            self_node: None,
1742            exit_doh: None,
1743            enable_ipv6: false,
1744            accept_dns: true,
1745        }
1746    }
1747
1748    fn udp(addr: &str) -> DnsResolver {
1749        DnsResolver {
1750            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1751            use_with_exit_node: false,
1752        }
1753    }
1754
1755    #[test]
1756    fn split_dns_route_forwards_to_matching_upstream() {
1757        let mut routes = std::collections::BTreeMap::new();
1758        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1759        let view = view_with_routes(routes, vec![], vec![]);
1760        let buf = build_query(0x100, &["api", "corp", "example"], 1, 1);
1761
1762        match decide(&view, &buf).expect("decides") {
1763            Decision::Forward { upstreams, .. } => {
1764                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1765            }
1766            Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
1767        }
1768    }
1769
1770    #[test]
1771    fn exotic_qtype_off_tailnet_forwards_to_upstream() {
1772        // The core of the fix: an HTTPS/SVCB (type 65) query for an off-tailnet name with a matching
1773        // route must FORWARD to the upstream (verbatim), exactly like an A query would — not REFUSE
1774        // and not NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
1775        let mut routes = std::collections::BTreeMap::new();
1776        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1777        let view = view_with_routes(routes, vec![], vec![]);
1778        let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
1779
1780        match decide(&view, &buf).expect("decides") {
1781            Decision::Forward {
1782                upstreams, query, ..
1783            } => {
1784                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1785                assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
1786            }
1787            Decision::Reply(_) => {
1788                panic!("an off-tailnet HTTPS-record query must forward, not reply")
1789            }
1790        }
1791    }
1792
1793    #[test]
1794    fn non_in_class_off_tailnet_forwards_to_upstream() {
1795        // A non-IN class for an off-tailnet routed name forwards too (Go does no class check on the
1796        // local path). Proves the class gate no longer short-circuits to REFUSED before routing.
1797        let mut routes = std::collections::BTreeMap::new();
1798        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1799        let view = view_with_routes(routes, vec![], vec![]);
1800        let buf = build_query(0x103, &["api", "corp", "example"], 1, 3);
1801
1802        match decide(&view, &buf).expect("decides") {
1803            Decision::Forward { upstreams, .. } => {
1804                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1805            }
1806            Decision::Reply(_) => {
1807                panic!("an off-tailnet non-IN-class query must forward, not reply")
1808            }
1809        }
1810    }
1811
1812    /// The local responder bounds concurrent in-flight forwards: `serve` acquires one
1813    /// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the query fail-closed when
1814    /// the pool is exhausted (a client spraying forwardable names can't open unbounded overlay
1815    /// sockets). This pins the gating semantics `serve` relies on — drained pool refuses a new
1816    /// permit; releasing one restores capacity — and the cap constant itself. (The async `serve`
1817    /// loop has no netstack-free test seam, so the semaphore behavior is exercised directly here, the
1818    /// same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
1819    #[test]
1820    fn forward_inflight_cap_fails_closed_when_saturated() {
1821        use std::sync::Arc;
1822
1823        use tokio::sync::Semaphore;
1824
1825        let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
1826
1827        // Drain every permit (one per concurrently in-flight forward).
1828        let mut held = Vec::with_capacity(MAX_INFLIGHT_FORWARDS);
1829        for _ in 0..MAX_INFLIGHT_FORWARDS {
1830            held.push(
1831                inflight
1832                    .clone()
1833                    .try_acquire_owned()
1834                    .expect("permits available below the cap"),
1835            );
1836        }
1837
1838        // At the cap, the next forward is refused — `serve` would drop the query, not spawn.
1839        assert!(
1840            inflight.clone().try_acquire_owned().is_err(),
1841            "a saturated forward pool must refuse a new permit (fail closed)"
1842        );
1843
1844        // Completing an in-flight forward releases its permit and restores capacity.
1845        drop(held.pop());
1846        assert!(
1847            inflight.clone().try_acquire_owned().is_ok(),
1848            "releasing a permit must let the next forward proceed"
1849        );
1850    }
1851
1852    /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1853    /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1854    /// only when the task completes. This guards the regression the saturation test above can't see:
1855    /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1856    /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1857    /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1858    /// while the task runs and refill once it finishes.
1859    #[tokio::test]
1860    async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1861        use std::sync::Arc;
1862
1863        use tokio::sync::Semaphore;
1864
1865        let inflight = Arc::new(Semaphore::new(1));
1866        let permit = inflight
1867            .clone()
1868            .try_acquire_owned()
1869            .expect("the sole permit is available");
1870
1871        let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1872        let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1873        let task = tokio::spawn(async move {
1874            // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1875            // task, so it lives until the body ends — not dropped at the `let`.
1876            let _permit = permit;
1877            started_tx.send(()).unwrap();
1878            // Stand in for the `.await` on the upstream forward.
1879            release_rx.await.unwrap();
1880        });
1881
1882        started_rx.await.unwrap();
1883        // While the task runs, the permit it moved in is still held — the pool is empty.
1884        assert!(
1885            inflight.clone().try_acquire_owned().is_err(),
1886            "a permit moved into a running task must stay held across its await"
1887        );
1888
1889        // Let the task finish; its permit drops with the body and capacity returns.
1890        release_tx.send(()).unwrap();
1891        task.await.unwrap();
1892        assert!(
1893            inflight.clone().try_acquire_owned().is_ok(),
1894            "the permit must be released once the task body completes"
1895        );
1896    }
1897
1898    #[test]
1899    fn longest_suffix_route_wins() {
1900        let mut routes = std::collections::BTreeMap::new();
1901        routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
1902        routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
1903        let view = view_with_routes(routes, vec![], vec![]);
1904        let buf = build_query(0x101, &["api", "corp", "example"], 1, 1);
1905
1906        match decide(&view, &buf).expect("decides") {
1907            Decision::Forward { upstreams, .. } => {
1908                assert_eq!(
1909                    upstreams,
1910                    vec!["10.0.0.2:53".parse().unwrap()],
1911                    "longer suffix wins"
1912                );
1913            }
1914            Decision::Reply(_) => panic!("expected forward"),
1915        }
1916    }
1917
1918    #[test]
1919    fn negative_route_is_nxdomain_not_forwarded() {
1920        // An empty upstream list is a negative route: fail closed, never forward.
1921        let mut routes = std::collections::BTreeMap::new();
1922        routes.insert("blocked.example".to_string(), vec![]);
1923        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
1924        let buf = build_query(0x102, &["x", "blocked", "example"], 1, 1);
1925
1926        match decide(&view, &buf).expect("decides") {
1927            Decision::Reply(resp) => {
1928                let (_, rcode, _) = parse_header(&resp);
1929                assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
1930            }
1931            Decision::Forward { .. } => panic!("negative route must not forward"),
1932        }
1933    }
1934
1935    #[test]
1936    fn unrouted_name_forwards_to_fallback_then_global() {
1937        // No route matches: fallback resolvers are preferred over global resolvers.
1938        let view = view_with_routes(
1939            std::collections::BTreeMap::new(),
1940            vec![udp("8.8.8.8:53")],
1941            vec![udp("1.1.1.1:53")],
1942        );
1943        let buf = build_query(0x103, &["example", "com"], 1, 1);
1944
1945        match decide(&view, &buf).expect("decides") {
1946            Decision::Forward { upstreams, .. } => {
1947                assert_eq!(
1948                    upstreams,
1949                    vec!["1.1.1.1:53".parse().unwrap()],
1950                    "fallback preferred"
1951                );
1952            }
1953            Decision::Reply(_) => panic!("expected forward to fallback"),
1954        }
1955    }
1956
1957    #[test]
1958    fn unrouted_name_forwards_to_global_when_no_fallback() {
1959        let view = view_with_routes(
1960            std::collections::BTreeMap::new(),
1961            vec![udp("8.8.8.8:53")],
1962            vec![],
1963        );
1964        let buf = build_query(0x104, &["example", "com"], 1, 1);
1965
1966        match decide(&view, &buf).expect("decides") {
1967            Decision::Forward { upstreams, .. } => {
1968                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
1969            }
1970            Decision::Reply(_) => panic!("expected forward to global resolver"),
1971        }
1972    }
1973
1974    #[test]
1975    fn tailnet_name_is_never_forwarded() {
1976        // Anti-leak: a name under a tailnet search domain that has no overlay match must fail
1977        // closed to NXDOMAIN, never leak to an upstream resolver, even with resolvers configured.
1978        let view = view_with_routes(
1979            std::collections::BTreeMap::new(),
1980            vec![udp("8.8.8.8:53")],
1981            vec![udp("1.1.1.1:53")],
1982        );
1983        // "ghost.user.ts.net" is under the tailnet suffix but matches no peer.
1984        let buf = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
1985
1986        match decide(&view, &buf).expect("decides") {
1987            Decision::Reply(resp) => {
1988                let (_, rcode, _) = parse_header(&resp);
1989                assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
1990            }
1991            Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
1992        }
1993    }
1994
1995    #[test]
1996    fn no_resolvers_fails_closed() {
1997        // No route, no resolvers: an unknown name is NXDOMAIN, not forwarded.
1998        let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);
1999        let buf = build_query(0x106, &["example", "com"], 1, 1);
2000
2001        match decide(&view, &buf).expect("decides") {
2002            Decision::Reply(resp) => {
2003                let (_, rcode, _) = parse_header(&resp);
2004                assert_eq!(rcode, 3, "NxDomain");
2005            }
2006            Decision::Forward { .. } => panic!("must not forward with no resolvers"),
2007        }
2008    }
2009
2010    #[test]
2011    fn overlay_match_wins_over_forwarding() {
2012        // A known peer name resolves authoritatively even when upstream resolvers are configured.
2013        let mut db = PeerDb::default();
2014        db.upsert(&test_node());
2015        let view = DnsView {
2016            cfg: DnsConfig {
2017                magic_dns: true,
2018                search_domains: vec!["user.ts.net".to_string()],
2019                resolvers: vec![udp("8.8.8.8:53")],
2020                ..Default::default()
2021            },
2022            peers: Some(Arc::new(db)),
2023            self_node: None,
2024            exit_doh: None,
2025            enable_ipv6: false,
2026            accept_dns: true,
2027        };
2028        let buf = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);
2029
2030        match decide(&view, &buf).expect("decides") {
2031            Decision::Reply(resp) => {
2032                let (_, rcode, ancount) = parse_header(&resp);
2033                assert_eq!(rcode, 0, "authoritative answer wins");
2034                assert_eq!(ancount, 1);
2035            }
2036            Decision::Forward { .. } => panic!("overlay match must not forward"),
2037        }
2038    }
2039
2040    #[test]
2041    fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
2042        // Anti-leak: an `ip6.arpa` reverse PTR for a tailnet ULA (fd7a:…) must fail closed to
2043        // NXDOMAIN, never be forwarded — even with an upstream resolver configured. This fork is
2044        // IPv4-only on the tailnet; forwarding would reveal that a v6 address was probed.
2045        let view = view_with_routes(
2046            std::collections::BTreeMap::new(),
2047            vec![udp("8.8.8.8:53")],
2048            vec![udp("1.1.1.1:53")],
2049        );
2050        // Reverse name for fd7a::1 (nibble-reversed) under ip6.arpa. The exact nibble labels don't
2051        // matter to the guard — any name ending in ip6.arpa must fail closed.
2052        let labels = vec![
2053            "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
2054            "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
2055            "arpa",
2056        ];
2057        let buf = build_query(0x200, &labels, 12, 1);
2058
2059        match decide(&view, &buf).expect("decides") {
2060            Decision::Reply(resp) => {
2061                let (_, rcode, _) = parse_header(&resp);
2062                assert_eq!(
2063                    rcode, 3,
2064                    "NxDomain: ip6.arpa reverse must not leak upstream"
2065                );
2066            }
2067            Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
2068        }
2069    }
2070
2071    #[test]
2072    fn cap_response_sets_tc_when_truncated() {
2073        // An oversize upstream answer is capped to a single datagram AND marked truncated (TC bit)
2074        // so the stub resolver retries over TCP rather than trusting a chopped message.
2075        let mut big = build_query(0x300, &["example", "com"], 1, 1);
2076        big[2] |= 0x80; // make it a response (QR=1)
2077        big.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);
2078
2079        let out = cap_response(big);
2080        assert_eq!(out.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
2081        assert_ne!(out[2] & 0x02, 0, "TC bit set on truncation");
2082    }
2083
2084    #[test]
2085    fn cap_response_leaves_small_response_untouched() {
2086        // A response that fits is returned verbatim with no TC bit forced on.
2087        let mut small = build_query(0x301, &["example", "com"], 1, 1);
2088        small[2] |= 0x80;
2089        let before = small.clone();
2090
2091        let out = cap_response(small);
2092        assert_eq!(out, before, "small response unchanged");
2093        assert_eq!(out[2] & 0x02, 0, "TC bit not set when no truncation");
2094    }
2095
2096    #[test]
2097    fn response_matches_query_rejects_mismatched_question() {
2098        // id + QR match but the echoed question differs (different QNAME) => rejected. This guards
2099        // against an off-path injector that guesses the id but answers a different question.
2100        let query = build_query(0x1234, &["a", "com"], 1, 1);
2101
2102        let mut wrong_question = build_query(0x1234, &["b", "com"], 1, 1);
2103        wrong_question[2] |= 0x80; // QR=1, same id
2104        assert!(
2105            !response_matches_query(&query, &wrong_question),
2106            "different QNAME must be rejected"
2107        );
2108
2109        // A different QTYPE with the same name is also rejected.
2110        let mut wrong_qtype = build_query(0x1234, &["a", "com"], 28, 1);
2111        wrong_qtype[2] |= 0x80;
2112        assert!(
2113            !response_matches_query(&query, &wrong_qtype),
2114            "different QTYPE must be rejected"
2115        );
2116
2117        // The exact echoed question with QR=1 is accepted.
2118        let mut good = query.clone();
2119        good[2] |= 0x80;
2120        assert!(
2121            response_matches_query(&query, &good),
2122            "matching question accepted"
2123        );
2124    }
2125
2126    #[test]
2127    fn suffix_matches_handles_boundaries_and_empty() {
2128        // Exact and label-boundary matches.
2129        assert!(suffix_matches("corp", "corp"));
2130        assert!(suffix_matches("a.corp", "corp"));
2131        assert!(suffix_matches("a.b.corp", "corp"));
2132        // Not a label boundary.
2133        assert!(!suffix_matches("acorp", "corp"));
2134        // Empty suffix never matches (defense-in-depth against `ends_with("")`).
2135        assert!(!suffix_matches("anything.example", ""));
2136        assert!(!suffix_matches("", ""));
2137    }
2138
2139    #[test]
2140    fn empty_search_domain_does_not_capture_everything() {
2141        // Defense-in-depth: an empty search domain must NOT make every name look like a tailnet
2142        // name (which would fail-close legitimate recursive queries / mis-route). With an empty
2143        // suffix present alongside a real resolver, an off-tailnet name still forwards.
2144        let mut view = view_with_routes(
2145            std::collections::BTreeMap::new(),
2146            vec![udp("8.8.8.8:53")],
2147            vec![],
2148        );
2149        view.cfg.search_domains = vec![String::new()];
2150        let buf = build_query(0x400, &["example", "com"], 1, 1);
2151
2152        match decide(&view, &buf).expect("decides") {
2153            Decision::Forward { upstreams, .. } => {
2154                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
2155            }
2156            Decision::Reply(_) => {
2157                panic!("empty search domain must not treat every name as tailnet")
2158            }
2159        }
2160    }
2161
2162    #[test]
2163    fn empty_route_suffix_does_not_capture_everything() {
2164        // Defense-in-depth: an empty route suffix must not match every name (which would route all
2165        // queries to that route's upstreams). With an empty-suffix route present, an unrelated name
2166        // still falls through to the global resolver.
2167        let mut routes = std::collections::BTreeMap::new();
2168        routes.insert(String::new(), vec![udp("10.9.9.9:53")]);
2169        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2170        let buf = build_query(0x401, &["example", "com"], 1, 1);
2171
2172        match decide(&view, &buf).expect("decides") {
2173            Decision::Forward { upstreams, .. } => {
2174                assert_eq!(
2175                    upstreams,
2176                    vec!["8.8.8.8:53".parse().unwrap()],
2177                    "empty route suffix must not capture; falls through to global"
2178                );
2179            }
2180            Decision::Reply(_) => panic!("expected forward to global resolver"),
2181        }
2182    }
2183
2184    fn udp_exit(addr: &str) -> DnsResolver {
2185        DnsResolver {
2186            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2187            use_with_exit_node: true,
2188        }
2189    }
2190
2191    #[test]
2192    fn recursive_forward_is_flagged_route_forward_is_not() {
2193        // A recursive (global/fallback) forward sets `recursive = true` (eligible for DoH
2194        // delegation); a deliberately-configured split-DNS route sets `recursive = false`.
2195        let mut routes = std::collections::BTreeMap::new();
2196        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
2197        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2198
2199        let routed = build_query(0x500, &["api", "corp", "example"], 1, 1);
2200        match decide(&view, &routed).expect("decides") {
2201            Decision::Forward { recursive, .. } => {
2202                assert!(!recursive, "split-DNS route is not a recursive forward")
2203            }
2204            Decision::Reply(_) => panic!("expected route forward"),
2205        }
2206
2207        let global = build_query(0x501, &["example", "com"], 1, 1);
2208        match decide(&view, &global).expect("decides") {
2209            Decision::Forward { recursive, .. } => {
2210                assert!(recursive, "unrouted name is a recursive forward")
2211            }
2212            Decision::Reply(_) => panic!("expected recursive forward"),
2213        }
2214    }
2215
2216    #[test]
2217    fn recursive_plan_keeps_udp_without_exit_node() {
2218        // No active exit node: a recursive forward stays on its default UDP upstreams.
2219        let view = view_with_routes(
2220            std::collections::BTreeMap::new(),
2221            vec![udp("8.8.8.8:53")],
2222            vec![],
2223        );
2224        let default = vec!["8.8.8.8:53".parse().unwrap()];
2225        assert_eq!(
2226            recursive_plan(&view, default.clone()),
2227            RecursivePlan::Udp(default)
2228        );
2229    }
2230
2231    #[test]
2232    fn recursive_plan_delegates_to_doh_with_exit_node() {
2233        // Exit node active, no kept-local resolvers: recursive queries delegate to the exit node's
2234        // DoH endpoint so resolution egresses from the exit node, not this host.
2235        let mut view = view_with_routes(
2236            std::collections::BTreeMap::new(),
2237            vec![udp("8.8.8.8:53")],
2238            vec![],
2239        );
2240        let doh: SocketAddr = "100.64.0.5:8080".parse().unwrap();
2241        view.exit_doh = Some(doh);
2242        assert_eq!(
2243            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2244            RecursivePlan::Doh(doh)
2245        );
2246    }
2247
2248    #[test]
2249    fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
2250        // Even with an exit node active, resolvers flagged `use_with_exit_node` stay local (Go keeps
2251        // UseWithExitNode resolvers). The plan forwards to those over UDP, never delegating to DoH.
2252        let mut view = view_with_routes(
2253            std::collections::BTreeMap::new(),
2254            vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")],
2255            vec![],
2256        );
2257        view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());
2258        // The default upstreams the caller computed are irrelevant when kept-local resolvers exist;
2259        // the plan must use the kept-local ones.
2260        assert_eq!(
2261            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2262            RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
2263        );
2264    }
2265}