Skip to main content

ts_runtime/
magic_dns.rs

1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//!   to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//!   negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//!   us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//!   no route are forwarded to the fallback resolvers, else the global resolvers.
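//! - **Fail closed**: if no route and no resolver is configured, an unknown off-tailnet name is
//!   never forwarded; it is answered `SERVFAIL` (a soft failure the stub retries), while an unknown
//!   tailnet-suffix name stays `NXDOMAIN`.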
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//!   the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//!   off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//!   `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//!   HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//!   forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//!   `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//!   zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//!   (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37    net::{IpAddr, Ipv4Addr, SocketAddr},
38    sync::Arc,
39    time::Duration,
40};
41
42use kameo::{
43    actor::ActorRef,
44    message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48    sync::{Semaphore, watch},
49    task::JoinSet,
50    time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56    Error,
57    env::Env,
58    peer_tracker::{PeerDb, PeerState},
59};
60
/// How long (5 seconds) to wait for an upstream resolver to answer a forwarded query before
/// giving up on it.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Cap on concurrent in-flight forwarded queries on the local `100.100.100.100:53` responder.
///
/// Each forward is spawned onto a task that holds an overlay UDP socket until the upstream answers
/// or [`UPSTREAM_TIMEOUT`] elapses. Without a cap, a local/tailnet client spraying distinct
/// forwardable names opens unbounded concurrent overlay sockets + tasks (a resource-exhaustion DoS
/// on a slow/black-holed upstream, since each lingers for the full timeout). Bound it the same way
/// the peerAPI DoH server bounds its request handlers ([`crate::peerapi`]'s `MAX_INFLIGHT`): acquire
/// a permit before spawning and drop the query fail-closed when saturated. A dropped DNS query is a
/// benign outcome — the stub resolver simply retries or times out — and Go's resolver likewise
/// bounds outstanding forwards rather than spawning without limit.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Cap, in bytes, on a forwarded upstream response we read into memory (a single UDP datagram).
///
/// Matches Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). The client's query is forwarded
/// verbatim, so a client advertising a large EDNS UDP size can elicit a legitimately large
/// (1300–4096 byte) UDP answer (big TXT sets, DNSSEC, many-record round-robins). The previous
/// 1232-byte cap truncated those and set TC, forcing a TCP retry this fork's UDP-only forwarder
/// can't serve — so the large answer became unreachable. A 4096-byte cap relays them intact.
const MAX_UPSTREAM_RESPONSE: usize = 4096;

/// The MagicDNS service IP (`100.100.100.100`). The netstack interface owns this address, so a
/// `udp_bind` here receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The DNS service port (the standard port 53).
const MAGIC_DNS_PORT: u16 = 53;
88
/// The latest view the answer loop resolves queries against.
///
/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
/// updates) and read fresh by the answer loop for every packet.
///
/// The derived `Default` yields `magic_dns`-off config defaults and `accept_dns == false`, so a
/// default-constructed view makes [`decide`] answer `REFUSED` to every query.
#[derive(Clone, Default)]
pub(crate) struct DnsView {
    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing. Also supplies
    /// the search domains, extra records, split-DNS routes and resolvers consulted when resolving.
    pub(crate) cfg: DnsConfig,
    /// The current peer database, if we've seen a peer update. Consulted first for both name and
    /// reverse (IP) lookups; `None` simply means no peer can match.
    pub(crate) peers: Option<Arc<PeerDb>>,
    /// This node, if we've seen a self-node update. Consulted after the peer database misses.
    pub(crate) self_node: Option<Node>,
    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
    /// recursive resolution to this address over the overlay instead of forwarding to the locally
    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
    ///
    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
    pub(crate) exit_doh: Option<SocketAddr>,
    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
    ///
    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
    pub(crate) enable_ipv6: bool,
    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
    /// join for connectivity without taking over DNS.
    ///
    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
    pub(crate) accept_dns: bool,
}
129
130impl DnsView {
131    /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132    fn node_by_name(&self, name: &str) -> Option<Node> {
133        if let Some(node) = self
134            .peers
135            .as_ref()
136            .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137        {
138            return Some(node);
139        }
140
141        self.self_node
142            .as_ref()
143            .filter(|n| n.matches_name(name))
144            .cloned()
145    }
146
147    /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148    /// wins first — tried as written and then qualified by each tailnet search domain (so a
149    /// short/partially-qualified name like `host` or `host.user` still resolves to
150    /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151    /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152    /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153    /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154    /// control-pushed static record — never anything else.
155    fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156        let addr_of = |node: Node| -> IpAddr {
157            if want_v4 {
158                IpAddr::from(node.tailnet_address.ipv4.addr())
159            } else {
160                IpAddr::from(node.tailnet_address.ipv6.addr())
161            }
162        };
163
164        if let Some(node) = self.node_by_name(canon) {
165            return Some(addr_of(node));
166        }
167        for suffix in &self.cfg.search_domains {
168            if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169                return Some(addr_of(node));
170            }
171        }
172
173        // Control-pushed static records match the fully-qualified query name only.
174        self.cfg.extra_records.iter().find_map(|rec| {
175            let family_ok = matches!(
176                (rec.addr, want_v4),
177                (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178            );
179            (rec.name == canon && family_ok).then_some(rec.addr)
180        })
181    }
182
183    /// Find the node (peer or self) that owns the tailnet IP `ip`.
184    fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185        if let Some(node) = self
186            .peers
187            .as_ref()
188            .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189        {
190            return Some(node);
191        }
192
193        self.self_node
194            .as_ref()
195            .filter(|n| {
196                IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197                    || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198            })
199            .cloned()
200    }
201
202    /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203    /// resolvers, returning the upstreams to forward to.
204    ///
205    /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206    /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207    /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208    /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209    /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210    /// ([`Upstreams::None`] => NXDOMAIN).
211    fn route_for(&self, name: &str) -> Upstreams<'_> {
212        let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213        for (suffix, upstreams) in &self.cfg.routes {
214            if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215                best = Some((suffix.as_str(), upstreams));
216            }
217        }
218
219        if let Some((_, upstreams)) = best {
220            return if upstreams.is_empty() {
221                Upstreams::Block
222            } else {
223                // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224                // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225                // route) must keep receiving the query directly.
226                Upstreams::Route(upstreams)
227            };
228        }
229
230        if !self.cfg.fallback_resolvers.is_empty() {
231            return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232        }
233        if !self.cfg.resolvers.is_empty() {
234            return Upstreams::Recursive(&self.cfg.resolvers);
235        }
236        Upstreams::None
237    }
238}
239
/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
///
/// Produced by [`DnsView::route_for`] and turned into a [`Decision`] by [`forward_or_nxdomain`].
enum Upstreams<'a> {
    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
    Route(&'a [DnsResolver]),
    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
    /// exit-node DoH delegation in the client serve loop.
    Recursive(&'a [DnsResolver]),
    /// A negative split-DNS route (one with an empty resolver list) matched: do not resolve; the
    /// name is answered NXDOMAIN.
    Block,
    /// No route and no resolver configured: the name is never forwarded. This is a soft failure
    /// (SERVFAIL), not a cacheable non-existence.
    None,
}
252
/// What the (sync) decision step concluded for a query: either a complete response to send back,
/// or a request to forward the original query to an upstream resolver.
///
/// [`decide`] wraps this in an `Option`; its `None` means "drop the packet without answering".
pub(crate) enum Decision {
    /// A fully-formed, already-encoded response is ready to send.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of these upstream UDP resolvers; on success
    /// relay the upstream answer, on failure/timeout answer with the prebuilt `servfail` buffer
    /// (an off-tailnet name we failed to forward is a soft failure, not a cacheable non-existence —
    /// Go forwarder.go:1297-1307).
    Forward {
        /// UDP upstreams to try, in order. Only IPv4 addresses are ever placed here.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes to forward verbatim.
        query: Vec<u8>,
        /// Fallback SERVFAIL response if every upstream fails or times out.
        servfail: Vec<u8>,
        /// Whether this is a *recursive* (catch-all fallback/global resolver) forward, as opposed
        /// to a deliberately-configured split-DNS route. Only recursive forwards are eligible for
        /// exit-node DoH delegation in the client serve loop (see [`DnsView::exit_doh`]); split-DNS
        /// routes always stay on their configured upstreams (typically subnet-reachable internal
        /// resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
277
/// Whether `name` is `suffix` or sits under it at a label boundary: `"a.corp"` matches `"corp"`,
/// `"acorp"` does not.
///
/// The comparison follows DNS name semantics rather than raw byte equality:
/// - it is ASCII case-insensitive (DNS names compare case-insensitively), so a search domain or
///   route written as `Example.TS.net` still covers `host.example.ts.net`;
/// - one trailing root dot on either side is ignored, so a route key pushed as `corp.` still
///   covers `db.corp`.
///
/// Both matter for the anti-leak callers: a suffix that silently fails to match would let a
/// tailnet name fall through to the forwarding path.
///
/// An **empty** suffix (including a bare `"."`) never matches (defense-in-depth: an empty suffix
/// would otherwise match every name and either over-route or treat everything as a tailnet name —
/// both leak-prone).
fn suffix_matches(name: &str, suffix: &str) -> bool {
    // Work on bytes: '.' and ASCII case folding are byte-level, and byte slicing cannot panic on a
    // UTF-8 character boundary the way `str` slicing can.
    let name = name.strip_suffix('.').unwrap_or(name).as_bytes();
    let suffix = suffix.strip_suffix('.').unwrap_or(suffix).as_bytes();
    if suffix.is_empty() {
        return false;
    }

    match name.len().checked_sub(suffix.len()) {
        // `name` is shorter than `suffix`: it cannot be under it.
        None => false,
        // Same length: must be the suffix itself.
        Some(0) => name.eq_ignore_ascii_case(suffix),
        // Longer: the tail must equal the suffix and be preceded by a label separator.
        Some(split) => name[split - 1] == b'.' && name[split..].eq_ignore_ascii_case(suffix),
    }
}
291
292/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
293/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
294/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
295fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
296    view.cfg
297        .search_domains
298        .iter()
299        .any(|suffix| suffix_matches(name, suffix))
300}
301
302/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
303/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
304/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
305/// such queries fail closed to NXDOMAIN.
306fn is_ip6_arpa(name: &str) -> bool {
307    suffix_matches(name, "ip6.arpa")
308}
309
/// Reports whether `ip` falls inside the Tailscale CGNAT block `100.64.0.0/10` (RFC 6598, the
/// tailnet IPv4 space), i.e. `100.64.0.0` through `100.127.255.255`.
///
/// Reverse (`PTR`) queries for these addresses are authoritative to MagicDNS: when no peer owns the
/// IP, callers fail closed to NXDOMAIN rather than forwarding the probe to a third-party resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    /// `100.64.0.0` as a big-endian integer.
    const CGNAT_NETWORK: u32 = 0x6440_0000;
    /// Network mask of a `/10` prefix (top ten bits set).
    const CGNAT_MASK: u32 = 0xFFC0_0000;

    (u32::from(ip) & CGNAT_MASK) == CGNAT_NETWORK
}
317
/// Decide what to do with a single DNS query against `view`: either a complete response is ready
/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
///
/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
/// never panics and fails closed — a name is never leaked to an upstream resolver it should not
/// reach:
/// - input that does not decode as a query => `None` (dropped);
/// - MagicDNS off or `accept_dns` off => `REFUSED`;
/// - a peer/self/extra-record match => answered directly (`A`, `AAAA` subject to the IPv6 gate,
///   `PTR`);
/// - an unknown tailnet-suffix name, a name under a negative split-DNS route, an unowned CGNAT
///   reverse name, or any `ip6.arpa` name => `NXDOMAIN`;
/// - an off-tailnet name with no usable upstream => `SERVFAIL` (a soft failure, not NXDOMAIN);
/// - any other off-tailnet name => [`Decision::Forward`].
///
/// NOTE(review): `resolve_addr` only returns an extra record of the *requested* family, so an
/// `A` query for a name that has only a v6 extra record (or `AAAA` for a v4-only one) falls through
/// to [`forward_or_nxdomain`] instead of answering NODATA — confirm this is intended.
pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
    // Malformed / non-query input is dropped: we never answer something we can't parse.
    let query = decode_query(buf).ok()?;
    let q = &query.question;
    let id = query.id;
    // Echo the query's RD bit (and set RA when set) on the response — Go derives the response header
    // from the query header.
    let rd = query.recursion_desired;

    // Shorthand for "answer this query right now with `rcode` and these answer records".
    let reply =
        |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rd, rcode, answers));

    // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
    // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
    // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
    // config and refuses every query. This one read site covers the netstack responder, the peerAPI
    // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
    if !view.cfg.magic_dns || !view.accept_dns {
        return Some(reply(Rcode::Refused, &[]));
    }

    // Canonical form of the queried name; every suffix / route / record comparison below uses it.
    let canon = q.name.to_canon();

    // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
    // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
    // forwards such a query like any other name. Treat it as an unsupported authoritative type —
    // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
    // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
    const CLASS_IN: u16 = 1;
    if q.qclass != CLASS_IN {
        return Some(forward_or_nodata(view, &canon, buf, id, q, rd));
    }

    Some(match &q.qtype {
        QType::A => match view.resolve_addr(&canon, true) {
            Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
            // No overlay/extra-record answer: try split-DNS / recursive upstreams.
            _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
        },
        QType::Aaaa => match view.resolve_addr(&canon, false) {
            // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
            // (default) the client is IPv4-only, so answering with the overlay v6 address would
            // only hand out an unroutable address — dead connections plus a fingerprint. Return
            // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
            // data as historically. We never forward this name to a recursive upstream either way:
            // a positive overlay match is authoritative.
            Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
                reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
            }
            Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
            // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
            // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
            _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
        },
        QType::Ptr => match q.name.ptr_to_ipv4() {
            Some(octets) => {
                let v4: Ipv4Addr = octets.into();
                let ip = IpAddr::V4(v4);
                match view.node_by_ip(ip) {
                    Some(node) => {
                        // Answer with the owning node's FQDN, split into labels for the PTR rdata.
                        let fqdn = node.fqdn(false);
                        let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
                        reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
                    }
                    // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
                    // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
                    // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
                    // genuinely off-tailnet reverse queries are forwarded.
                    None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
                    None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
                }
            }
            // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
            // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
            // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
            None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
            // A PTR query whose name is neither an IPv4 reverse name nor under `ip6.arpa`: treat it
            // like any other name with no overlay answer.
            None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
        },
        // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
        // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
        // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
        // REFUSED as "this server won't serve me" and abandons the resolver, which would break
        // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
        // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
        QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q, rd),
    })
}
412
413/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
414/// either forward (to UDP upstreams), answer authoritatively absent (NXDOMAIN), or fail soft
415/// (SERVFAIL) when an off-tailnet name simply can't be forwarded.
416///
417/// Rcode parity with Go's resolver (`net/dns/resolver/tsdns.go` resolution order + `forwarder.go`):
418/// - A **tailnet-authoritative** name (search-domain suffix) or a **negative split-DNS route**
419///   (`Upstreams::Block` — a route configured with no resolvers, which Go answers authoritatively
420///   from Hosts, so an unmatched name under it is authoritatively absent) → **NXDOMAIN**.
421/// - An **off-tailnet** name we cannot forward — no route and no resolver configured
422///   (`Upstreams::None`), or a route whose resolvers are all filtered out (IPv6-only under the
423///   IPv4-only egress) → **SERVFAIL**, matching Go forwarder.go:1207 ("no upstream resolvers set,
424///   returning SERVFAIL"). A cacheable NXDOMAIN on a transient/structural inability to forward would
425///   make a downstream stub cache the *non-existence* of a real name; SERVFAIL is a soft failure the
426///   stub retries.
427///
428/// Anti-leak: a tailnet-suffix name is authoritative and is never forwarded — neither the name nor
429/// the query leaks to a third-party resolver. (The CGNAT `in-addr.arpa` / `ip6.arpa` reverse-zone
430/// NXDOMAIN guards live in the PTR arm of [`decide`] and are likewise unaffected.)
431fn forward_or_nxdomain(
432    view: &DnsView,
433    canon: &str,
434    buf: &[u8],
435    id: u16,
436    q: &ts_dns_wire::Question,
437    rd: bool,
438) -> Decision {
439    // NXDOMAIN for authoritative-absent names; SERVFAIL for an off-tailnet name we can't forward.
440    let nxdomain = encode_response(id, q, rd, Rcode::NxDomain, &[]);
441    let servfail = encode_response(id, q, rd, Rcode::ServFail, &[]);
442
443    if is_tailnet_name(view, canon) {
444        return Decision::Reply(nxdomain);
445    }
446
447    let (resolvers, recursive) = match view.route_for(canon) {
448        Upstreams::Route(resolvers) => (resolvers, false),
449        Upstreams::Recursive(resolvers) => (resolvers, true),
450        // A negative split-DNS route is authoritative-absent (Go answers it from Hosts): NXDOMAIN.
451        Upstreams::Block => return Decision::Reply(nxdomain),
452        // No route and no resolver: an off-tailnet name we have nowhere to forward — SERVFAIL, not
453        // a cacheable non-existence (Go forwarder.go:1207).
454        Upstreams::None => return Decision::Reply(servfail),
455    };
456
457    let upstreams: Vec<SocketAddr> = resolvers
458        .iter()
459        .map(DnsResolver::udp_addr)
460        // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
461        .filter(SocketAddr::is_ipv4)
462        .collect();
463    if upstreams.is_empty() {
464        // We had a route but every resolver was filtered out (IPv6-only): we cannot forward this
465        // off-tailnet name, so soft-fail rather than assert non-existence.
466        Decision::Reply(servfail)
467    } else {
468        Decision::Forward {
469            upstreams,
470            query: buf.to_vec(),
471            // All upstreams failing at runtime is also an inability to forward, not a non-existence
472            // (Go forwarder.go:1297-1307): hand the forwarder a SERVFAIL fallback, not NXDOMAIN.
473            servfail,
474            recursive,
475        }
476    }
477}
478
479/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
480/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
481/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
482/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
483fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
484    matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
485}
486
487/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
488/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
489/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
490/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
491/// ([`is_unimplemented_tailnet_qtype`]).
492///
493/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
494/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
495/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
496/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
497/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
498/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
499/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
500/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
501///
502/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
503/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
504fn forward_or_nodata(
505    view: &DnsView,
506    canon: &str,
507    buf: &[u8],
508    id: u16,
509    q: &ts_dns_wire::Question,
510    rd: bool,
511) -> Decision {
512    // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
513    // name exists, we just hold no record of that type. But a small set of types Go's resolver
514    // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
515    // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
516    // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
517    // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
518    // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
519    // authoritative for.
520    if is_tailnet_name(view, canon) {
521        let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
522            Rcode::NotImpl
523        } else {
524            Rcode::NoError
525        };
526        return Decision::Reply(encode_response(id, q, rd, rcode, &[]));
527    }
528    // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
529    // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
530    // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
531    // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
532    // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
533    // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
534    if is_ip6_arpa(canon) {
535        return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
536    }
537    if let Some(octets) = q.name.ptr_to_ipv4()
538        && is_tailnet_cgnat(octets.into())
539    {
540        return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
541    }
542    // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
543    // non-tailnet names and soft-fails (SERVFAIL) when no upstream is configured/routable; reuse it
544    // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN and negative-route paths
545    // are unreachable here — this only exercises its off-tailnet forward / SERVFAIL dispositions).
546    forward_or_nxdomain(view, canon, buf, id, q, rd)
547}
548
/// Client-side plan for a *recursive* forward: keep resolving over local UDP upstreams, or delegate
/// the query to the active exit node's peerAPI DoH endpoint over the overlay.
///
/// Computed by [`recursive_plan`]; derives `Debug`/`PartialEq`/`Eq` so plans can be compared
/// directly.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Used when no exit node is active, or when the config
    /// has `use_with_exit_node` resolvers (kept local even with an exit node selected).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address
    /// (the value of [`DnsView::exit_doh`]).
    Doh(SocketAddr),
}
559
560/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
561/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
562///
563/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
564/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
565///   resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
566///   is selected); forward to them over UDP, do NOT delegate.
567/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
568///   then egresses from the exit node, not this host (the whole point of routing through an exit
569///   node: this node's real IP is never used to resolve the peer's public names).
570pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
571    let Some(doh) = view.exit_doh else {
572        return RecursivePlan::Udp(default_upstreams);
573    };
574    let kept: Vec<SocketAddr> = view
575        .cfg
576        .resolvers_with_exit_node()
577        .map(DnsResolver::udp_addr)
578        // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
579        .filter(SocketAddr::is_ipv4)
580        .collect();
581    if kept.is_empty() {
582        RecursivePlan::Doh(doh)
583    } else {
584        RecursivePlan::Udp(kept)
585    }
586}
587
588/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
589/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
590/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
591/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
592/// truncation actually occurs.
593fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
594    if resp.len() > MAX_UPSTREAM_RESPONSE {
595        resp.truncate(MAX_UPSTREAM_RESPONSE);
596        // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
597        // capped datagram is always >= the header length, but guard anyway to never panic.
598        if let Some(flags_hi) = resp.get_mut(2) {
599            *flags_hi |= 0x02;
600        }
601    }
602    resp
603}
604
/// The byte length of a fixed DNS header.
const DNS_HEADER_LEN: usize = 12;

/// Return the byte range of the first question section (QNAME + QTYPE + QCLASS) within `msg`,
/// starting just after the 12-byte header.
///
/// Returns [`None`] if:
/// - the header is incomplete or declares no question (QDCOUNT, header bytes 4..6, is zero) —
///   without this check the bytes after the header would be parsed as a question even though
///   they belong to another section (an answer RR starts with the same NAME/TYPE/CLASS layout);
/// - the name is malformed or uses a compression pointer (illegal in a question);
/// - the name or the trailing QTYPE/QCLASS runs past the buffer.
///
/// Used to byte-compare a forwarded query's question against the upstream response's question.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    // QDCOUNT is the big-endian u16 at header bytes 4..6; a zero count means there is no
    // question section to locate.
    let qdcount = u16::from_be_bytes([*msg.get(4)?, *msg.get(5)?]);
    if qdcount == 0 {
        return None;
    }

    let mut off = DNS_HEADER_LEN;
    // Walk the QNAME label sequence to the terminating root label (0x00).
    loop {
        let len = usize::from(*msg.get(off)?);
        // A compression pointer (or any reserved label type: top bits set) is not valid in a
        // question section.
        if len & 0xC0 != 0 {
            return None;
        }
        off += 1;
        if len == 0 {
            break; // root label: QNAME complete.
        }
        off = off.checked_add(len)?;
        if off > msg.len() {
            return None;
        }
    }
    // QTYPE (2) + QCLASS (2) follow the name.
    let end = off.checked_add(4)?;
    if end > msg.len() {
        return None;
    }
    Some(DNS_HEADER_LEN..end)
}
637
638/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
639/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
640/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
641/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
642/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
643/// answer for a different question, so we also require the echoed question to match.
644fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
645    if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
646        return false;
647    }
648    let id_matches = query[0..2] == resp[0..2];
649    let is_response = resp[2] & 0x80 != 0;
650    if !id_matches || !is_response {
651        return false;
652    }
653    // The response must echo the exact question we asked. Parse both question sections and compare
654    // their bytes; a parse failure on either side is treated as a non-match (fail closed).
655    match (question_range(query), question_range(resp)) {
656        (Some(q), Some(r)) => query[q] == resp[r],
657        _ => false,
658    }
659}
660
661/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
662/// well-formed response, or the prebuilt `fallback` buffer if every upstream times out or errors.
663///
664/// The caller supplies `fallback` (a SERVFAIL response for a forwarded off-tailnet name — an
665/// all-upstream failure is a soft "couldn't resolve", not a cacheable non-existence, matching Go
666/// forwarder.go:1297-1307). Keeping it caller-supplied means this fn is rcode-agnostic.
667///
668/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
669/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
670/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
671/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
672pub(crate) async fn forward_query(
673    channel: &Channel,
674    upstreams: &[SocketAddr],
675    query: &[u8],
676    fallback: Vec<u8>,
677) -> Vec<u8> {
678    for upstream in upstreams {
679        let socket = match channel
680            .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
681            .await
682        {
683            Ok(s) => s,
684            Err(e) => {
685                tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
686                continue;
687            }
688        };
689
690        if let Err(e) = socket.send_to(*upstream, query).await {
691            tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
692            continue;
693        }
694
695        match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
696            Ok(Ok((from, resp))) if !resp.is_empty() => {
697                // Anti-poisoning: only accept a datagram that came from the upstream we queried
698                // and whose DNS header matches this query (same transaction id, QR=response bit
699                // set). An off-path injector racing the real answer is otherwise relayed straight
700                // back to the stub resolver as authoritative.
701                if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
702                    tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
703                    continue;
704                }
705                return cap_response(resp.to_vec());
706            }
707            Ok(Ok(_)) => continue,
708            Ok(Err(e)) => {
709                tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
710                continue;
711            }
712            Err(_) => {
713                tracing::debug!(%upstream, "magic dns upstream timed out");
714                continue;
715            }
716        }
717    }
718    fallback
719}
720
721/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
722///
723/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
724/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
725async fn serve(
726    socket: netstack::netsock::UdpSocket,
727    rx: watch::Receiver<Arc<DnsView>>,
728    channel: Channel,
729) {
730    let socket = Arc::new(socket);
731    let mut forwards = JoinSet::new();
732    // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
733    // lifetime of each spawned forward task and released on completion.
734    let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
735    loop {
736        let (src, buf) = match socket.recv_from_bytes().await {
737            Ok(pkt) => pkt,
738            Err(e) => {
739                tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
740                return;
741            }
742        };
743
744        // Read the freshest view per packet.
745        let view = rx.borrow().clone();
746
747        match decide(&view, &buf) {
748            // Malformed query: drop silently.
749            None => continue,
750            Some(Decision::Reply(resp)) => {
751                if let Err(e) = socket.send_to(src, &resp).await {
752                    tracing::warn!(error = %e, %src, "magic dns response send failed");
753                }
754            }
755            Some(Decision::Forward {
756                upstreams,
757                query,
758                servfail,
759                recursive,
760            }) => {
761                // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
762                // always stays on its configured upstreams. Decide the plan against the current
763                // view so a query routed while an exit node is active egresses from that exit node.
764                let plan = if recursive {
765                    recursive_plan(&view, upstreams)
766                } else {
767                    RecursivePlan::Udp(upstreams)
768                };
769                // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
770                // times out) rather than spawn an unbounded task that pins an overlay socket for up
771                // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
772                // (NOT `let _ =`, which would drop it immediately) so it is released only when the
773                // task body completes.
774                let Ok(permit) = inflight.clone().try_acquire_owned() else {
775                    tracing::warn!(
776                        %src,
777                        max = MAX_INFLIGHT_FORWARDS,
778                        "magic dns drop: at max in-flight forwarded queries"
779                    );
780                    continue;
781                };
782                let socket = socket.clone();
783                let channel = channel.clone();
784                forwards.spawn(async move {
785                    let _permit = permit;
786                    let resp = match plan {
787                        RecursivePlan::Udp(upstreams) => {
788                            forward_query(&channel, &upstreams, &query, servfail).await
789                        }
790                        RecursivePlan::Doh(doh_addr) => {
791                            crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, servfail)
792                                .await
793                        }
794                    };
795                    if let Err(e) = socket.send_to(src, &resp).await {
796                        tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
797                    }
798                });
799            }
800        }
801
802        // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
803        // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
804        // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
805        while forwards.try_join_next().is_some() {}
806    }
807}
808
/// The MagicDNS responder actor.
///
/// Subscribes to control state (for the DNS config + self node), peer state (for the peer
/// database) and the active exit node, keeping a [`DnsView`] that the spawned answer loop reads
/// for every query.
pub struct MagicDnsActor {
    /// Keeps the spawned serving task(s) alive for the lifetime of the actor.
    _joinset: JoinSet<()>,
    /// The latest view, shared with the answer loop. Every update handler publishes a fresh
    /// `Arc<DnsView>` through this sender.
    view_tx: watch::Sender<Arc<DnsView>>,
    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
    /// rebuild time — not captured once — for a toggle to reach the served view.
    env: Env,
    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
    /// forward path the serve loop uses ([`forward_query`] /
    /// [`forward_doh`](crate::peerapi_doh::forward_doh), both given this channel — never a host
    /// socket).
    channel: Channel,
}
828
/// A programmatic DNS query routed through the live MagicDNS responder (the `100.100.100.100` path),
/// for [`Device::query_dns`](crate::Device::query_dns). The handler synthesizes a query packet and
/// drives it through the exact same [`decide`]/forward logic as an on-the-wire query, so the result
/// (and its anti-leak posture) matches what a tailnet client would observe.
pub struct Query {
    /// The name to resolve (e.g. `example.com`). A single trailing dot is accepted: the handler
    /// strips it and drops empty labels before encoding the query. Non-ASCII (IDN) names must be
    /// passed as punycode.
    pub name: String,
    /// The DNS query type (`1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE). The query is
    /// always sent with class IN.
    pub qtype: u16,
}
839
/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
/// rather than parsed records — this fork's wire codec has no answer-record decoder.
///
/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
#[derive(Debug, Clone, kameo::Reply)]
pub struct DnsQueryResult {
    /// The raw DNS response datagram (header + question + any answer records).
    pub response: Vec<u8>,
    /// The RCODE, taken from the low 4 bits of byte 3 of [`response`](Self::response) (`0`=NoError,
    /// `2`=SERVFAIL, `3`=NXDOMAIN, `5`=Refused, …).
    pub rcode: u8,
    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
    /// list tried in order (the forwarder returns on the first that answers), so it can include
    /// resolvers that were never actually contacted; for an exit-node DoH forward it is the single
    /// DoH endpoint. Empty for a locally-answered query (an authoritative tailnet name, a NODATA,
    /// or a fail-closed NXDOMAIN — nothing egressed).
    pub resolvers_consulted: Vec<SocketAddr>,
}
859
860impl kameo::Actor for MagicDnsActor {
861    type Args = (Env, Channel);
862    type Error = Error;
863
864    async fn on_start(
865        (env, channel): Self::Args,
866        slf: ActorRef<Self>,
867    ) -> Result<Self, Self::Error> {
868        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
869        env.subscribe::<Arc<PeerState>>(&slf).await?;
870        env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
871            .await?;
872
873        // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
874        // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
875        // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
876        // runtime-settable). The seed value is moot — no query is served before the first
877        // StateUpdate — but seeding it keeps the pre-update view internally consistent.
878        let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
879            enable_ipv6: env.enable_ipv6,
880            accept_dns: env.accept_dns(),
881            ..DnsView::default()
882        }));
883
884        let mut joinset = JoinSet::new();
885
886        // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
887        // never answers anything) so a transient bind error doesn't take down the runtime.
888        let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
889        match channel.udp_bind(addr).await {
890            Ok(socket) => {
891                tracing::debug!(%addr, "magic dns responder bound");
892                joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
893            }
894            Err(e) => {
895                tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
896            }
897        }
898
899        // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
900        // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
901        // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
902        // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
903        if let Some(port) = env.peerapi_port {
904            let channel = channel.clone();
905            let view_rx = view_rx.clone();
906            let forward_exit_egress = env.forward_exit_egress;
907            let taildrop = env.taildrop_store.clone();
908            let funnel_ingress = env.funnel_ingress.clone();
909            joinset.spawn(crate::peerapi::serve(
910                channel,
911                port,
912                view_rx,
913                forward_exit_egress,
914                taildrop,
915                funnel_ingress,
916            ));
917        }
918
919        Ok(Self {
920            _joinset: joinset,
921            view_tx,
922            env,
923            channel,
924        })
925    }
926}
927
/// Build a header-only SERVFAIL response for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or an over-255-byte name).
///
/// The result is exactly 12 bytes: id 0, QR=1 (it is a response), RCODE=2 (server failure), every
/// other flag clear and all four section counts zero — there is no question or answer section
/// because no parseable question was ever produced. This gives `query_dns` a definite, honest
/// RCODE instead of an empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    /// QR bit: the high bit of the first flags byte (header byte 2).
    const QR_RESPONSE: u8 = 0x80;
    /// RCODE 2 (SERVFAIL): the low nibble of the second flags byte (header byte 3).
    const RCODE_SERVFAIL: u8 = 0x02;

    let mut header = [0u8; 12];
    header[2] = QR_RESPONSE;
    header[3] = RCODE_SERVFAIL;
    header.to_vec()
}
940
941impl Message<Query> for MagicDnsActor {
942    type Reply = DnsQueryResult;
943
944    async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
945        // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
946        // uses, against the freshest view — so the result and its anti-leak posture exactly match an
947        // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
948        // and `response_matches_query` validates the echoed id against this same buffer.
949        //
950        // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
951        // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
952        // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
953        // corrupting the QTYPE/QCLASS that follow.
954        let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
955        let labels: Vec<String> = trimmed
956            .split('.')
957            .filter(|label| !label.is_empty())
958            .map(str::to_owned)
959            .collect();
960        let qtype = match query.qtype {
961            1 => ts_dns_wire::QType::A,
962            28 => ts_dns_wire::QType::Aaaa,
963            12 => ts_dns_wire::QType::Ptr,
964            other => ts_dns_wire::QType::Other(other),
965        };
966        // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
967        // forwards via `forward_or_nodata`, matching the on-the-wire path).
968        let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
969
970        let view = self.view_tx.borrow().clone();
971
972        let (response, resolvers_consulted) = match decide(&view, &buf) {
973            // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
974            // the name normalized above that can still happen for a name `encode_query` accepts but
975            // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
976            // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
977            // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
978            // silently drops here (the on-wire client times out); a programmatic caller gets a
979            // definite, honest error instead.
980            None => (servfail_response(), Vec::new()),
981            Some(Decision::Reply(resp)) => (resp, Vec::new()),
982            Some(Decision::Forward {
983                upstreams,
984                query,
985                servfail,
986                recursive,
987            }) => {
988                let plan = if recursive {
989                    recursive_plan(&view, upstreams)
990                } else {
991                    RecursivePlan::Udp(upstreams)
992                };
993                match plan {
994                    RecursivePlan::Udp(upstreams) => {
995                        let resp = forward_query(&self.channel, &upstreams, &query, servfail).await;
996                        (resp, upstreams)
997                    }
998                    RecursivePlan::Doh(doh_addr) => {
999                        let resp = crate::peerapi_doh::forward_doh(
1000                            &self.channel,
1001                            doh_addr,
1002                            &query,
1003                            servfail,
1004                        )
1005                        .await;
1006                        // The query egressed via the exit node's DoH endpoint, not a local UDP
1007                        // upstream — report the DoH address as the resolver consulted.
1008                        (resp, vec![doh_addr])
1009                    }
1010                }
1011            }
1012        };
1013
1014        // RCODE is the low 4 bits of the second flags byte (header byte 3).
1015        let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
1016
1017        DnsQueryResult {
1018            response,
1019            rcode,
1020            resolvers_consulted,
1021        }
1022    }
1023}
1024
1025impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
1026    type Reply = ();
1027
1028    async fn handle(
1029        &mut self,
1030        update: Arc<ts_control::StateUpdate>,
1031        _ctx: &mut Context<Self, Self::Reply>,
1032    ) {
1033        // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
1034        // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1035        let accept_dns = self.env.accept_dns();
1036        self.view_tx.send_modify(|view| {
1037            let mut next = (**view).clone();
1038            next.cfg = update.dns_config.clone().unwrap_or_default();
1039            next.self_node = update.node.clone();
1040            next.accept_dns = accept_dns;
1041            *view = Arc::new(next);
1042        });
1043    }
1044}
1045
1046impl Message<Arc<PeerState>> for MagicDnsActor {
1047    type Reply = ();
1048
1049    async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1050        // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1051        // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1052        // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1053        let accept_dns = self.env.accept_dns();
1054        self.view_tx.send_modify(|view| {
1055            let mut next = (**view).clone();
1056            next.peers = Some(state.peers.clone());
1057            next.accept_dns = accept_dns;
1058            *view = Arc::new(next);
1059        });
1060    }
1061}
1062
1063impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1064    type Reply = ();
1065
1066    async fn handle(
1067        &mut self,
1068        active: crate::route_updater::ActiveExitNode,
1069        _ctx: &mut Context<Self, Self::Reply>,
1070    ) {
1071        // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1072        // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1073        // the address here — once, from the route updater's authoritative selection — means the
1074        // serve loop never re-resolves the selector.
1075        let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1076        self.view_tx.send_modify(|view| {
1077            let mut next = (**view).clone();
1078            next.exit_doh = exit_doh;
1079            *view = Arc::new(next);
1080        });
1081    }
1082}
1083
1084#[cfg(test)]
1085mod tests {
1086    use ts_control::{StableNodeId, TailnetAddress};
1087
1088    use super::*;
1089
1090    /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1091    /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1092    /// `Forward`; a `Forward` here is a bug and panics.
1093    fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1094        match decide(view, buf)? {
1095            Decision::Reply(resp) => Some(resp),
1096            Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1097        }
1098    }
1099
    /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address
    /// (`100.64.0.1` / `fd7a::1`). Every other field is left empty/off, so tests only see the
    /// name and address data they assert on.
    fn test_node() -> Node {
        Node {
            id: 1,
            stable_id: StableNodeId("n1".to_string()),
            // `hostname` + `tailnet` together give the name the tests query: host.user.ts.net.
            hostname: "host".to_string(),
            user_id: 0,
            tailnet: Some("user.ts.net".to_string()),
            tags: vec![],
            // The addresses the A / AAAA tests expect in the answer RDATA.
            tailnet_address: TailnetAddress {
                ipv4: "100.64.0.1/32".parse().unwrap(),
                ipv6: "fd7a::1/128".parse().unwrap(),
            },
            node_key: [0u8; 32].into(),
            node_key_expiry: None,
            online: None,
            last_seen: None,
            key_signature: vec![],
            machine_key: None,
            disco_key: None,
            accepted_routes: vec![],
            underlay_addresses: vec![],
            derp_region: None,
            cap: Default::default(),
            cap_map: Default::default(),
            peerapi_port: None,
            peerapi_dns_proxy: false,
            is_wireguard_only: false,
            exit_node_dns_resolvers: vec![],
            peer_relay: false,
            ssh_host_keys: vec![],
            service_vips: Default::default(),
        }
    }
1134
1135    /// A view with MagicDNS on and a single peer in the db.
1136    fn view_with_peer() -> DnsView {
1137        let mut db = PeerDb::default();
1138        db.upsert(&test_node());
1139
1140        DnsView {
1141            cfg: DnsConfig {
1142                magic_dns: true,
1143                search_domains: vec!["user.ts.net".to_string()],
1144                ..Default::default()
1145            },
1146            peers: Some(Arc::new(db)),
1147            self_node: None,
1148            exit_doh: None,
1149            enable_ipv6: false,
1150            accept_dns: true,
1151        }
1152    }
1153
1154    /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1155    fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1156        let mut buf: Vec<u8> = Vec::new();
1157        buf.extend_from_slice(&id.to_be_bytes());
1158        buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1159        buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1160        buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1161        buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1162        buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1163        for label in labels {
1164            buf.push(label.len() as u8);
1165            buf.extend_from_slice(label.as_bytes());
1166        }
1167        buf.push(0); // root label
1168        buf.extend_from_slice(&qtype.to_be_bytes());
1169        buf.extend_from_slice(&qclass.to_be_bytes());
1170        buf
1171    }
1172
1173    /// Parse a response header: returns `(id, rcode, ancount)`.
1174    fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1175        let id = u16::from_be_bytes([resp[0], resp[1]]);
1176        let flags = u16::from_be_bytes([resp[2], resp[3]]);
1177        let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1178        (id, (flags & 0x000F) as u8, ancount)
1179    }
1180
1181    #[test]
1182    fn a_query_for_known_peer_answers_v4() {
1183        let view = view_with_peer();
1184        let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1185
1186        let resp = answer(&view, &buf).expect("answers");
1187        let (id, rcode, ancount) = parse_header(&resp);
1188        assert_eq!(id, 0x1234);
1189        assert_eq!(rcode, 0, "NoError");
1190        assert_eq!(ancount, 1);
1191
1192        // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1193        let tail = &resp[resp.len() - 4..];
1194        assert_eq!(tail, &[100, 64, 0, 1]);
1195    }
1196
1197    #[test]
1198    fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1199        // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1200        // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1201        // route. This is the anti-fingerprint / no-dead-connections posture.
1202        let view = view_with_peer();
1203        assert!(!view.enable_ipv6, "default gate is off");
1204        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1205
1206        let resp = answer(&view, &buf).expect("answers");
1207        let (_, rcode, ancount) = parse_header(&resp);
1208        assert_eq!(rcode, 0, "NoError (NODATA)");
1209        assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1210    }
1211
1212    #[test]
1213    fn a_query_still_resolves_when_ipv6_off() {
1214        // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1215        let view = view_with_peer();
1216        let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1217
1218        let resp = answer(&view, &buf).expect("answers");
1219        let (_, rcode, ancount) = parse_header(&resp);
1220        assert_eq!(rcode, 0, "NoError");
1221        assert_eq!(ancount, 1);
1222        let tail = &resp[resp.len() - 4..];
1223        assert_eq!(tail, &[100, 64, 0, 1]);
1224    }
1225
1226    #[test]
1227    fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1228        // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1229        let mut view = view_with_peer();
1230        view.enable_ipv6 = true;
1231        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1232
1233        let resp = answer(&view, &buf).expect("answers");
1234        let (_, rcode, ancount) = parse_header(&resp);
1235        assert_eq!(rcode, 0, "NoError");
1236        assert_eq!(ancount, 1);
1237
1238        let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1239        let tail = &resp[resp.len() - 16..];
1240        assert_eq!(tail, expected);
1241    }
1242
1243    #[test]
1244    fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1245        // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1246        // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1247        // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1248        // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1249        let mut db = PeerDb::default();
1250        db.upsert(&test_node());
1251        let view = DnsView {
1252            cfg: DnsConfig {
1253                magic_dns: true,
1254                search_domains: vec!["user.ts.net".to_string()],
1255                fallback_resolvers: vec![DnsResolver {
1256                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1257                    use_with_exit_node: false,
1258                }],
1259                ..Default::default()
1260            },
1261            peers: Some(Arc::new(db)),
1262            self_node: None,
1263            exit_doh: None,
1264            enable_ipv6: false,
1265            accept_dns: true,
1266        };
1267        let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1268
1269        match decide(&view, &buf).expect("decides") {
1270            Decision::Reply(resp) => {
1271                let (_, rcode, _) = parse_header(&resp);
1272                assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1273            }
1274            Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1275        }
1276    }
1277
1278    #[test]
1279    fn bare_hostname_resolves() {
1280        // The name index also stores the bare hostname.
1281        let view = view_with_peer();
1282        let buf = build_query(0x7, &["host"], 1, 1);
1283
1284        let resp = answer(&view, &buf).expect("answers");
1285        let (_, rcode, ancount) = parse_header(&resp);
1286        assert_eq!(rcode, 0);
1287        assert_eq!(ancount, 1);
1288    }
1289
1290    #[test]
1291    fn unknown_off_tailnet_name_with_no_upstream_is_servfail() {
1292        // An off-tailnet name with no resolver configured cannot be forwarded. Go answers SERVFAIL
1293        // (a soft "couldn't resolve"), not NXDOMAIN — asserting non-existence of a real name we
1294        // simply have no upstream for would poison a downstream stub's negative cache. (A *tailnet*
1295        // name with no overlay match stays NXDOMAIN — see `tailnet_name_is_never_forwarded` — and a
1296        // negative split-DNS route stays NXDOMAIN — see `negative_route_is_nxdomain_not_forwarded`.)
1297        let view = view_with_peer();
1298        let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1299
1300        let resp = answer(&view, &buf).expect("answers");
1301        let (_, rcode, ancount) = parse_header(&resp);
1302        assert_eq!(
1303            rcode, 2,
1304            "ServFail: off-tailnet name, nothing to forward to"
1305        );
1306        assert_eq!(ancount, 0);
1307    }
1308
1309    #[test]
1310    fn magic_dns_off_is_refused() {
1311        // Fail closed: with MagicDNS disabled, even a known name is refused.
1312        let mut view = view_with_peer();
1313        view.cfg.magic_dns = false;
1314        let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1315
1316        let resp = answer(&view, &buf).expect("answers");
1317        let (_, rcode, ancount) = parse_header(&resp);
1318        assert_eq!(rcode, 5, "Refused");
1319        assert_eq!(ancount, 0);
1320    }
1321
1322    #[test]
1323    fn accept_dns_false_refuses_otherwise_answerable_query() {
1324        // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1325        // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1326        // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1327        let mut view = view_with_peer();
1328        assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1329        view.accept_dns = false;
1330        let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1331
1332        let resp = answer(&view, &buf).expect("answers");
1333        let (_, rcode, ancount) = parse_header(&resp);
1334        assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1335        assert_eq!(ancount, 0);
1336
1337        // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1338        // answers authoritatively — proving the OFF→ON restore is automatic.
1339        view.accept_dns = true;
1340        let resp = answer(&view, &buf).expect("answers");
1341        let (_, rcode, ancount) = parse_header(&resp);
1342        assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1343        assert_eq!(ancount, 1);
1344        let tail = &resp[resp.len() - 4..];
1345        assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1346    }
1347
1348    #[test]
1349    fn default_view_serves_nothing() {
1350        // The default (no dns_config seen) has magic_dns == false: fail closed.
1351        let view = DnsView::default();
1352        let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1353
1354        let resp = answer(&view, &buf).expect("answers");
1355        let (_, rcode, _) = parse_header(&resp);
1356        assert_eq!(rcode, 5, "Refused");
1357    }
1358
1359    #[test]
1360    fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1361        // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1362        // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1363        // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1364        let view = view_with_peer();
1365        let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1366
1367        let resp = answer(&view, &buf).expect("answers");
1368        let (_, rcode, ancount) = parse_header(&resp);
1369        assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1370        assert_eq!(ancount, 0, "no answer records (NODATA)");
1371    }
1372
1373    #[test]
1374    fn unsupported_qtype_off_tailnet_forwards_or_servfails() {
1375        // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1376        // REFUSED. With no upstream configured in this view it soft-fails to SERVFAIL (the same
1377        // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1378        // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1379        let view = view_with_peer();
1380        let buf = build_query(0x1, &["example", "com"], 65, 1);
1381
1382        let resp = answer(&view, &buf).expect("answers");
1383        let (_, rcode, _) = parse_header(&resp);
1384        assert_eq!(
1385            rcode, 2,
1386            "off-tailnet, no upstream -> SERVFAIL (forwardable, not Refused)"
1387        );
1388    }
1389
1390    #[test]
1391    fn unimplemented_qtype_on_tailnet_name_is_notimp() {
1392        // NS (2), SOA (6), HINFO (13), AXFR (252) for a tailnet-authoritative name must answer NOTIMP
1393        // (rcode 4), matching Go `resolveLocal`'s `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR,
1394        // dns.TypeHINFO: return RCodeNotImplemented`. Returning NODATA (rcode 0) here was a clean
1395        // fingerprint (a `dig SOA user.ts.net` answer differs from real tailscaled). The name is
1396        // still never forwarded (anti-leak).
1397        let view = view_with_peer();
1398        for qtype in [2u16, 6, 13, 252] {
1399            let buf = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
1400            let resp = answer(&view, &buf).expect("answers");
1401            let (_, rcode, ancount) = parse_header(&resp);
1402            assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
1403            assert_eq!(ancount, 0, "NOTIMP carries no answer records");
1404        }
1405    }
1406
1407    #[test]
1408    fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
1409        // The NOTIMP disposition is ONLY for a name we are authoritative for. An NS query for an
1410        // off-tailnet name must still forward (here: SERVFAIL, no upstream) — NOT NOTIMP — exactly
1411        // like the off-tailnet HTTPS/SVCB case above. Guards the NOTIMP change against over-reach.
1412        let view = view_with_peer();
1413        let buf = build_query(0x1, &["example", "com"], 2, 1); // NS, off-tailnet
1414        let resp = answer(&view, &buf).expect("answers");
1415        let (_, rcode, _) = parse_header(&resp);
1416        assert_eq!(
1417            rcode, 2,
1418            "off-tailnet NS -> SERVFAIL (forwardable), not NOTIMP"
1419        );
1420    }
1421
1422    #[test]
1423    fn malformed_query_is_dropped() {
1424        // A response (QR bit set) is not a query; we drop it (no answer).
1425        let mut buf = build_query(0x1, &["host"], 1, 1);
1426        buf[2] = 0x80; // set QR bit
1427        assert!(answer(&view_with_peer(), &buf).is_none());
1428    }
1429
1430    #[test]
1431    fn ptr_for_known_ip_answers_fqdn() {
1432        let view = view_with_peer();
1433        // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1434        let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1435
1436        let resp = answer(&view, &buf).expect("answers");
1437        let (_, rcode, ancount) = parse_header(&resp);
1438        assert_eq!(rcode, 0, "NoError");
1439        assert_eq!(ancount, 1);
1440
1441        // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1442        let expected = {
1443            let mut out = Vec::new();
1444            for label in ["host", "user", "ts", "net"] {
1445                out.push(label.len() as u8);
1446                out.extend_from_slice(label.as_bytes());
1447            }
1448            out.push(0);
1449            out
1450        };
1451        let tail = &resp[resp.len() - expected.len()..];
1452        assert_eq!(tail, expected.as_slice());
1453    }
1454
1455    #[test]
1456    fn ptr_for_unknown_public_ip_off_tailnet_is_servfail() {
1457        let view = view_with_peer();
1458        // 9.9.9.9 is a public IP, not a known tailnet IP and not in the CGNAT reverse zone — so its
1459        // reverse query is an ordinary off-tailnet name. With no upstream to forward it to, that is
1460        // SERVFAIL (soft), not NXDOMAIN. (A CGNAT/ip6.arpa reverse for an unmatched tailnet IP still
1461        // fails closed to NXDOMAIN as an anti-leak guard — see `ptr_for_unknown_tailnet_ip_*`.)
1462        let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1463
1464        let resp = answer(&view, &buf).expect("answers");
1465        let (_, rcode, _) = parse_header(&resp);
1466        assert_eq!(
1467            rcode, 2,
1468            "ServFail: off-tailnet public-IP reverse, no upstream"
1469        );
1470    }
1471
1472    #[test]
1473    fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
1474        // A view WITH an upstream resolver: an off-tailnet reverse query would forward, but a
1475        // reverse query for an unmatched IP in the CGNAT range (100.64.0.0/10) must fail closed to
1476        // NXDOMAIN — the probed tailnet IP must never leak upstream.
1477        let mut db = PeerDb::default();
1478        db.upsert(&test_node());
1479        let view = DnsView {
1480            cfg: DnsConfig {
1481                magic_dns: true,
1482                search_domains: vec!["user.ts.net".to_string()],
1483                fallback_resolvers: vec![DnsResolver {
1484                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1485                    use_with_exit_node: false,
1486                }],
1487                ..Default::default()
1488            },
1489            peers: Some(Arc::new(db)),
1490            self_node: None,
1491            exit_doh: None,
1492            enable_ipv6: false,
1493            accept_dns: true,
1494        };
1495
1496        // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
1497        let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1498        match decide(&view, &buf).expect("decides") {
1499            Decision::Reply(resp) => {
1500                let (_, rcode, _) = parse_header(&resp);
1501                assert_eq!(rcode, 3, "NxDomain");
1502            }
1503            Decision::Forward { .. } => {
1504                panic!("tailnet CGNAT PTR must never be forwarded upstream")
1505            }
1506        }
1507    }
1508
1509    /// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
1510    /// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
1511    /// never forward. The PTR arm guards this, but the `QType::Other` path routes through
1512    /// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
1513    #[test]
1514    fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
1515        let mut db = PeerDb::default();
1516        db.upsert(&test_node());
1517        let view = DnsView {
1518            cfg: DnsConfig {
1519                magic_dns: true,
1520                search_domains: vec!["user.ts.net".to_string()],
1521                fallback_resolvers: vec![DnsResolver {
1522                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1523                    use_with_exit_node: false,
1524                }],
1525                ..Default::default()
1526            },
1527            peers: Some(Arc::new(db)),
1528            self_node: None,
1529            exit_doh: None,
1530            enable_ipv6: false,
1531            accept_dns: true,
1532        };
1533
1534        // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
1535        let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
1536        match decide(&view, &buf).expect("decides") {
1537            Decision::Reply(resp) => {
1538                let (_, rcode, _) = parse_header(&resp);
1539                assert_eq!(rcode, 3, "NxDomain");
1540            }
1541            Decision::Forward { .. } => {
1542                panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
1543            }
1544        }
1545    }
1546
1547    /// Same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype: must NXDOMAIN, not
1548    /// forward (revealing a tailnet ULA was probed).
1549    #[test]
1550    fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
1551        let view = view_with_routes(
1552            std::collections::BTreeMap::new(),
1553            vec![udp("9.9.9.9:53")],
1554            vec![],
1555        );
1556        // An ip6.arpa reverse name with a TXT (16) qtype must fail closed.
1557        let buf = build_query(
1558            0x37,
1559            &[
1560                "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
1561                "a", "7", "d", "f", "ip6", "arpa",
1562            ],
1563            16,
1564            1,
1565        );
1566        match decide(&view, &buf).expect("decides") {
1567            Decision::Reply(resp) => {
1568                let (_, rcode, _) = parse_header(&resp);
1569                assert_eq!(rcode, 3, "NxDomain");
1570            }
1571            Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
1572        }
1573    }
1574
1575    #[test]
1576    fn is_tailnet_cgnat_classifies_range() {
1577        assert!(is_tailnet_cgnat("100.64.0.0".parse().unwrap()));
1578        assert!(is_tailnet_cgnat("100.64.0.1".parse().unwrap()));
1579        assert!(is_tailnet_cgnat("100.127.255.255".parse().unwrap()));
1580        // Outside the /10:
1581        assert!(!is_tailnet_cgnat("100.63.255.255".parse().unwrap()));
1582        assert!(!is_tailnet_cgnat("100.128.0.0".parse().unwrap()));
1583        assert!(!is_tailnet_cgnat("9.9.9.9".parse().unwrap()));
1584        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
1585        assert!(is_tailnet_cgnat("100.100.100.100".parse().unwrap()));
1586    }
1587
1588    #[test]
1589    fn response_matches_query_validates_id_and_qr() {
1590        // query id 0x1234, QR=0
1591        let query = build_query(0x1234, &["a", "com"], 1, 1);
1592
1593        // A well-formed response: same id, QR=1.
1594        let mut good = query.clone();
1595        good[2] |= 0x80;
1596        assert!(response_matches_query(&query, &good));
1597
1598        // Same id but QR still 0 (not a response): rejected.
1599        assert!(!response_matches_query(&query, &query));
1600
1601        // QR=1 but a different transaction id: rejected (off-path forgery).
1602        let mut wrong_id = good.clone();
1603        wrong_id[0] ^= 0xFF;
1604        assert!(!response_matches_query(&query, &wrong_id));
1605
1606        // Too-short buffers: rejected.
1607        assert!(!response_matches_query(&query, &[0u8; 2]));
1608        assert!(!response_matches_query(&[0u8; 3], &good));
1609    }
1610
1611    #[test]
1612    fn self_node_resolves_when_no_peer_match() {
1613        // With the peer db empty but a self node set, the self node answers for its own name.
1614        let view = DnsView {
1615            cfg: DnsConfig {
1616                magic_dns: true,
1617                search_domains: vec![],
1618                ..Default::default()
1619            },
1620            peers: None,
1621            self_node: Some(test_node()),
1622            exit_doh: None,
1623            enable_ipv6: false,
1624            accept_dns: true,
1625        };
1626        let buf = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
1627
1628        let resp = answer(&view, &buf).expect("answers");
1629        let (_, rcode, ancount) = parse_header(&resp);
1630        assert_eq!(rcode, 0);
1631        assert_eq!(ancount, 1);
1632        let tail = &resp[resp.len() - 4..];
1633        assert_eq!(tail, &[100, 64, 0, 1]);
1634    }
1635
1636    #[test]
1637    fn partially_qualified_name_resolves_via_search_domain() {
1638        // "host.user" is not indexed directly, but the "user.ts.net" search domain qualifies it
1639        // to "host.user.user.ts.net"... which does NOT match. The realistic case is "host" (bare,
1640        // already indexed) and "host.user.ts.net" (fqdn). Verify a name needing suffix expansion:
1641        // with search domain "ts.net" the partially-qualified "host.user" => "host.user.ts.net".
1642        let mut view = view_with_peer();
1643        view.cfg.search_domains = vec!["ts.net".to_string()];
1644        let buf = build_query(0x55, &["host", "user"], 1, 1);
1645
1646        let resp = answer(&view, &buf).expect("answers");
1647        let (_, rcode, ancount) = parse_header(&resp);
1648        assert_eq!(rcode, 0, "NoError via search-domain expansion");
1649        assert_eq!(ancount, 1);
1650        let tail = &resp[resp.len() - 4..];
1651        assert_eq!(tail, &[100, 64, 0, 1]);
1652    }
1653
1654    #[test]
1655    fn extra_record_a_answers_when_no_peer_match() {
1656        // A control-pushed static A record answers for a non-peer name, fail-closed otherwise.
1657        let mut view = view_with_peer();
1658        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1659            name: "static.user.ts.net".to_string(),
1660            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1661        }];
1662        let buf = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
1663
1664        let resp = answer(&view, &buf).expect("answers");
1665        let (_, rcode, ancount) = parse_header(&resp);
1666        assert_eq!(rcode, 0, "NoError from extra record");
1667        assert_eq!(ancount, 1);
1668        let tail = &resp[resp.len() - 4..];
1669        assert_eq!(tail, &[100, 64, 0, 9]);
1670    }
1671
1672    #[test]
1673    fn extra_record_matches_query_case_insensitively() {
1674        // The query name is canonicalized (lowercased) at decode time, so a mixed-case query
1675        // matches a lowercase extra record.
1676        let mut view = view_with_peer();
1677        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1678            name: "static.user.ts.net".to_string(),
1679            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1680        }];
1681        let buf = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
1682
1683        let resp = answer(&view, &buf).expect("answers");
1684        let (_, rcode, ancount) = parse_header(&resp);
1685        assert_eq!(rcode, 0, "NoError: case-insensitive match");
1686        assert_eq!(ancount, 1);
1687        let tail = &resp[resp.len() - 4..];
1688        assert_eq!(tail, &[100, 64, 0, 9]);
1689    }
1690
1691    #[test]
1692    fn extra_record_not_expanded_by_search_domain() {
1693        // Unlike peer names, an extra record is matched as an FQDN only: a bare query that would
1694        // need search-domain expansion to reach the record name must NOT resolve.
1695        let mut view = view_with_peer();
1696        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1697            name: "static.user.ts.net".to_string(),
1698            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1699        }];
1700        // "static" would only reach "static.user.ts.net" via the "user.ts.net" search domain.
1701        let buf = build_query(0x7B, &["static"], 1, 1);
1702
1703        let resp = answer(&view, &buf).expect("answers");
1704        let (_, rcode, _) = parse_header(&resp);
1705        // Not search-expanded → treated as the bare off-tailnet name "static", which has no upstream
1706        // here, so SERVFAIL (soft). The point of the test — that the extra record is NOT reachable
1707        // via search expansion — holds regardless of the failure rcode.
1708        assert_eq!(
1709            rcode, 2,
1710            "ServFail: bare 'static' is not search-expanded to the extra record"
1711        );
1712    }
1713
1714    #[test]
1715    fn extra_record_aaaa_family_is_isolated() {
1716        // An A-only extra record must NOT answer an AAAA query for the same name (NxDomain).
1717        let mut view = view_with_peer();
1718        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1719            name: "v4only.user.ts.net".to_string(),
1720            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1721        }];
1722        let buf = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
1723
1724        let resp = answer(&view, &buf).expect("answers");
1725        let (_, rcode, _) = parse_header(&resp);
1726        assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
1727    }
1728
1729    #[test]
1730    fn extra_record_ignored_when_magic_dns_off() {
1731        // Fail closed: extra records are never served while MagicDNS is disabled.
1732        let mut view = view_with_peer();
1733        view.cfg.magic_dns = false;
1734        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1735            name: "static.user.ts.net".to_string(),
1736            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1737        }];
1738        let buf = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
1739
1740        let resp = answer(&view, &buf).expect("answers");
1741        let (_, rcode, _) = parse_header(&resp);
1742        assert_eq!(rcode, 5, "Refused");
1743    }
1744
1745    #[test]
1746    fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
1747        // A CHAOS-class (3) query for a tailnet name must NOT be answered as IN (no overlay A), and
1748        // must NOT be REFUSED (Go does no class check on the local path). It's an unsupported
1749        // authoritative class -> NODATA (empty NOERROR), and never forwarded (tailnet name).
1750        let view = view_with_peer();
1751        let buf = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
1752
1753        let resp = answer(&view, &buf).expect("answers");
1754        let (_, rcode, ancount) = parse_header(&resp);
1755        assert_eq!(
1756            rcode, 0,
1757            "NoError (NODATA), not Refused and not an IN answer"
1758        );
1759        assert_eq!(
1760            ancount, 0,
1761            "must not hand out the overlay A for a non-IN class"
1762        );
1763    }
1764
1765    #[test]
1766    fn non_in_class_off_tailnet_forwards_or_servfails() {
1767        // A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
1768        // No upstream here -> SERVFAIL, proving the class gate no longer short-circuits to Refused.
1769        let view = view_with_peer();
1770        let buf = build_query(0x66, &["example", "com"], 1, 3);
1771
1772        let resp = answer(&view, &buf).expect("answers");
1773        let (_, rcode, _) = parse_header(&resp);
1774        assert_eq!(
1775            rcode, 2,
1776            "off-tailnet non-IN class, no upstream -> SERVFAIL, not Refused"
1777        );
1778    }
1779
1780    /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1781    /// + global resolvers.
1782    fn view_with_routes(
1783        routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1784        resolvers: Vec<DnsResolver>,
1785        fallback: Vec<DnsResolver>,
1786    ) -> DnsView {
1787        DnsView {
1788            cfg: DnsConfig {
1789                magic_dns: true,
1790                search_domains: vec!["user.ts.net".to_string()],
1791                routes,
1792                resolvers,
1793                fallback_resolvers: fallback,
1794                ..Default::default()
1795            },
1796            peers: None,
1797            self_node: None,
1798            exit_doh: None,
1799            enable_ipv6: false,
1800            accept_dns: true,
1801        }
1802    }
1803
1804    fn udp(addr: &str) -> DnsResolver {
1805        DnsResolver {
1806            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1807            use_with_exit_node: false,
1808        }
1809    }
1810
1811    #[test]
1812    fn split_dns_route_forwards_to_matching_upstream() {
1813        let mut routes = std::collections::BTreeMap::new();
1814        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1815        let view = view_with_routes(routes, vec![], vec![]);
1816        let buf = build_query(0x100, &["api", "corp", "example"], 1, 1);
1817
1818        match decide(&view, &buf).expect("decides") {
1819            Decision::Forward { upstreams, .. } => {
1820                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1821            }
1822            Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
1823        }
1824    }
1825
1826    #[test]
1827    fn exotic_qtype_off_tailnet_forwards_to_upstream() {
1828        // The core of the fix: an HTTPS/SVCB (type 65) query for an off-tailnet name with a matching
1829        // route must FORWARD to the upstream (verbatim), exactly like an A query would — not REFUSE
1830        // and not NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
1831        let mut routes = std::collections::BTreeMap::new();
1832        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1833        let view = view_with_routes(routes, vec![], vec![]);
1834        let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
1835
1836        match decide(&view, &buf).expect("decides") {
1837            Decision::Forward {
1838                upstreams, query, ..
1839            } => {
1840                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1841                assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
1842            }
1843            Decision::Reply(_) => {
1844                panic!("an off-tailnet HTTPS-record query must forward, not reply")
1845            }
1846        }
1847    }
1848
1849    #[test]
1850    fn non_in_class_off_tailnet_forwards_to_upstream() {
1851        // A non-IN class for an off-tailnet routed name forwards too (Go does no class check on the
1852        // local path). Proves the class gate no longer short-circuits to REFUSED before routing.
1853        let mut routes = std::collections::BTreeMap::new();
1854        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1855        let view = view_with_routes(routes, vec![], vec![]);
1856        let buf = build_query(0x103, &["api", "corp", "example"], 1, 3);
1857
1858        match decide(&view, &buf).expect("decides") {
1859            Decision::Forward { upstreams, .. } => {
1860                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1861            }
1862            Decision::Reply(_) => {
1863                panic!("an off-tailnet non-IN-class query must forward, not reply")
1864            }
1865        }
1866    }
1867
1868    /// The local responder bounds concurrent in-flight forwards: `serve` acquires one
1869    /// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the query fail-closed when
1870    /// the pool is exhausted (a client spraying forwardable names can't open unbounded overlay
1871    /// sockets). This pins the gating semantics `serve` relies on — drained pool refuses a new
1872    /// permit; releasing one restores capacity — and the cap constant itself. (The async `serve`
1873    /// loop has no netstack-free test seam, so the semaphore behavior is exercised directly here, the
1874    /// same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
1875    #[test]
1876    fn forward_inflight_cap_fails_closed_when_saturated() {
1877        use std::sync::Arc;
1878
1879        use tokio::sync::Semaphore;
1880
1881        let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
1882
1883        // Drain every permit (one per concurrently in-flight forward).
1884        let mut held = Vec::with_capacity(MAX_INFLIGHT_FORWARDS);
1885        for _ in 0..MAX_INFLIGHT_FORWARDS {
1886            held.push(
1887                inflight
1888                    .clone()
1889                    .try_acquire_owned()
1890                    .expect("permits available below the cap"),
1891            );
1892        }
1893
1894        // At the cap, the next forward is refused — `serve` would drop the query, not spawn.
1895        assert!(
1896            inflight.clone().try_acquire_owned().is_err(),
1897            "a saturated forward pool must refuse a new permit (fail closed)"
1898        );
1899
1900        // Completing an in-flight forward releases its permit and restores capacity.
1901        drop(held.pop());
1902        assert!(
1903            inflight.clone().try_acquire_owned().is_ok(),
1904            "releasing a permit must let the next forward proceed"
1905        );
1906    }
1907
1908    /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1909    /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1910    /// only when the task completes. This guards the regression the saturation test above can't see:
1911    /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1912    /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1913    /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1914    /// while the task runs and refill once it finishes.
1915    #[tokio::test]
1916    async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1917        use std::sync::Arc;
1918
1919        use tokio::sync::Semaphore;
1920
1921        let inflight = Arc::new(Semaphore::new(1));
1922        let permit = inflight
1923            .clone()
1924            .try_acquire_owned()
1925            .expect("the sole permit is available");
1926
1927        let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1928        let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1929        let task = tokio::spawn(async move {
1930            // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1931            // task, so it lives until the body ends — not dropped at the `let`.
1932            let _permit = permit;
1933            started_tx.send(()).unwrap();
1934            // Stand in for the `.await` on the upstream forward.
1935            release_rx.await.unwrap();
1936        });
1937
1938        started_rx.await.unwrap();
1939        // While the task runs, the permit it moved in is still held — the pool is empty.
1940        assert!(
1941            inflight.clone().try_acquire_owned().is_err(),
1942            "a permit moved into a running task must stay held across its await"
1943        );
1944
1945        // Let the task finish; its permit drops with the body and capacity returns.
1946        release_tx.send(()).unwrap();
1947        task.await.unwrap();
1948        assert!(
1949            inflight.clone().try_acquire_owned().is_ok(),
1950            "the permit must be released once the task body completes"
1951        );
1952    }
1953
1954    #[test]
1955    fn longest_suffix_route_wins() {
1956        let mut routes = std::collections::BTreeMap::new();
1957        routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
1958        routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
1959        let view = view_with_routes(routes, vec![], vec![]);
1960        let buf = build_query(0x101, &["api", "corp", "example"], 1, 1);
1961
1962        match decide(&view, &buf).expect("decides") {
1963            Decision::Forward { upstreams, .. } => {
1964                assert_eq!(
1965                    upstreams,
1966                    vec!["10.0.0.2:53".parse().unwrap()],
1967                    "longer suffix wins"
1968                );
1969            }
1970            Decision::Reply(_) => panic!("expected forward"),
1971        }
1972    }
1973
1974    #[test]
1975    fn negative_route_is_nxdomain_not_forwarded() {
1976        // An empty upstream list is a negative route: fail closed, never forward.
1977        let mut routes = std::collections::BTreeMap::new();
1978        routes.insert("blocked.example".to_string(), vec![]);
1979        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
1980        let buf = build_query(0x102, &["x", "blocked", "example"], 1, 1);
1981
1982        match decide(&view, &buf).expect("decides") {
1983            Decision::Reply(resp) => {
1984                let (_, rcode, _) = parse_header(&resp);
1985                assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
1986            }
1987            Decision::Forward { .. } => panic!("negative route must not forward"),
1988        }
1989    }
1990
1991    #[test]
1992    fn unrouted_name_forwards_to_fallback_then_global() {
1993        // No route matches: fallback resolvers are preferred over global resolvers.
1994        let view = view_with_routes(
1995            std::collections::BTreeMap::new(),
1996            vec![udp("8.8.8.8:53")],
1997            vec![udp("1.1.1.1:53")],
1998        );
1999        let buf = build_query(0x103, &["example", "com"], 1, 1);
2000
2001        match decide(&view, &buf).expect("decides") {
2002            Decision::Forward { upstreams, .. } => {
2003                assert_eq!(
2004                    upstreams,
2005                    vec!["1.1.1.1:53".parse().unwrap()],
2006                    "fallback preferred"
2007                );
2008            }
2009            Decision::Reply(_) => panic!("expected forward to fallback"),
2010        }
2011    }
2012
2013    #[test]
2014    fn unrouted_name_forwards_to_global_when_no_fallback() {
2015        let view = view_with_routes(
2016            std::collections::BTreeMap::new(),
2017            vec![udp("8.8.8.8:53")],
2018            vec![],
2019        );
2020        let buf = build_query(0x104, &["example", "com"], 1, 1);
2021
2022        match decide(&view, &buf).expect("decides") {
2023            Decision::Forward { upstreams, .. } => {
2024                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
2025            }
2026            Decision::Reply(_) => panic!("expected forward to global resolver"),
2027        }
2028    }
2029
2030    #[test]
2031    fn tailnet_name_is_never_forwarded() {
2032        // Anti-leak: a name under a tailnet search domain that has no overlay match must fail
2033        // closed to NXDOMAIN, never leak to an upstream resolver, even with resolvers configured.
2034        let view = view_with_routes(
2035            std::collections::BTreeMap::new(),
2036            vec![udp("8.8.8.8:53")],
2037            vec![udp("1.1.1.1:53")],
2038        );
2039        // "ghost.user.ts.net" is under the tailnet suffix but matches no peer.
2040        let buf = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
2041
2042        match decide(&view, &buf).expect("decides") {
2043            Decision::Reply(resp) => {
2044                let (_, rcode, _) = parse_header(&resp);
2045                assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
2046            }
2047            Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
2048        }
2049    }
2050
2051    #[test]
2052    fn no_resolvers_off_tailnet_is_servfail_not_nxdomain() {
2053        // No route, no resolvers: an OFF-tailnet name cannot be forwarded. Go answers SERVFAIL
2054        // (forwarder.go:1207 "no upstream resolvers set, returning SERVFAIL"), NOT NXDOMAIN — a
2055        // cacheable non-existence for a real name we merely couldn't forward would poison downstream
2056        // stub caches. We still never forward (the name does not leak); we just soft-fail.
2057        let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);
2058        let buf = build_query(0x106, &["example", "com"], 1, 1);
2059
2060        match decide(&view, &buf).expect("decides") {
2061            Decision::Reply(resp) => {
2062                let (_, rcode, _) = parse_header(&resp);
2063                assert_eq!(
2064                    rcode, 2,
2065                    "ServFail: off-tailnet name with no upstream to forward to"
2066                );
2067            }
2068            Decision::Forward { .. } => panic!("must not forward with no resolvers"),
2069        }
2070    }
2071
2072    #[test]
2073    fn route_with_only_ipv6_upstreams_off_tailnet_is_servfail() {
2074        // A split-DNS route exists but every resolver is IPv6 (filtered out under the IPv4-only
2075        // egress): we have a route yet nowhere to forward. That is an inability to forward an
2076        // off-tailnet name, so SERVFAIL (soft), not a fabricated NXDOMAIN.
2077        let mut routes = std::collections::BTreeMap::new();
2078        routes.insert("corp.example".to_string(), vec![udp("[2001:db8::53]:53")]);
2079        let view = view_with_routes(routes, vec![], vec![]);
2080        let buf = build_query(0x108, &["host", "corp", "example"], 1, 1);
2081
2082        match decide(&view, &buf).expect("decides") {
2083            Decision::Reply(resp) => {
2084                let (_, rcode, _) = parse_header(&resp);
2085                assert_eq!(
2086                    rcode, 2,
2087                    "ServFail: route's resolvers all filtered out (IPv6-only), cannot forward"
2088                );
2089            }
2090            Decision::Forward { .. } => panic!("must not forward when all upstreams are filtered"),
2091        }
2092    }
2093
2094    #[test]
2095    fn overlay_match_wins_over_forwarding() {
2096        // A known peer name resolves authoritatively even when upstream resolvers are configured.
2097        let mut db = PeerDb::default();
2098        db.upsert(&test_node());
2099        let view = DnsView {
2100            cfg: DnsConfig {
2101                magic_dns: true,
2102                search_domains: vec!["user.ts.net".to_string()],
2103                resolvers: vec![udp("8.8.8.8:53")],
2104                ..Default::default()
2105            },
2106            peers: Some(Arc::new(db)),
2107            self_node: None,
2108            exit_doh: None,
2109            enable_ipv6: false,
2110            accept_dns: true,
2111        };
2112        let buf = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);
2113
2114        match decide(&view, &buf).expect("decides") {
2115            Decision::Reply(resp) => {
2116                let (_, rcode, ancount) = parse_header(&resp);
2117                assert_eq!(rcode, 0, "authoritative answer wins");
2118                assert_eq!(ancount, 1);
2119            }
2120            Decision::Forward { .. } => panic!("overlay match must not forward"),
2121        }
2122    }
2123
2124    #[test]
2125    fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
2126        // Anti-leak: an `ip6.arpa` reverse PTR for a tailnet ULA (fd7a:…) must fail closed to
2127        // NXDOMAIN, never be forwarded — even with an upstream resolver configured. This fork is
2128        // IPv4-only on the tailnet; forwarding would reveal that a v6 address was probed.
2129        let view = view_with_routes(
2130            std::collections::BTreeMap::new(),
2131            vec![udp("8.8.8.8:53")],
2132            vec![udp("1.1.1.1:53")],
2133        );
2134        // Reverse name for fd7a::1 (nibble-reversed) under ip6.arpa. The exact nibble labels don't
2135        // matter to the guard — any name ending in ip6.arpa must fail closed.
2136        let labels = vec![
2137            "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
2138            "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
2139            "arpa",
2140        ];
2141        let buf = build_query(0x200, &labels, 12, 1);
2142
2143        match decide(&view, &buf).expect("decides") {
2144            Decision::Reply(resp) => {
2145                let (_, rcode, _) = parse_header(&resp);
2146                assert_eq!(
2147                    rcode, 3,
2148                    "NxDomain: ip6.arpa reverse must not leak upstream"
2149                );
2150            }
2151            Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
2152        }
2153    }
2154
2155    #[test]
2156    fn cap_response_sets_tc_when_truncated() {
2157        // An oversize upstream answer is capped to a single datagram AND marked truncated (TC bit)
2158        // so the stub resolver retries over TCP rather than trusting a chopped message.
2159        let mut big = build_query(0x300, &["example", "com"], 1, 1);
2160        big[2] |= 0x80; // make it a response (QR=1)
2161        big.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);
2162
2163        let out = cap_response(big);
2164        assert_eq!(out.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
2165        assert_ne!(out[2] & 0x02, 0, "TC bit set on truncation");
2166    }
2167
2168    #[test]
2169    fn cap_response_leaves_small_response_untouched() {
2170        // A response that fits is returned verbatim with no TC bit forced on.
2171        let mut small = build_query(0x301, &["example", "com"], 1, 1);
2172        small[2] |= 0x80;
2173        let before = small.clone();
2174
2175        let out = cap_response(small);
2176        assert_eq!(out, before, "small response unchanged");
2177        assert_eq!(out[2] & 0x02, 0, "TC bit not set when no truncation");
2178    }
2179
2180    #[test]
2181    fn response_matches_query_rejects_mismatched_question() {
2182        // id + QR match but the echoed question differs (different QNAME) => rejected. This guards
2183        // against an off-path injector that guesses the id but answers a different question.
2184        let query = build_query(0x1234, &["a", "com"], 1, 1);
2185
2186        let mut wrong_question = build_query(0x1234, &["b", "com"], 1, 1);
2187        wrong_question[2] |= 0x80; // QR=1, same id
2188        assert!(
2189            !response_matches_query(&query, &wrong_question),
2190            "different QNAME must be rejected"
2191        );
2192
2193        // A different QTYPE with the same name is also rejected.
2194        let mut wrong_qtype = build_query(0x1234, &["a", "com"], 28, 1);
2195        wrong_qtype[2] |= 0x80;
2196        assert!(
2197            !response_matches_query(&query, &wrong_qtype),
2198            "different QTYPE must be rejected"
2199        );
2200
2201        // The exact echoed question with QR=1 is accepted.
2202        let mut good = query.clone();
2203        good[2] |= 0x80;
2204        assert!(
2205            response_matches_query(&query, &good),
2206            "matching question accepted"
2207        );
2208    }
2209
2210    #[test]
2211    fn suffix_matches_handles_boundaries_and_empty() {
2212        // Exact and label-boundary matches.
2213        assert!(suffix_matches("corp", "corp"));
2214        assert!(suffix_matches("a.corp", "corp"));
2215        assert!(suffix_matches("a.b.corp", "corp"));
2216        // Not a label boundary.
2217        assert!(!suffix_matches("acorp", "corp"));
2218        // Empty suffix never matches (defense-in-depth against `ends_with("")`).
2219        assert!(!suffix_matches("anything.example", ""));
2220        assert!(!suffix_matches("", ""));
2221    }
2222
2223    #[test]
2224    fn empty_search_domain_does_not_capture_everything() {
2225        // Defense-in-depth: an empty search domain must NOT make every name look like a tailnet
2226        // name (which would fail-close legitimate recursive queries / mis-route). With an empty
2227        // suffix present alongside a real resolver, an off-tailnet name still forwards.
2228        let mut view = view_with_routes(
2229            std::collections::BTreeMap::new(),
2230            vec![udp("8.8.8.8:53")],
2231            vec![],
2232        );
2233        view.cfg.search_domains = vec![String::new()];
2234        let buf = build_query(0x400, &["example", "com"], 1, 1);
2235
2236        match decide(&view, &buf).expect("decides") {
2237            Decision::Forward { upstreams, .. } => {
2238                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
2239            }
2240            Decision::Reply(_) => {
2241                panic!("empty search domain must not treat every name as tailnet")
2242            }
2243        }
2244    }
2245
2246    #[test]
2247    fn empty_route_suffix_does_not_capture_everything() {
2248        // Defense-in-depth: an empty route suffix must not match every name (which would route all
2249        // queries to that route's upstreams). With an empty-suffix route present, an unrelated name
2250        // still falls through to the global resolver.
2251        let mut routes = std::collections::BTreeMap::new();
2252        routes.insert(String::new(), vec![udp("10.9.9.9:53")]);
2253        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2254        let buf = build_query(0x401, &["example", "com"], 1, 1);
2255
2256        match decide(&view, &buf).expect("decides") {
2257            Decision::Forward { upstreams, .. } => {
2258                assert_eq!(
2259                    upstreams,
2260                    vec!["8.8.8.8:53".parse().unwrap()],
2261                    "empty route suffix must not capture; falls through to global"
2262                );
2263            }
2264            Decision::Reply(_) => panic!("expected forward to global resolver"),
2265        }
2266    }
2267
2268    fn udp_exit(addr: &str) -> DnsResolver {
2269        DnsResolver {
2270            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2271            use_with_exit_node: true,
2272        }
2273    }
2274
2275    #[test]
2276    fn recursive_forward_is_flagged_route_forward_is_not() {
2277        // A recursive (global/fallback) forward sets `recursive = true` (eligible for DoH
2278        // delegation); a deliberately-configured split-DNS route sets `recursive = false`.
2279        let mut routes = std::collections::BTreeMap::new();
2280        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
2281        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2282
2283        let routed = build_query(0x500, &["api", "corp", "example"], 1, 1);
2284        match decide(&view, &routed).expect("decides") {
2285            Decision::Forward { recursive, .. } => {
2286                assert!(!recursive, "split-DNS route is not a recursive forward")
2287            }
2288            Decision::Reply(_) => panic!("expected route forward"),
2289        }
2290
2291        let global = build_query(0x501, &["example", "com"], 1, 1);
2292        match decide(&view, &global).expect("decides") {
2293            Decision::Forward { recursive, .. } => {
2294                assert!(recursive, "unrouted name is a recursive forward")
2295            }
2296            Decision::Reply(_) => panic!("expected recursive forward"),
2297        }
2298    }
2299
2300    #[test]
2301    fn recursive_plan_keeps_udp_without_exit_node() {
2302        // No active exit node: a recursive forward stays on its default UDP upstreams.
2303        let view = view_with_routes(
2304            std::collections::BTreeMap::new(),
2305            vec![udp("8.8.8.8:53")],
2306            vec![],
2307        );
2308        let default = vec!["8.8.8.8:53".parse().unwrap()];
2309        assert_eq!(
2310            recursive_plan(&view, default.clone()),
2311            RecursivePlan::Udp(default)
2312        );
2313    }
2314
2315    #[test]
2316    fn recursive_plan_delegates_to_doh_with_exit_node() {
2317        // Exit node active, no kept-local resolvers: recursive queries delegate to the exit node's
2318        // DoH endpoint so resolution egresses from the exit node, not this host.
2319        let mut view = view_with_routes(
2320            std::collections::BTreeMap::new(),
2321            vec![udp("8.8.8.8:53")],
2322            vec![],
2323        );
2324        let doh: SocketAddr = "100.64.0.5:8080".parse().unwrap();
2325        view.exit_doh = Some(doh);
2326        assert_eq!(
2327            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2328            RecursivePlan::Doh(doh)
2329        );
2330    }
2331
2332    #[test]
2333    fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
2334        // Even with an exit node active, resolvers flagged `use_with_exit_node` stay local (Go keeps
2335        // UseWithExitNode resolvers). The plan forwards to those over UDP, never delegating to DoH.
2336        let mut view = view_with_routes(
2337            std::collections::BTreeMap::new(),
2338            vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")],
2339            vec![],
2340        );
2341        view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());
2342        // The default upstreams the caller computed are irrelevant when kept-local resolvers exist;
2343        // the plan must use the kept-local ones.
2344        assert_eq!(
2345            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2346            RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
2347        );
2348    }
2349}