Skip to main content

ts_runtime/
magic_dns.rs

1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//!   to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//!   negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//!   us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//!   no route are forwarded to the fallback resolvers, else the global resolvers.
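//! - **Fail closed**: if no route and no resolver is configured, an off-tailnet name is never
//!   forwarded and is answered `SERVFAIL` (a soft failure, so a stub does not cache a
//!   non-existence); names under a tailnet search domain or a negative route are `NXDOMAIN`.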
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//!   the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//!   off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//!   `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//!   HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//!   forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//!   `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//!   zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//!   (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37    net::{IpAddr, Ipv4Addr, SocketAddr},
38    sync::Arc,
39    time::Duration,
40};
41
42use kameo::{
43    actor::ActorRef,
44    message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48    sync::{Semaphore, watch},
49    task::JoinSet,
50    time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56    Error,
57    env::Env,
58    peer_tracker::{PeerDb, PeerState},
59};
60
/// Deadline for one forwarded query: how long we wait on an upstream resolver before giving up.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Upper bound on forwarded queries in flight at once on the local `100.100.100.100:53` responder.
///
/// Every forward runs on its own spawned task and keeps an overlay UDP socket open until the
/// upstream answers or [`UPSTREAM_TIMEOUT`] expires. Left unbounded, a local/tailnet client
/// spraying distinct forwardable names could pile up sockets and tasks without limit (a
/// resource-exhaustion DoS against a slow or black-holed upstream, because each one lingers for the
/// whole timeout). The bound works the same way as the peerAPI DoH server's request-handler limit
/// ([`crate::peerapi`]'s `MAX_INFLIGHT`): take a permit before spawning, and drop the query
/// fail-closed when none is available. Dropping a DNS query is benign — the stub resolver retries
/// or times out — and Go's resolver also bounds outstanding forwards instead of spawning freely.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Largest forwarded upstream response (one UDP datagram) we read into memory.
///
/// Mirrors Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). Queries are forwarded verbatim,
/// so a client that advertises a large EDNS UDP size can legitimately draw a large (1300–4096
/// byte) UDP answer: big TXT sets, DNSSEC, many-record round-robins. The previous cap of 1232
/// truncated such answers and set TC, which forces a TCP retry that this fork's UDP-only forwarder
/// cannot serve, making the large answer unreachable. At 4096 they are relayed intact.
const MAX_UPSTREAM_RESPONSE: usize = 4 * 1024;

/// The MagicDNS service address. It belongs to the netstack interface, so a `udp_bind` on it
/// receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The port the MagicDNS service listens on (standard DNS).
const MAGIC_DNS_PORT: u16 = 53;
88
89/// The latest view the answer loop resolves queries against.
90///
91/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
92/// updates) and read fresh by the answer loop for every packet.
93#[derive(Clone, Default)]
94pub(crate) struct DnsView {
95    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing.
96    pub(crate) cfg: DnsConfig,
97    /// The current peer database, if we've seen a peer update.
98    pub(crate) peers: Option<Arc<PeerDb>>,
99    /// This node, if we've seen a self-node update.
100    pub(crate) self_node: Option<Node>,
101    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
102    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
103    /// recursive resolution to this address over the overlay instead of forwarding to the locally
104    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
105    ///
106    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
107    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
108    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
109    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
110    pub(crate) exit_doh: Option<SocketAddr>,
111    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
112    ///
113    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
114    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
115    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
116    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
117    pub(crate) enable_ipv6: bool,
118    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
119    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
120    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
121    /// join for connectivity without taking over DNS.
122    ///
123    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
124    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
125    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
126    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
127    pub(crate) accept_dns: bool,
128}
129
130impl DnsView {
131    /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132    fn node_by_name(&self, name: &str) -> Option<Node> {
133        if let Some(node) = self
134            .peers
135            .as_ref()
136            .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137        {
138            return Some(node);
139        }
140
141        self.self_node
142            .as_ref()
143            .filter(|n| n.matches_name(name))
144            .cloned()
145    }
146
147    /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148    /// wins first — tried as written and then qualified by each tailnet search domain (so a
149    /// short/partially-qualified name like `host` or `host.user` still resolves to
150    /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151    /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152    /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153    /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154    /// control-pushed static record — never anything else.
155    fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156        let addr_of = |node: Node| -> IpAddr {
157            if want_v4 {
158                IpAddr::from(node.tailnet_address.ipv4.addr())
159            } else {
160                IpAddr::from(node.tailnet_address.ipv6.addr())
161            }
162        };
163
164        if let Some(node) = self.node_by_name(canon) {
165            return Some(addr_of(node));
166        }
167        for suffix in &self.cfg.search_domains {
168            if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169                return Some(addr_of(node));
170            }
171        }
172
173        // Control-pushed static records match the fully-qualified query name only.
174        self.cfg.extra_records.iter().find_map(|rec| {
175            let family_ok = matches!(
176                (rec.addr, want_v4),
177                (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178            );
179            (rec.name == canon && family_ok).then_some(rec.addr)
180        })
181    }
182
183    /// Find the node (peer or self) that owns the tailnet IP `ip`.
184    fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185        if let Some(node) = self
186            .peers
187            .as_ref()
188            .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189        {
190            return Some(node);
191        }
192
193        self.self_node
194            .as_ref()
195            .filter(|n| {
196                IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197                    || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198            })
199            .cloned()
200    }
201
202    /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203    /// resolvers, returning the upstreams to forward to.
204    ///
205    /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206    /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207    /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208    /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209    /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210    /// ([`Upstreams::None`] => NXDOMAIN).
211    fn route_for(&self, name: &str) -> Upstreams<'_> {
212        let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213        for (suffix, upstreams) in &self.cfg.routes {
214            if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215                best = Some((suffix.as_str(), upstreams));
216            }
217        }
218
219        if let Some((_, upstreams)) = best {
220            return if upstreams.is_empty() {
221                Upstreams::Block
222            } else {
223                // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224                // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225                // route) must keep receiving the query directly.
226                Upstreams::Route(upstreams)
227            };
228        }
229
230        if !self.cfg.fallback_resolvers.is_empty() {
231            return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232        }
233        if !self.cfg.resolvers.is_empty() {
234            return Upstreams::Recursive(&self.cfg.resolvers);
235        }
236        Upstreams::None
237    }
238}
239
240/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
241enum Upstreams<'a> {
242    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
243    Route(&'a [DnsResolver]),
244    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
245    /// exit-node DoH delegation in the client serve loop.
246    Recursive(&'a [DnsResolver]),
247    /// A negative split-DNS route matched: do not resolve (NXDOMAIN).
248    Block,
249    /// No route and no resolver configured: fail closed (NXDOMAIN).
250    None,
251}
252
/// Result of the synchronous decision step for one query: a finished response, or an instruction
/// to forward the original query upstream.
pub(crate) enum Decision {
    /// The encoded response, ready to be sent as-is.
    Reply(Vec<u8>),
    /// Send the original query datagram to these upstream UDP resolvers. A successful upstream
    /// answer is relayed; if forwarding fails or times out, the prebuilt `servfail` buffer is sent
    /// instead. Failing to forward an off-tailnet name is a soft failure, not a cacheable
    /// non-existence (Go forwarder.go:1297-1307).
    Forward {
        /// UDP upstreams, in the order they should be tried.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes, forwarded verbatim.
        query: Vec<u8>,
        /// SERVFAIL response to fall back on when every upstream fails or times out.
        servfail: Vec<u8>,
        /// `true` for a *recursive* forward (catch-all fallback/global resolvers), `false` for a
        /// deliberately-configured split-DNS route. Only recursive forwards may be delegated to
        /// the exit node's DoH endpoint by the client serve loop (see [`DnsView::exit_doh`]);
        /// split-DNS routes always stay on their configured upstreams (typically internal
        /// resolvers reachable over a subnet route). The peerAPI DoH *server* ignores this flag.
        recursive: bool,
    },
}
277
/// Label-boundary suffix test: `true` when `name` equals `suffix` or ends with `.suffix`. So
/// `"a.corp"` matches `"corp"` while `"acorp"` does not.
///
/// An **empty** suffix matches nothing. This is defense-in-depth: every name trivially ends with
/// `""`, so an empty suffix would otherwise over-route or classify every name as a tailnet name,
/// and both outcomes are leak-prone.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    // What is left in front of the suffix must be nothing at all (exact match) or must end on a
    // label separator.
    name.strip_suffix(suffix)
        .is_some_and(|prefix| prefix.is_empty() || prefix.ends_with('.'))
}
291
292/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
293/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
294/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
295fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
296    view.cfg
297        .search_domains
298        .iter()
299        .any(|suffix| suffix_matches(name, suffix))
300}
301
302/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
303/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
304/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
305/// such queries fail closed to NXDOMAIN.
306fn is_ip6_arpa(name: &str) -> bool {
307    suffix_matches(name, "ip6.arpa")
308}
309
/// Whether `ip` lies in the Tailscale CGNAT range `100.64.0.0/10` (RFC 6598, the tailnet IPv4
/// space), i.e. `100.64.0.0` through `100.127.255.255`.
///
/// Reverse (`PTR`) queries for these addresses are authoritative to MagicDNS: when no peer owns
/// the IP the query fails closed to NXDOMAIN instead of being forwarded to a third-party resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    matches!(ip.octets(), [100, 64..=127, _, _])
}
317
318/// Decide what to do with a single DNS query against `view`: either a complete response is ready
319/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
320/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
321///
322/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
323/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
324/// NXDOMAIN rather than leaking to an upstream resolver.
325pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
326    // Malformed / non-query input is dropped: we never answer something we can't parse.
327    let query = decode_query(buf).ok()?;
328    let q = &query.question;
329    let id = query.id;
330    // Echo the query's RD bit (and set RA when set) on the response — Go derives the response header
331    // from the query header.
332    let rd = query.recursion_desired;
333
334    let reply =
335        |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rd, rcode, answers));
336
337    // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
338    // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
339    // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
340    // config and refuses every query. This one read site covers the netstack responder, the peerAPI
341    // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
342    if !view.cfg.magic_dns || !view.accept_dns {
343        return Some(reply(Rcode::Refused, &[]));
344    }
345
346    let canon = q.name.to_canon();
347
348    // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
349    // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
350    // forwards such a query like any other name. Treat it as an unsupported authoritative type —
351    // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
352    // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
353    const CLASS_IN: u16 = 1;
354    if q.qclass != CLASS_IN {
355        return Some(forward_or_nodata(view, &canon, buf, id, q, rd));
356    }
357
358    Some(match &q.qtype {
359        QType::A => match view.resolve_addr(&canon, true) {
360            Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
361            // No overlay/extra-record answer: try split-DNS / recursive upstreams.
362            _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
363        },
364        QType::Aaaa => match view.resolve_addr(&canon, false) {
365            // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
366            // (default) the client is IPv4-only, so answering with the overlay v6 address would
367            // only hand out an unroutable address — dead connections plus a fingerprint. Return
368            // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
369            // data as historically. We never forward this name to a recursive upstream either way:
370            // a positive overlay match is authoritative.
371            Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
372                reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
373            }
374            Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
375            // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
376            // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
377            _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
378        },
379        QType::Ptr => match q.name.ptr_to_ipv4() {
380            Some(octets) => {
381                let v4: Ipv4Addr = octets.into();
382                let ip = IpAddr::V4(v4);
383                match view.node_by_ip(ip) {
384                    Some(node) => {
385                        let fqdn = node.fqdn(false);
386                        let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
387                        reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
388                    }
389                    // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
390                    // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
391                    // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
392                    // genuinely off-tailnet reverse queries are forwarded.
393                    None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
394                    None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
395                }
396            }
397            // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
398            // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
399            // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
400            None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
401            None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
402        },
403        // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
404        // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
405        // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
406        // REFUSED as "this server won't serve me" and abandons the resolver, which would break
407        // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
408        // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
409        QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q, rd),
410    })
411}
412
413/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
414/// either forward (to UDP upstreams), answer authoritatively absent (NXDOMAIN), or fail soft
415/// (SERVFAIL) when an off-tailnet name simply can't be forwarded.
416///
417/// Rcode parity with Go's resolver (`net/dns/resolver/tsdns.go` resolution order + `forwarder.go`):
418/// - A **tailnet-authoritative** name (search-domain suffix) or a **negative split-DNS route**
419///   (`Upstreams::Block` — a route configured with no resolvers, which Go answers authoritatively
420///   from Hosts, so an unmatched name under it is authoritatively absent) → **NXDOMAIN**.
421/// - An **off-tailnet** name we cannot forward — no route and no resolver configured
422///   (`Upstreams::None`), or a route whose resolvers are all filtered out (IPv6-only under the
423///   IPv4-only egress) → **SERVFAIL**, matching Go forwarder.go:1207 ("no upstream resolvers set,
424///   returning SERVFAIL"). A cacheable NXDOMAIN on a transient/structural inability to forward would
425///   make a downstream stub cache the *non-existence* of a real name; SERVFAIL is a soft failure the
426///   stub retries.
427///
428/// Anti-leak: a tailnet-suffix name is authoritative and is never forwarded — neither the name nor
429/// the query leaks to a third-party resolver. (The CGNAT `in-addr.arpa` / `ip6.arpa` reverse-zone
430/// NXDOMAIN guards live in the PTR arm of [`decide`] and are likewise unaffected.)
431fn forward_or_nxdomain(
432    view: &DnsView,
433    canon: &str,
434    buf: &[u8],
435    id: u16,
436    q: &ts_dns_wire::Question,
437    rd: bool,
438) -> Decision {
439    // NXDOMAIN for authoritative-absent names; SERVFAIL for an off-tailnet name we can't forward.
440    let nxdomain = encode_response(id, q, rd, Rcode::NxDomain, &[]);
441    let servfail = encode_response(id, q, rd, Rcode::ServFail, &[]);
442
443    if is_tailnet_name(view, canon) {
444        return Decision::Reply(nxdomain);
445    }
446
447    let (resolvers, recursive) = match view.route_for(canon) {
448        Upstreams::Route(resolvers) => (resolvers, false),
449        Upstreams::Recursive(resolvers) => (resolvers, true),
450        // A negative split-DNS route is authoritative-absent (Go answers it from Hosts): NXDOMAIN.
451        Upstreams::Block => return Decision::Reply(nxdomain),
452        // No route and no resolver: an off-tailnet name we have nowhere to forward — SERVFAIL, not
453        // a cacheable non-existence (Go forwarder.go:1207).
454        Upstreams::None => return Decision::Reply(servfail),
455    };
456
457    let upstreams: Vec<SocketAddr> = resolvers
458        .iter()
459        .map(DnsResolver::udp_addr)
460        // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
461        .filter(SocketAddr::is_ipv4)
462        .collect();
463    if upstreams.is_empty() {
464        // We had a route but every resolver was filtered out (IPv6-only): we cannot forward this
465        // off-tailnet name, so soft-fail rather than assert non-existence.
466        Decision::Reply(servfail)
467    } else {
468        Decision::Forward {
469            upstreams,
470            query: buf.to_vec(),
471            // All upstreams failing at runtime is also an inability to forward, not a non-existence
472            // (Go forwarder.go:1297-1307): hand the forwarder a SERVFAIL fallback, not NXDOMAIN.
473            servfail,
474            recursive,
475        }
476    }
477}
478
479/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
480/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
481/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
482/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
483fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
484    matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
485}
486
487/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
488/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
489/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
490/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
491/// ([`is_unimplemented_tailnet_qtype`]).
492///
493/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
494/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
495/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
496/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
497/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
498/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
499/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
500/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
501///
502/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
503/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
504fn forward_or_nodata(
505    view: &DnsView,
506    canon: &str,
507    buf: &[u8],
508    id: u16,
509    q: &ts_dns_wire::Question,
510    rd: bool,
511) -> Decision {
512    // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
513    // name exists, we just hold no record of that type. But a small set of types Go's resolver
514    // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
515    // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
516    // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
517    // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
518    // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
519    // authoritative for.
520    if is_tailnet_name(view, canon) {
521        let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
522            Rcode::NotImpl
523        } else {
524            Rcode::NoError
525        };
526        return Decision::Reply(encode_response(id, q, rd, rcode, &[]));
527    }
528    // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
529    // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
530    // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
531    // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
532    // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
533    // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
534    if is_ip6_arpa(canon) {
535        return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
536    }
537    if let Some(octets) = q.name.ptr_to_ipv4()
538        && is_tailnet_cgnat(octets.into())
539    {
540        return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
541    }
542    // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
543    // non-tailnet names and soft-fails (SERVFAIL) when no upstream is configured/routable; reuse it
544    // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN and negative-route paths
545    // are unreachable here — this only exercises its off-tailnet forward / SERVFAIL dispositions).
546    forward_or_nxdomain(view, canon, buf, id, q, rd)
547}
548
/// How the client side carries out a *recursive* forward: stay on local UDP upstreams, or hand the
/// query to the active exit node's peerAPI DoH endpoint over the overlay.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Chosen when no exit node is active, or when the config
    /// carries `use_with_exit_node` resolvers (which stay local even with an exit node selected).
    Udp(Vec<SocketAddr>),
    /// Hand the query to the exit node's peerAPI DoH server at this overlay address.
    Doh(SocketAddr),
}
559
560/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
561/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
562///
563/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
564/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
565///   resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
566///   is selected); forward to them over UDP, do NOT delegate.
567/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
568///   then egresses from the exit node, not this host (the whole point of routing through an exit
569///   node: this node's real IP is never used to resolve the peer's public names).
570pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
571    let Some(doh) = view.exit_doh else {
572        return RecursivePlan::Udp(default_upstreams);
573    };
574    let kept: Vec<SocketAddr> = view
575        .cfg
576        .resolvers_with_exit_node()
577        .map(DnsResolver::udp_addr)
578        // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
579        .filter(SocketAddr::is_ipv4)
580        .collect();
581    if kept.is_empty() {
582        RecursivePlan::Doh(doh)
583    } else {
584        RecursivePlan::Udp(kept)
585    }
586}
587
588/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
589/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
590/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
591/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
592/// truncation actually occurs.
593fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
594    if resp.len() > MAX_UPSTREAM_RESPONSE {
595        resp.truncate(MAX_UPSTREAM_RESPONSE);
596        // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
597        // capped datagram is always >= the header length, but guard anyway to never panic.
598        if let Some(flags_hi) = resp.get_mut(2) {
599            *flags_hi |= 0x02;
600        }
601    }
602    resp
603}
604
/// Size in bytes of the fixed DNS message header.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question (QNAME + QTYPE + QCLASS) in `msg`, which begins right after the
/// 12-byte header, and return its byte range.
///
/// Yields [`None`] when the name is malformed, contains a compression pointer (or any length
/// byte with either of the top two bits set — not valid inside a question), or extends beyond the
/// end of `msg`. The range is used to byte-compare the question of a forwarded query with the
/// question echoed in the upstream's response.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;
    // Step label by label until the zero-length root label closes the QNAME.
    let name_end = loop {
        let label_len = usize::from(*msg.get(cursor)?);
        if label_len & 0xC0 != 0 {
            // Top bits set: a compression pointer / reserved label type.
            return None;
        }
        let after_len_byte = cursor + 1;
        if label_len == 0 {
            break after_len_byte;
        }
        // Skip the label body; it must lie entirely inside the buffer.
        cursor = after_len_byte
            .checked_add(label_len)
            .filter(|&next| next <= msg.len())?;
    };
    // Two bytes of QTYPE and two of QCLASS trail the name.
    let end = name_end.checked_add(4).filter(|&e| e <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
637
638/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
639/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
640/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
641/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
642/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
643/// answer for a different question, so we also require the echoed question to match.
644fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
645    if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
646        return false;
647    }
648    let id_matches = query[0..2] == resp[0..2];
649    let is_response = resp[2] & 0x80 != 0;
650    if !id_matches || !is_response {
651        return false;
652    }
653    // The response must echo the exact question we asked. Parse both question sections and compare
654    // their bytes; a parse failure on either side is treated as a non-match (fail closed).
655    match (question_range(query), question_range(resp)) {
656        (Some(q), Some(r)) => query[q] == resp[r],
657        _ => false,
658    }
659}
660
661/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
662/// well-formed response, or the prebuilt `fallback` buffer if every upstream times out or errors.
663///
664/// The caller supplies `fallback` (a SERVFAIL response for a forwarded off-tailnet name — an
665/// all-upstream failure is a soft "couldn't resolve", not a cacheable non-existence, matching Go
666/// forwarder.go:1297-1307). Keeping it caller-supplied means this fn is rcode-agnostic.
667///
668/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
669/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
670/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
671/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
672pub(crate) async fn forward_query(
673    channel: &Channel,
674    upstreams: &[SocketAddr],
675    query: &[u8],
676    fallback: Vec<u8>,
677) -> Vec<u8> {
678    for upstream in upstreams {
679        let socket = match channel
680            .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
681            .await
682        {
683            Ok(s) => s,
684            Err(e) => {
685                tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
686                continue;
687            }
688        };
689
690        if let Err(e) = socket.send_to(*upstream, query).await {
691            tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
692            continue;
693        }
694
695        match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
696            Ok(Ok((from, resp))) if !resp.is_empty() => {
697                // Anti-poisoning: only accept a datagram that came from the upstream we queried
698                // and whose DNS header matches this query (same transaction id, QR=response bit
699                // set). An off-path injector racing the real answer is otherwise relayed straight
700                // back to the stub resolver as authoritative.
701                if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
702                    tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
703                    continue;
704                }
705                return cap_response(resp.to_vec());
706            }
707            Ok(Ok(_)) => continue,
708            Ok(Err(e)) => {
709                tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
710                continue;
711            }
712            Err(_) => {
713                tracing::debug!(%upstream, "magic dns upstream timed out");
714                continue;
715            }
716        }
717    }
718    fallback
719}
720
721/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
722///
723/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
724/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
725async fn serve(
726    socket: netstack::netsock::UdpSocket,
727    rx: watch::Receiver<Arc<DnsView>>,
728    channel: Channel,
729) {
730    let socket = Arc::new(socket);
731    let mut forwards = JoinSet::new();
732    // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
733    // lifetime of each spawned forward task and released on completion.
734    let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
735    loop {
736        let (src, buf) = match socket.recv_from_bytes().await {
737            Ok(pkt) => pkt,
738            Err(e) => {
739                tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
740                return;
741            }
742        };
743
744        // Read the freshest view per packet.
745        let view = rx.borrow().clone();
746
747        match decide(&view, &buf) {
748            // Malformed query: drop silently.
749            None => continue,
750            Some(Decision::Reply(resp)) => {
751                if let Err(e) = socket.send_to(src, &resp).await {
752                    tracing::warn!(error = %e, %src, "magic dns response send failed");
753                }
754            }
755            Some(Decision::Forward {
756                upstreams,
757                query,
758                servfail,
759                recursive,
760            }) => {
761                // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
762                // always stays on its configured upstreams. Decide the plan against the current
763                // view so a query routed while an exit node is active egresses from that exit node.
764                let plan = if recursive {
765                    recursive_plan(&view, upstreams)
766                } else {
767                    RecursivePlan::Udp(upstreams)
768                };
769                // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
770                // times out) rather than spawn an unbounded task that pins an overlay socket for up
771                // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
772                // (NOT `let _ =`, which would drop it immediately) so it is released only when the
773                // task body completes.
774                let Ok(permit) = inflight.clone().try_acquire_owned() else {
775                    tracing::warn!(
776                        %src,
777                        max = MAX_INFLIGHT_FORWARDS,
778                        "magic dns drop: at max in-flight forwarded queries"
779                    );
780                    continue;
781                };
782                let socket = socket.clone();
783                let channel = channel.clone();
784                forwards.spawn(async move {
785                    let _permit = permit;
786                    let resp = match plan {
787                        RecursivePlan::Udp(upstreams) => {
788                            forward_query(&channel, &upstreams, &query, servfail).await
789                        }
790                        RecursivePlan::Doh(doh_addr) => {
791                            crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, servfail)
792                                .await
793                        }
794                    };
795                    if let Err(e) = socket.send_to(src, &resp).await {
796                        tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
797                    }
798                });
799            }
800        }
801
802        // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
803        // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
804        // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
805        while forwards.try_join_next().is_some() {}
806    }
807}
808
809/// The MagicDNS responder actor.
810///
811/// Subscribes to control state (for the DNS config + self node) and peer state (for the peer
812/// database), keeping a [`DnsView`] that the spawned answer loop reads for every query.
813pub struct MagicDnsActor {
814    /// Keeps the socket-serving task alive for the lifetime of the actor.
815    _joinset: JoinSet<()>,
816    /// The latest view, shared with the answer loop.
817    view_tx: watch::Sender<Arc<DnsView>>,
818    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
819    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
820    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
821    /// rebuild time — not captured once — for a toggle to reach the served view.
822    env: Env,
823    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
824    /// forward path the serve loop uses ([`forward_query`] / [`forward_doh`], both binding
825    /// `0.0.0.0:0` on this channel — never a host socket).
826    channel: Channel,
827}
828
/// Request message for a programmatic DNS lookup through the live MagicDNS responder (the
/// `100.100.100.100` path); backs [`Device::query_dns`](crate::Device::query_dns). The handler
/// builds a query packet and feeds it to the same [`decide`]/forward logic an on-the-wire query
/// goes through, so the outcome — including its anti-leak posture — is what a tailnet client
/// would see.
pub struct Query {
    /// Canonical name to look up, without a trailing dot (e.g. `example.com`).
    pub name: String,
    /// DNS query type: `1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE value.
    pub qtype: u16,
}
839
840/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
841/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
842/// rather than parsed records — this fork's wire codec has no answer-record decoder.
843///
844/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
845/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
846#[derive(Debug, Clone, kameo::Reply)]
847pub struct DnsQueryResult {
848    /// The raw DNS response datagram (header + question + any answer records).
849    pub response: Vec<u8>,
850    /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
851    /// `5`=Refused, …).
852    pub rcode: u8,
853    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
854    /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
855    /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
856    /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
857    pub resolvers_consulted: Vec<SocketAddr>,
858}
859
860impl kameo::Actor for MagicDnsActor {
861    type Args = (Env, Channel);
862    type Error = Error;
863
864    async fn on_start(
865        (env, channel): Self::Args,
866        slf: ActorRef<Self>,
867    ) -> Result<Self, Self::Error> {
868        env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
869        env.subscribe::<Arc<PeerState>>(&slf).await?;
870        env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
871            .await?;
872
873        // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
874        // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
875        // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
876        // runtime-settable). The seed value is moot — no query is served before the first
877        // StateUpdate — but seeding it keeps the pre-update view internally consistent.
878        let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
879            enable_ipv6: env.enable_ipv6,
880            accept_dns: env.accept_dns(),
881            ..DnsView::default()
882        }));
883
884        let mut joinset = JoinSet::new();
885
886        // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
887        // never answers anything) so a transient bind error doesn't take down the runtime.
888        let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
889        match channel.udp_bind(addr).await {
890            Ok(socket) => {
891                tracing::debug!(%addr, "magic dns responder bound");
892                joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
893            }
894            Err(e) => {
895                tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
896            }
897        }
898
899        // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
900        // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
901        // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
902        // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
903        if let Some(port) = env.peerapi_port {
904            let channel = channel.clone();
905            let view_rx = view_rx.clone();
906            let forward_exit_egress = env.forward_exit_egress;
907            let taildrop = env.taildrop_store.clone();
908            let funnel_ingress = env.funnel_ingress.clone();
909            joinset.spawn(crate::peerapi::serve(
910                channel,
911                port,
912                view_rx,
913                forward_exit_egress,
914                taildrop,
915                funnel_ingress,
916            ));
917        }
918
919        Ok(Self {
920            _joinset: joinset,
921            view_tx,
922            env,
923            channel,
924        })
925    }
926}
927
/// Build a header-only SERVFAIL response for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name over 255 bytes).
///
/// The result is exactly 12 bytes: QR=1 (it is a response), RCODE=2 (server failure), every other
/// bit and count zero, and no question or answer section (no parseable question ever existed).
/// This gives `query_dns` a definite, honest RCODE to return, where an empty buffer would read
/// back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    // QR flag: top bit of the first flags byte (header byte 2).
    const QR_RESPONSE: u8 = 0x80;
    // RCODE 2 (SERVFAIL): low nibble of the second flags byte (header byte 3).
    const RCODE_SERVFAIL: u8 = 0x02;

    let mut header = [0u8; 12];
    header[2] = QR_RESPONSE;
    header[3] = RCODE_SERVFAIL;
    header.to_vec()
}
940
941impl Message<Query> for MagicDnsActor {
942    type Reply = DnsQueryResult;
943
944    async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
945        // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
946        // uses, against the freshest view — so the result and its anti-leak posture exactly match an
947        // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
948        // and `response_matches_query` validates the echoed id against this same buffer.
949        //
950        // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
951        // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
952        // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
953        // corrupting the QTYPE/QCLASS that follow.
954        let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
955        let labels: Vec<String> = trimmed
956            .split('.')
957            .filter(|label| !label.is_empty())
958            .map(str::to_owned)
959            .collect();
960        let qtype = match query.qtype {
961            1 => ts_dns_wire::QType::A,
962            28 => ts_dns_wire::QType::Aaaa,
963            12 => ts_dns_wire::QType::Ptr,
964            other => ts_dns_wire::QType::Other(other),
965        };
966        // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
967        // forwards via `forward_or_nodata`, matching the on-the-wire path).
968        let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
969
970        let view = self.view_tx.borrow().clone();
971
972        let (response, resolvers_consulted) = match decide(&view, &buf) {
973            // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
974            // the name normalized above that can still happen for a name `encode_query` accepts but
975            // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
976            // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
977            // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
978            // silently drops here (the on-wire client times out); a programmatic caller gets a
979            // definite, honest error instead.
980            None => (servfail_response(), Vec::new()),
981            Some(Decision::Reply(resp)) => (resp, Vec::new()),
982            Some(Decision::Forward {
983                upstreams,
984                query,
985                servfail,
986                recursive,
987            }) => {
988                let plan = if recursive {
989                    recursive_plan(&view, upstreams)
990                } else {
991                    RecursivePlan::Udp(upstreams)
992                };
993                match plan {
994                    RecursivePlan::Udp(upstreams) => {
995                        let resp = forward_query(&self.channel, &upstreams, &query, servfail).await;
996                        (resp, upstreams)
997                    }
998                    RecursivePlan::Doh(doh_addr) => {
999                        let resp = crate::peerapi_doh::forward_doh(
1000                            &self.channel,
1001                            doh_addr,
1002                            &query,
1003                            servfail,
1004                        )
1005                        .await;
1006                        // The query egressed via the exit node's DoH endpoint, not a local UDP
1007                        // upstream — report the DoH address as the resolver consulted.
1008                        (resp, vec![doh_addr])
1009                    }
1010                }
1011            }
1012        };
1013
1014        // RCODE is the low 4 bits of the second flags byte (header byte 3).
1015        let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
1016
1017        DnsQueryResult {
1018            response,
1019            rcode,
1020            resolvers_consulted,
1021        }
1022    }
1023}
1024
1025impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
1026    type Reply = ();
1027
1028    async fn handle(
1029        &mut self,
1030        update: Arc<ts_control::StateUpdate>,
1031        _ctx: &mut Context<Self, Self::Reply>,
1032    ) {
1033        // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
1034        // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1035        let accept_dns = self.env.accept_dns();
1036        self.view_tx.send_modify(|view| {
1037            let mut next = (**view).clone();
1038            next.cfg = update.dns_config.clone().unwrap_or_default();
1039            next.self_node = update.node.clone();
1040            next.accept_dns = accept_dns;
1041            *view = Arc::new(next);
1042        });
1043    }
1044}
1045
1046impl Message<Arc<PeerState>> for MagicDnsActor {
1047    type Reply = ();
1048
1049    async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1050        // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1051        // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1052        // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1053        let accept_dns = self.env.accept_dns();
1054        self.view_tx.send_modify(|view| {
1055            let mut next = (**view).clone();
1056            next.peers = Some(state.peers.clone());
1057            next.accept_dns = accept_dns;
1058            *view = Arc::new(next);
1059        });
1060    }
1061}
1062
1063impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1064    type Reply = ();
1065
1066    async fn handle(
1067        &mut self,
1068        active: crate::route_updater::ActiveExitNode,
1069        _ctx: &mut Context<Self, Self::Reply>,
1070    ) {
1071        // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1072        // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1073        // the address here — once, from the route updater's authoritative selection — means the
1074        // serve loop never re-resolves the selector.
1075        let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1076        self.view_tx.send_modify(|view| {
1077            let mut next = (**view).clone();
1078            next.exit_doh = exit_doh;
1079            *view = Arc::new(next);
1080        });
1081    }
1082}
1083
1084#[cfg(test)]
1085mod tests {
1086    use ts_control::{StableNodeId, TailnetAddress};
1087
1088    use super::*;
1089
1090    /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1091    /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1092    /// `Forward`; a `Forward` here is a bug and panics.
1093    fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1094        match decide(view, buf)? {
1095            Decision::Reply(resp) => Some(resp),
1096            Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1097        }
1098    }
1099
1100    /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1101    fn test_node() -> Node {
1102        Node {
1103            id: 1,
1104            stable_id: StableNodeId("n1".to_string()),
1105            hostname: "host".to_string(),
1106            user_id: 0,
1107            tailnet: Some("user.ts.net".to_string()),
1108            tags: vec![],
1109            tailnet_address: TailnetAddress {
1110                ipv4: "100.64.0.1/32".parse().unwrap(),
1111                ipv6: "fd7a::1/128".parse().unwrap(),
1112            },
1113            node_key: [0u8; 32].into(),
1114            node_key_expiry: None,
1115            online: None,
1116            last_seen: None,
1117            key_signature: vec![],
1118            machine_key: None,
1119            disco_key: None,
1120            accepted_routes: vec![],
1121            underlay_addresses: vec![],
1122            derp_region: None,
1123            cap: Default::default(),
1124            cap_map: Default::default(),
1125            peerapi_port: None,
1126            peerapi_dns_proxy: false,
1127            is_wireguard_only: false,
1128            exit_node_dns_resolvers: vec![],
1129            peer_relay: false,
1130            service_vips: Default::default(),
1131        }
1132    }
1133
1134    /// A view with MagicDNS on and a single peer in the db.
1135    fn view_with_peer() -> DnsView {
1136        let mut db = PeerDb::default();
1137        db.upsert(&test_node());
1138
1139        DnsView {
1140            cfg: DnsConfig {
1141                magic_dns: true,
1142                search_domains: vec!["user.ts.net".to_string()],
1143                ..Default::default()
1144            },
1145            peers: Some(Arc::new(db)),
1146            self_node: None,
1147            exit_doh: None,
1148            enable_ipv6: false,
1149            accept_dns: true,
1150        }
1151    }
1152
1153    /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1154    fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1155        let mut buf: Vec<u8> = Vec::new();
1156        buf.extend_from_slice(&id.to_be_bytes());
1157        buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1158        buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1159        buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1160        buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1161        buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1162        for label in labels {
1163            buf.push(label.len() as u8);
1164            buf.extend_from_slice(label.as_bytes());
1165        }
1166        buf.push(0); // root label
1167        buf.extend_from_slice(&qtype.to_be_bytes());
1168        buf.extend_from_slice(&qclass.to_be_bytes());
1169        buf
1170    }
1171
1172    /// Parse a response header: returns `(id, rcode, ancount)`.
1173    fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1174        let id = u16::from_be_bytes([resp[0], resp[1]]);
1175        let flags = u16::from_be_bytes([resp[2], resp[3]]);
1176        let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1177        (id, (flags & 0x000F) as u8, ancount)
1178    }
1179
1180    #[test]
1181    fn a_query_for_known_peer_answers_v4() {
1182        let view = view_with_peer();
1183        let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1184
1185        let resp = answer(&view, &buf).expect("answers");
1186        let (id, rcode, ancount) = parse_header(&resp);
1187        assert_eq!(id, 0x1234);
1188        assert_eq!(rcode, 0, "NoError");
1189        assert_eq!(ancount, 1);
1190
1191        // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1192        let tail = &resp[resp.len() - 4..];
1193        assert_eq!(tail, &[100, 64, 0, 1]);
1194    }
1195
1196    #[test]
1197    fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1198        // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1199        // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1200        // route. This is the anti-fingerprint / no-dead-connections posture.
1201        let view = view_with_peer();
1202        assert!(!view.enable_ipv6, "default gate is off");
1203        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1204
1205        let resp = answer(&view, &buf).expect("answers");
1206        let (_, rcode, ancount) = parse_header(&resp);
1207        assert_eq!(rcode, 0, "NoError (NODATA)");
1208        assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1209    }
1210
1211    #[test]
1212    fn a_query_still_resolves_when_ipv6_off() {
1213        // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1214        let view = view_with_peer();
1215        let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1216
1217        let resp = answer(&view, &buf).expect("answers");
1218        let (_, rcode, ancount) = parse_header(&resp);
1219        assert_eq!(rcode, 0, "NoError");
1220        assert_eq!(ancount, 1);
1221        let tail = &resp[resp.len() - 4..];
1222        assert_eq!(tail, &[100, 64, 0, 1]);
1223    }
1224
1225    #[test]
1226    fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1227        // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1228        let mut view = view_with_peer();
1229        view.enable_ipv6 = true;
1230        let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1231
1232        let resp = answer(&view, &buf).expect("answers");
1233        let (_, rcode, ancount) = parse_header(&resp);
1234        assert_eq!(rcode, 0, "NoError");
1235        assert_eq!(ancount, 1);
1236
1237        let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1238        let tail = &resp[resp.len() - 16..];
1239        assert_eq!(tail, expected);
1240    }
1241
1242    #[test]
1243    fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1244        // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1245        // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1246        // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1247        // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1248        let mut db = PeerDb::default();
1249        db.upsert(&test_node());
1250        let view = DnsView {
1251            cfg: DnsConfig {
1252                magic_dns: true,
1253                search_domains: vec!["user.ts.net".to_string()],
1254                fallback_resolvers: vec![DnsResolver {
1255                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1256                    use_with_exit_node: false,
1257                }],
1258                ..Default::default()
1259            },
1260            peers: Some(Arc::new(db)),
1261            self_node: None,
1262            exit_doh: None,
1263            enable_ipv6: false,
1264            accept_dns: true,
1265        };
1266        let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1267
1268        match decide(&view, &buf).expect("decides") {
1269            Decision::Reply(resp) => {
1270                let (_, rcode, _) = parse_header(&resp);
1271                assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1272            }
1273            Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1274        }
1275    }
1276
1277    #[test]
1278    fn bare_hostname_resolves() {
1279        // The name index also stores the bare hostname.
1280        let view = view_with_peer();
1281        let buf = build_query(0x7, &["host"], 1, 1);
1282
1283        let resp = answer(&view, &buf).expect("answers");
1284        let (_, rcode, ancount) = parse_header(&resp);
1285        assert_eq!(rcode, 0);
1286        assert_eq!(ancount, 1);
1287    }
1288
1289    #[test]
1290    fn unknown_off_tailnet_name_with_no_upstream_is_servfail() {
1291        // An off-tailnet name with no resolver configured cannot be forwarded. Go answers SERVFAIL
1292        // (a soft "couldn't resolve"), not NXDOMAIN — asserting non-existence of a real name we
1293        // simply have no upstream for would poison a downstream stub's negative cache. (A *tailnet*
1294        // name with no overlay match stays NXDOMAIN — see `tailnet_name_is_never_forwarded` — and a
1295        // negative split-DNS route stays NXDOMAIN — see `negative_route_is_nxdomain_not_forwarded`.)
1296        let view = view_with_peer();
1297        let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1298
1299        let resp = answer(&view, &buf).expect("answers");
1300        let (_, rcode, ancount) = parse_header(&resp);
1301        assert_eq!(
1302            rcode, 2,
1303            "ServFail: off-tailnet name, nothing to forward to"
1304        );
1305        assert_eq!(ancount, 0);
1306    }
1307
1308    #[test]
1309    fn magic_dns_off_is_refused() {
1310        // Fail closed: with MagicDNS disabled, even a known name is refused.
1311        let mut view = view_with_peer();
1312        view.cfg.magic_dns = false;
1313        let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1314
1315        let resp = answer(&view, &buf).expect("answers");
1316        let (_, rcode, ancount) = parse_header(&resp);
1317        assert_eq!(rcode, 5, "Refused");
1318        assert_eq!(ancount, 0);
1319    }
1320
1321    #[test]
1322    fn accept_dns_false_refuses_otherwise_answerable_query() {
1323        // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1324        // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1325        // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1326        let mut view = view_with_peer();
1327        assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1328        view.accept_dns = false;
1329        let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1330
1331        let resp = answer(&view, &buf).expect("answers");
1332        let (_, rcode, ancount) = parse_header(&resp);
1333        assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1334        assert_eq!(ancount, 0);
1335
1336        // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1337        // answers authoritatively — proving the OFF→ON restore is automatic.
1338        view.accept_dns = true;
1339        let resp = answer(&view, &buf).expect("answers");
1340        let (_, rcode, ancount) = parse_header(&resp);
1341        assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1342        assert_eq!(ancount, 1);
1343        let tail = &resp[resp.len() - 4..];
1344        assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1345    }
1346
1347    #[test]
1348    fn default_view_serves_nothing() {
1349        // The default (no dns_config seen) has magic_dns == false: fail closed.
1350        let view = DnsView::default();
1351        let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1352
1353        let resp = answer(&view, &buf).expect("answers");
1354        let (_, rcode, _) = parse_header(&resp);
1355        assert_eq!(rcode, 5, "Refused");
1356    }
1357
1358    #[test]
1359    fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1360        // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1361        // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1362        // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1363        let view = view_with_peer();
1364        let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1365
1366        let resp = answer(&view, &buf).expect("answers");
1367        let (_, rcode, ancount) = parse_header(&resp);
1368        assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1369        assert_eq!(ancount, 0, "no answer records (NODATA)");
1370    }
1371
1372    #[test]
1373    fn unsupported_qtype_off_tailnet_forwards_or_servfails() {
1374        // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1375        // REFUSED. With no upstream configured in this view it soft-fails to SERVFAIL (the same
1376        // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1377        // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1378        let view = view_with_peer();
1379        let buf = build_query(0x1, &["example", "com"], 65, 1);
1380
1381        let resp = answer(&view, &buf).expect("answers");
1382        let (_, rcode, _) = parse_header(&resp);
1383        assert_eq!(
1384            rcode, 2,
1385            "off-tailnet, no upstream -> SERVFAIL (forwardable, not Refused)"
1386        );
1387    }
1388
1389    #[test]
1390    fn unimplemented_qtype_on_tailnet_name_is_notimp() {
1391        // NS (2), SOA (6), HINFO (13), AXFR (252) for a tailnet-authoritative name must answer NOTIMP
1392        // (rcode 4), matching Go `resolveLocal`'s `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR,
1393        // dns.TypeHINFO: return RCodeNotImplemented`. Returning NODATA (rcode 0) here was a clean
1394        // fingerprint (a `dig SOA user.ts.net` answer differs from real tailscaled). The name is
1395        // still never forwarded (anti-leak).
1396        let view = view_with_peer();
1397        for qtype in [2u16, 6, 13, 252] {
1398            let buf = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
1399            let resp = answer(&view, &buf).expect("answers");
1400            let (_, rcode, ancount) = parse_header(&resp);
1401            assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
1402            assert_eq!(ancount, 0, "NOTIMP carries no answer records");
1403        }
1404    }
1405
1406    #[test]
1407    fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
1408        // The NOTIMP disposition is ONLY for a name we are authoritative for. An NS query for an
1409        // off-tailnet name must still forward (here: SERVFAIL, no upstream) — NOT NOTIMP — exactly
1410        // like the off-tailnet HTTPS/SVCB case above. Guards the NOTIMP change against over-reach.
1411        let view = view_with_peer();
1412        let buf = build_query(0x1, &["example", "com"], 2, 1); // NS, off-tailnet
1413        let resp = answer(&view, &buf).expect("answers");
1414        let (_, rcode, _) = parse_header(&resp);
1415        assert_eq!(
1416            rcode, 2,
1417            "off-tailnet NS -> SERVFAIL (forwardable), not NOTIMP"
1418        );
1419    }
1420
1421    #[test]
1422    fn malformed_query_is_dropped() {
1423        // A response (QR bit set) is not a query; we drop it (no answer).
1424        let mut buf = build_query(0x1, &["host"], 1, 1);
1425        buf[2] = 0x80; // set QR bit
1426        assert!(answer(&view_with_peer(), &buf).is_none());
1427    }
1428
1429    #[test]
1430    fn ptr_for_known_ip_answers_fqdn() {
1431        let view = view_with_peer();
1432        // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1433        let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1434
1435        let resp = answer(&view, &buf).expect("answers");
1436        let (_, rcode, ancount) = parse_header(&resp);
1437        assert_eq!(rcode, 0, "NoError");
1438        assert_eq!(ancount, 1);
1439
1440        // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1441        let expected = {
1442            let mut out = Vec::new();
1443            for label in ["host", "user", "ts", "net"] {
1444                out.push(label.len() as u8);
1445                out.extend_from_slice(label.as_bytes());
1446            }
1447            out.push(0);
1448            out
1449        };
1450        let tail = &resp[resp.len() - expected.len()..];
1451        assert_eq!(tail, expected.as_slice());
1452    }
1453
1454    #[test]
1455    fn ptr_for_unknown_public_ip_off_tailnet_is_servfail() {
1456        let view = view_with_peer();
1457        // 9.9.9.9 is a public IP, not a known tailnet IP and not in the CGNAT reverse zone — so its
1458        // reverse query is an ordinary off-tailnet name. With no upstream to forward it to, that is
1459        // SERVFAIL (soft), not NXDOMAIN. (A CGNAT/ip6.arpa reverse for an unmatched tailnet IP still
1460        // fails closed to NXDOMAIN as an anti-leak guard — see `ptr_for_unknown_tailnet_ip_*`.)
1461        let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1462
1463        let resp = answer(&view, &buf).expect("answers");
1464        let (_, rcode, _) = parse_header(&resp);
1465        assert_eq!(
1466            rcode, 2,
1467            "ServFail: off-tailnet public-IP reverse, no upstream"
1468        );
1469    }
1470
1471    #[test]
1472    fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
1473        // A view WITH an upstream resolver: an off-tailnet reverse query would forward, but a
1474        // reverse query for an unmatched IP in the CGNAT range (100.64.0.0/10) must fail closed to
1475        // NXDOMAIN — the probed tailnet IP must never leak upstream.
1476        let mut db = PeerDb::default();
1477        db.upsert(&test_node());
1478        let view = DnsView {
1479            cfg: DnsConfig {
1480                magic_dns: true,
1481                search_domains: vec!["user.ts.net".to_string()],
1482                fallback_resolvers: vec![DnsResolver {
1483                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1484                    use_with_exit_node: false,
1485                }],
1486                ..Default::default()
1487            },
1488            peers: Some(Arc::new(db)),
1489            self_node: None,
1490            exit_doh: None,
1491            enable_ipv6: false,
1492            accept_dns: true,
1493        };
1494
1495        // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
1496        let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1497        match decide(&view, &buf).expect("decides") {
1498            Decision::Reply(resp) => {
1499                let (_, rcode, _) = parse_header(&resp);
1500                assert_eq!(rcode, 3, "NxDomain");
1501            }
1502            Decision::Forward { .. } => {
1503                panic!("tailnet CGNAT PTR must never be forwarded upstream")
1504            }
1505        }
1506    }
1507
1508    /// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
1509    /// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
1510    /// never forward. The PTR arm guards this, but the `QType::Other` path routes through
1511    /// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
1512    #[test]
1513    fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
1514        let mut db = PeerDb::default();
1515        db.upsert(&test_node());
1516        let view = DnsView {
1517            cfg: DnsConfig {
1518                magic_dns: true,
1519                search_domains: vec!["user.ts.net".to_string()],
1520                fallback_resolvers: vec![DnsResolver {
1521                    transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1522                    use_with_exit_node: false,
1523                }],
1524                ..Default::default()
1525            },
1526            peers: Some(Arc::new(db)),
1527            self_node: None,
1528            exit_doh: None,
1529            enable_ipv6: false,
1530            accept_dns: true,
1531        };
1532
1533        // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
1534        let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
1535        match decide(&view, &buf).expect("decides") {
1536            Decision::Reply(resp) => {
1537                let (_, rcode, _) = parse_header(&resp);
1538                assert_eq!(rcode, 3, "NxDomain");
1539            }
1540            Decision::Forward { .. } => {
1541                panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
1542            }
1543        }
1544    }
1545
1546    /// Same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype: must NXDOMAIN, not
1547    /// forward (revealing a tailnet ULA was probed).
1548    #[test]
1549    fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
1550        let view = view_with_routes(
1551            std::collections::BTreeMap::new(),
1552            vec![udp("9.9.9.9:53")],
1553            vec![],
1554        );
1555        // An ip6.arpa reverse name with a TXT (16) qtype must fail closed.
1556        let buf = build_query(
1557            0x37,
1558            &[
1559                "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
1560                "a", "7", "d", "f", "ip6", "arpa",
1561            ],
1562            16,
1563            1,
1564        );
1565        match decide(&view, &buf).expect("decides") {
1566            Decision::Reply(resp) => {
1567                let (_, rcode, _) = parse_header(&resp);
1568                assert_eq!(rcode, 3, "NxDomain");
1569            }
1570            Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
1571        }
1572    }
1573
1574    #[test]
1575    fn is_tailnet_cgnat_classifies_range() {
1576        assert!(is_tailnet_cgnat("100.64.0.0".parse().unwrap()));
1577        assert!(is_tailnet_cgnat("100.64.0.1".parse().unwrap()));
1578        assert!(is_tailnet_cgnat("100.127.255.255".parse().unwrap()));
1579        // Outside the /10:
1580        assert!(!is_tailnet_cgnat("100.63.255.255".parse().unwrap()));
1581        assert!(!is_tailnet_cgnat("100.128.0.0".parse().unwrap()));
1582        assert!(!is_tailnet_cgnat("9.9.9.9".parse().unwrap()));
1583        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
1584        assert!(is_tailnet_cgnat("100.100.100.100".parse().unwrap()));
1585    }
1586
1587    #[test]
1588    fn response_matches_query_validates_id_and_qr() {
1589        // query id 0x1234, QR=0
1590        let query = build_query(0x1234, &["a", "com"], 1, 1);
1591
1592        // A well-formed response: same id, QR=1.
1593        let mut good = query.clone();
1594        good[2] |= 0x80;
1595        assert!(response_matches_query(&query, &good));
1596
1597        // Same id but QR still 0 (not a response): rejected.
1598        assert!(!response_matches_query(&query, &query));
1599
1600        // QR=1 but a different transaction id: rejected (off-path forgery).
1601        let mut wrong_id = good.clone();
1602        wrong_id[0] ^= 0xFF;
1603        assert!(!response_matches_query(&query, &wrong_id));
1604
1605        // Too-short buffers: rejected.
1606        assert!(!response_matches_query(&query, &[0u8; 2]));
1607        assert!(!response_matches_query(&[0u8; 3], &good));
1608    }
1609
1610    #[test]
1611    fn self_node_resolves_when_no_peer_match() {
1612        // With the peer db empty but a self node set, the self node answers for its own name.
1613        let view = DnsView {
1614            cfg: DnsConfig {
1615                magic_dns: true,
1616                search_domains: vec![],
1617                ..Default::default()
1618            },
1619            peers: None,
1620            self_node: Some(test_node()),
1621            exit_doh: None,
1622            enable_ipv6: false,
1623            accept_dns: true,
1624        };
1625        let buf = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
1626
1627        let resp = answer(&view, &buf).expect("answers");
1628        let (_, rcode, ancount) = parse_header(&resp);
1629        assert_eq!(rcode, 0);
1630        assert_eq!(ancount, 1);
1631        let tail = &resp[resp.len() - 4..];
1632        assert_eq!(tail, &[100, 64, 0, 1]);
1633    }
1634
1635    #[test]
1636    fn partially_qualified_name_resolves_via_search_domain() {
1637        // "host.user" is not indexed directly, but the "user.ts.net" search domain qualifies it
1638        // to "host.user.user.ts.net"... which does NOT match. The realistic case is "host" (bare,
1639        // already indexed) and "host.user.ts.net" (fqdn). Verify a name needing suffix expansion:
1640        // with search domain "ts.net" the partially-qualified "host.user" => "host.user.ts.net".
1641        let mut view = view_with_peer();
1642        view.cfg.search_domains = vec!["ts.net".to_string()];
1643        let buf = build_query(0x55, &["host", "user"], 1, 1);
1644
1645        let resp = answer(&view, &buf).expect("answers");
1646        let (_, rcode, ancount) = parse_header(&resp);
1647        assert_eq!(rcode, 0, "NoError via search-domain expansion");
1648        assert_eq!(ancount, 1);
1649        let tail = &resp[resp.len() - 4..];
1650        assert_eq!(tail, &[100, 64, 0, 1]);
1651    }
1652
1653    #[test]
1654    fn extra_record_a_answers_when_no_peer_match() {
1655        // A control-pushed static A record answers for a non-peer name, fail-closed otherwise.
1656        let mut view = view_with_peer();
1657        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1658            name: "static.user.ts.net".to_string(),
1659            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1660        }];
1661        let buf = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
1662
1663        let resp = answer(&view, &buf).expect("answers");
1664        let (_, rcode, ancount) = parse_header(&resp);
1665        assert_eq!(rcode, 0, "NoError from extra record");
1666        assert_eq!(ancount, 1);
1667        let tail = &resp[resp.len() - 4..];
1668        assert_eq!(tail, &[100, 64, 0, 9]);
1669    }
1670
1671    #[test]
1672    fn extra_record_matches_query_case_insensitively() {
1673        // The query name is canonicalized (lowercased) at decode time, so a mixed-case query
1674        // matches a lowercase extra record.
1675        let mut view = view_with_peer();
1676        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1677            name: "static.user.ts.net".to_string(),
1678            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1679        }];
1680        let buf = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
1681
1682        let resp = answer(&view, &buf).expect("answers");
1683        let (_, rcode, ancount) = parse_header(&resp);
1684        assert_eq!(rcode, 0, "NoError: case-insensitive match");
1685        assert_eq!(ancount, 1);
1686        let tail = &resp[resp.len() - 4..];
1687        assert_eq!(tail, &[100, 64, 0, 9]);
1688    }
1689
1690    #[test]
1691    fn extra_record_not_expanded_by_search_domain() {
1692        // Unlike peer names, an extra record is matched as an FQDN only: a bare query that would
1693        // need search-domain expansion to reach the record name must NOT resolve.
1694        let mut view = view_with_peer();
1695        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1696            name: "static.user.ts.net".to_string(),
1697            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1698        }];
1699        // "static" would only reach "static.user.ts.net" via the "user.ts.net" search domain.
1700        let buf = build_query(0x7B, &["static"], 1, 1);
1701
1702        let resp = answer(&view, &buf).expect("answers");
1703        let (_, rcode, _) = parse_header(&resp);
1704        // Not search-expanded → treated as the bare off-tailnet name "static", which has no upstream
1705        // here, so SERVFAIL (soft). The point of the test — that the extra record is NOT reachable
1706        // via search expansion — holds regardless of the failure rcode.
1707        assert_eq!(
1708            rcode, 2,
1709            "ServFail: bare 'static' is not search-expanded to the extra record"
1710        );
1711    }
1712
1713    #[test]
1714    fn extra_record_aaaa_family_is_isolated() {
1715        // An A-only extra record must NOT answer an AAAA query for the same name (NxDomain).
1716        let mut view = view_with_peer();
1717        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1718            name: "v4only.user.ts.net".to_string(),
1719            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1720        }];
1721        let buf = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
1722
1723        let resp = answer(&view, &buf).expect("answers");
1724        let (_, rcode, _) = parse_header(&resp);
1725        assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
1726    }
1727
1728    #[test]
1729    fn extra_record_ignored_when_magic_dns_off() {
1730        // Fail closed: extra records are never served while MagicDNS is disabled.
1731        let mut view = view_with_peer();
1732        view.cfg.magic_dns = false;
1733        view.cfg.extra_records = vec![ts_control::ExtraRecord {
1734            name: "static.user.ts.net".to_string(),
1735            addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
1736        }];
1737        let buf = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
1738
1739        let resp = answer(&view, &buf).expect("answers");
1740        let (_, rcode, _) = parse_header(&resp);
1741        assert_eq!(rcode, 5, "Refused");
1742    }
1743
1744    #[test]
1745    fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
1746        // A CHAOS-class (3) query for a tailnet name must NOT be answered as IN (no overlay A), and
1747        // must NOT be REFUSED (Go does no class check on the local path). It's an unsupported
1748        // authoritative class -> NODATA (empty NOERROR), and never forwarded (tailnet name).
1749        let view = view_with_peer();
1750        let buf = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
1751
1752        let resp = answer(&view, &buf).expect("answers");
1753        let (_, rcode, ancount) = parse_header(&resp);
1754        assert_eq!(
1755            rcode, 0,
1756            "NoError (NODATA), not Refused and not an IN answer"
1757        );
1758        assert_eq!(
1759            ancount, 0,
1760            "must not hand out the overlay A for a non-IN class"
1761        );
1762    }
1763
1764    #[test]
1765    fn non_in_class_off_tailnet_forwards_or_servfails() {
1766        // A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
1767        // No upstream here -> SERVFAIL, proving the class gate no longer short-circuits to Refused.
1768        let view = view_with_peer();
1769        let buf = build_query(0x66, &["example", "com"], 1, 3);
1770
1771        let resp = answer(&view, &buf).expect("answers");
1772        let (_, rcode, _) = parse_header(&resp);
1773        assert_eq!(
1774            rcode, 2,
1775            "off-tailnet non-IN class, no upstream -> SERVFAIL, not Refused"
1776        );
1777    }
1778
1779    /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1780    /// + global resolvers.
1781    fn view_with_routes(
1782        routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1783        resolvers: Vec<DnsResolver>,
1784        fallback: Vec<DnsResolver>,
1785    ) -> DnsView {
1786        DnsView {
1787            cfg: DnsConfig {
1788                magic_dns: true,
1789                search_domains: vec!["user.ts.net".to_string()],
1790                routes,
1791                resolvers,
1792                fallback_resolvers: fallback,
1793                ..Default::default()
1794            },
1795            peers: None,
1796            self_node: None,
1797            exit_doh: None,
1798            enable_ipv6: false,
1799            accept_dns: true,
1800        }
1801    }
1802
1803    fn udp(addr: &str) -> DnsResolver {
1804        DnsResolver {
1805            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1806            use_with_exit_node: false,
1807        }
1808    }
1809
1810    #[test]
1811    fn split_dns_route_forwards_to_matching_upstream() {
1812        let mut routes = std::collections::BTreeMap::new();
1813        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1814        let view = view_with_routes(routes, vec![], vec![]);
1815        let buf = build_query(0x100, &["api", "corp", "example"], 1, 1);
1816
1817        match decide(&view, &buf).expect("decides") {
1818            Decision::Forward { upstreams, .. } => {
1819                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1820            }
1821            Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
1822        }
1823    }
1824
1825    #[test]
1826    fn exotic_qtype_off_tailnet_forwards_to_upstream() {
1827        // The core of the fix: an HTTPS/SVCB (type 65) query for an off-tailnet name with a matching
1828        // route must FORWARD to the upstream (verbatim), exactly like an A query would — not REFUSE
1829        // and not NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
1830        let mut routes = std::collections::BTreeMap::new();
1831        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1832        let view = view_with_routes(routes, vec![], vec![]);
1833        let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
1834
1835        match decide(&view, &buf).expect("decides") {
1836            Decision::Forward {
1837                upstreams, query, ..
1838            } => {
1839                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1840                assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
1841            }
1842            Decision::Reply(_) => {
1843                panic!("an off-tailnet HTTPS-record query must forward, not reply")
1844            }
1845        }
1846    }
1847
1848    #[test]
1849    fn non_in_class_off_tailnet_forwards_to_upstream() {
1850        // A non-IN class for an off-tailnet routed name forwards too (Go does no class check on the
1851        // local path). Proves the class gate no longer short-circuits to REFUSED before routing.
1852        let mut routes = std::collections::BTreeMap::new();
1853        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
1854        let view = view_with_routes(routes, vec![], vec![]);
1855        let buf = build_query(0x103, &["api", "corp", "example"], 1, 3);
1856
1857        match decide(&view, &buf).expect("decides") {
1858            Decision::Forward { upstreams, .. } => {
1859                assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
1860            }
1861            Decision::Reply(_) => {
1862                panic!("an off-tailnet non-IN-class query must forward, not reply")
1863            }
1864        }
1865    }
1866
1867    /// The local responder bounds concurrent in-flight forwards: `serve` acquires one
1868    /// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the query fail-closed when
1869    /// the pool is exhausted (a client spraying forwardable names can't open unbounded overlay
1870    /// sockets). This pins the gating semantics `serve` relies on — drained pool refuses a new
1871    /// permit; releasing one restores capacity — and the cap constant itself. (The async `serve`
1872    /// loop has no netstack-free test seam, so the semaphore behavior is exercised directly here, the
1873    /// same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
1874    #[test]
1875    fn forward_inflight_cap_fails_closed_when_saturated() {
1876        use std::sync::Arc;
1877
1878        use tokio::sync::Semaphore;
1879
1880        let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
1881
1882        // Drain every permit (one per concurrently in-flight forward).
1883        let mut held = Vec::with_capacity(MAX_INFLIGHT_FORWARDS);
1884        for _ in 0..MAX_INFLIGHT_FORWARDS {
1885            held.push(
1886                inflight
1887                    .clone()
1888                    .try_acquire_owned()
1889                    .expect("permits available below the cap"),
1890            );
1891        }
1892
1893        // At the cap, the next forward is refused — `serve` would drop the query, not spawn.
1894        assert!(
1895            inflight.clone().try_acquire_owned().is_err(),
1896            "a saturated forward pool must refuse a new permit (fail closed)"
1897        );
1898
1899        // Completing an in-flight forward releases its permit and restores capacity.
1900        drop(held.pop());
1901        assert!(
1902            inflight.clone().try_acquire_owned().is_ok(),
1903            "releasing a permit must let the next forward proceed"
1904        );
1905    }
1906
1907    /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1908    /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1909    /// only when the task completes. This guards the regression the saturation test above can't see:
1910    /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1911    /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1912    /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1913    /// while the task runs and refill once it finishes.
1914    #[tokio::test]
1915    async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1916        use std::sync::Arc;
1917
1918        use tokio::sync::Semaphore;
1919
1920        let inflight = Arc::new(Semaphore::new(1));
1921        let permit = inflight
1922            .clone()
1923            .try_acquire_owned()
1924            .expect("the sole permit is available");
1925
1926        let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1927        let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1928        let task = tokio::spawn(async move {
1929            // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1930            // task, so it lives until the body ends — not dropped at the `let`.
1931            let _permit = permit;
1932            started_tx.send(()).unwrap();
1933            // Stand in for the `.await` on the upstream forward.
1934            release_rx.await.unwrap();
1935        });
1936
1937        started_rx.await.unwrap();
1938        // While the task runs, the permit it moved in is still held — the pool is empty.
1939        assert!(
1940            inflight.clone().try_acquire_owned().is_err(),
1941            "a permit moved into a running task must stay held across its await"
1942        );
1943
1944        // Let the task finish; its permit drops with the body and capacity returns.
1945        release_tx.send(()).unwrap();
1946        task.await.unwrap();
1947        assert!(
1948            inflight.clone().try_acquire_owned().is_ok(),
1949            "the permit must be released once the task body completes"
1950        );
1951    }
1952
1953    #[test]
1954    fn longest_suffix_route_wins() {
1955        let mut routes = std::collections::BTreeMap::new();
1956        routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
1957        routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
1958        let view = view_with_routes(routes, vec![], vec![]);
1959        let buf = build_query(0x101, &["api", "corp", "example"], 1, 1);
1960
1961        match decide(&view, &buf).expect("decides") {
1962            Decision::Forward { upstreams, .. } => {
1963                assert_eq!(
1964                    upstreams,
1965                    vec!["10.0.0.2:53".parse().unwrap()],
1966                    "longer suffix wins"
1967                );
1968            }
1969            Decision::Reply(_) => panic!("expected forward"),
1970        }
1971    }
1972
1973    #[test]
1974    fn negative_route_is_nxdomain_not_forwarded() {
1975        // An empty upstream list is a negative route: fail closed, never forward.
1976        let mut routes = std::collections::BTreeMap::new();
1977        routes.insert("blocked.example".to_string(), vec![]);
1978        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
1979        let buf = build_query(0x102, &["x", "blocked", "example"], 1, 1);
1980
1981        match decide(&view, &buf).expect("decides") {
1982            Decision::Reply(resp) => {
1983                let (_, rcode, _) = parse_header(&resp);
1984                assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
1985            }
1986            Decision::Forward { .. } => panic!("negative route must not forward"),
1987        }
1988    }
1989
1990    #[test]
1991    fn unrouted_name_forwards_to_fallback_then_global() {
1992        // No route matches: fallback resolvers are preferred over global resolvers.
1993        let view = view_with_routes(
1994            std::collections::BTreeMap::new(),
1995            vec![udp("8.8.8.8:53")],
1996            vec![udp("1.1.1.1:53")],
1997        );
1998        let buf = build_query(0x103, &["example", "com"], 1, 1);
1999
2000        match decide(&view, &buf).expect("decides") {
2001            Decision::Forward { upstreams, .. } => {
2002                assert_eq!(
2003                    upstreams,
2004                    vec!["1.1.1.1:53".parse().unwrap()],
2005                    "fallback preferred"
2006                );
2007            }
2008            Decision::Reply(_) => panic!("expected forward to fallback"),
2009        }
2010    }
2011
2012    #[test]
2013    fn unrouted_name_forwards_to_global_when_no_fallback() {
2014        let view = view_with_routes(
2015            std::collections::BTreeMap::new(),
2016            vec![udp("8.8.8.8:53")],
2017            vec![],
2018        );
2019        let buf = build_query(0x104, &["example", "com"], 1, 1);
2020
2021        match decide(&view, &buf).expect("decides") {
2022            Decision::Forward { upstreams, .. } => {
2023                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
2024            }
2025            Decision::Reply(_) => panic!("expected forward to global resolver"),
2026        }
2027    }
2028
2029    #[test]
2030    fn tailnet_name_is_never_forwarded() {
2031        // Anti-leak: a name under a tailnet search domain that has no overlay match must fail
2032        // closed to NXDOMAIN, never leak to an upstream resolver, even with resolvers configured.
2033        let view = view_with_routes(
2034            std::collections::BTreeMap::new(),
2035            vec![udp("8.8.8.8:53")],
2036            vec![udp("1.1.1.1:53")],
2037        );
2038        // "ghost.user.ts.net" is under the tailnet suffix but matches no peer.
2039        let buf = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
2040
2041        match decide(&view, &buf).expect("decides") {
2042            Decision::Reply(resp) => {
2043                let (_, rcode, _) = parse_header(&resp);
2044                assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
2045            }
2046            Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
2047        }
2048    }
2049
2050    #[test]
2051    fn no_resolvers_off_tailnet_is_servfail_not_nxdomain() {
2052        // No route, no resolvers: an OFF-tailnet name cannot be forwarded. Go answers SERVFAIL
2053        // (forwarder.go:1207 "no upstream resolvers set, returning SERVFAIL"), NOT NXDOMAIN — a
2054        // cacheable non-existence for a real name we merely couldn't forward would poison downstream
2055        // stub caches. We still never forward (the name does not leak); we just soft-fail.
2056        let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);
2057        let buf = build_query(0x106, &["example", "com"], 1, 1);
2058
2059        match decide(&view, &buf).expect("decides") {
2060            Decision::Reply(resp) => {
2061                let (_, rcode, _) = parse_header(&resp);
2062                assert_eq!(
2063                    rcode, 2,
2064                    "ServFail: off-tailnet name with no upstream to forward to"
2065                );
2066            }
2067            Decision::Forward { .. } => panic!("must not forward with no resolvers"),
2068        }
2069    }
2070
2071    #[test]
2072    fn route_with_only_ipv6_upstreams_off_tailnet_is_servfail() {
2073        // A split-DNS route exists but every resolver is IPv6 (filtered out under the IPv4-only
2074        // egress): we have a route yet nowhere to forward. That is an inability to forward an
2075        // off-tailnet name, so SERVFAIL (soft), not a fabricated NXDOMAIN.
2076        let mut routes = std::collections::BTreeMap::new();
2077        routes.insert("corp.example".to_string(), vec![udp("[2001:db8::53]:53")]);
2078        let view = view_with_routes(routes, vec![], vec![]);
2079        let buf = build_query(0x108, &["host", "corp", "example"], 1, 1);
2080
2081        match decide(&view, &buf).expect("decides") {
2082            Decision::Reply(resp) => {
2083                let (_, rcode, _) = parse_header(&resp);
2084                assert_eq!(
2085                    rcode, 2,
2086                    "ServFail: route's resolvers all filtered out (IPv6-only), cannot forward"
2087                );
2088            }
2089            Decision::Forward { .. } => panic!("must not forward when all upstreams are filtered"),
2090        }
2091    }
2092
2093    #[test]
2094    fn overlay_match_wins_over_forwarding() {
2095        // A known peer name resolves authoritatively even when upstream resolvers are configured.
2096        let mut db = PeerDb::default();
2097        db.upsert(&test_node());
2098        let view = DnsView {
2099            cfg: DnsConfig {
2100                magic_dns: true,
2101                search_domains: vec!["user.ts.net".to_string()],
2102                resolvers: vec![udp("8.8.8.8:53")],
2103                ..Default::default()
2104            },
2105            peers: Some(Arc::new(db)),
2106            self_node: None,
2107            exit_doh: None,
2108            enable_ipv6: false,
2109            accept_dns: true,
2110        };
2111        let buf = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);
2112
2113        match decide(&view, &buf).expect("decides") {
2114            Decision::Reply(resp) => {
2115                let (_, rcode, ancount) = parse_header(&resp);
2116                assert_eq!(rcode, 0, "authoritative answer wins");
2117                assert_eq!(ancount, 1);
2118            }
2119            Decision::Forward { .. } => panic!("overlay match must not forward"),
2120        }
2121    }
2122
2123    #[test]
2124    fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
2125        // Anti-leak: an `ip6.arpa` reverse PTR for a tailnet ULA (fd7a:…) must fail closed to
2126        // NXDOMAIN, never be forwarded — even with an upstream resolver configured. This fork is
2127        // IPv4-only on the tailnet; forwarding would reveal that a v6 address was probed.
2128        let view = view_with_routes(
2129            std::collections::BTreeMap::new(),
2130            vec![udp("8.8.8.8:53")],
2131            vec![udp("1.1.1.1:53")],
2132        );
2133        // Reverse name for fd7a::1 (nibble-reversed) under ip6.arpa. The exact nibble labels don't
2134        // matter to the guard — any name ending in ip6.arpa must fail closed.
2135        let labels = vec![
2136            "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
2137            "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
2138            "arpa",
2139        ];
2140        let buf = build_query(0x200, &labels, 12, 1);
2141
2142        match decide(&view, &buf).expect("decides") {
2143            Decision::Reply(resp) => {
2144                let (_, rcode, _) = parse_header(&resp);
2145                assert_eq!(
2146                    rcode, 3,
2147                    "NxDomain: ip6.arpa reverse must not leak upstream"
2148                );
2149            }
2150            Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
2151        }
2152    }
2153
2154    #[test]
2155    fn cap_response_sets_tc_when_truncated() {
2156        // An oversize upstream answer is capped to a single datagram AND marked truncated (TC bit)
2157        // so the stub resolver retries over TCP rather than trusting a chopped message.
2158        let mut big = build_query(0x300, &["example", "com"], 1, 1);
2159        big[2] |= 0x80; // make it a response (QR=1)
2160        big.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);
2161
2162        let out = cap_response(big);
2163        assert_eq!(out.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
2164        assert_ne!(out[2] & 0x02, 0, "TC bit set on truncation");
2165    }
2166
2167    #[test]
2168    fn cap_response_leaves_small_response_untouched() {
2169        // A response that fits is returned verbatim with no TC bit forced on.
2170        let mut small = build_query(0x301, &["example", "com"], 1, 1);
2171        small[2] |= 0x80;
2172        let before = small.clone();
2173
2174        let out = cap_response(small);
2175        assert_eq!(out, before, "small response unchanged");
2176        assert_eq!(out[2] & 0x02, 0, "TC bit not set when no truncation");
2177    }
2178
2179    #[test]
2180    fn response_matches_query_rejects_mismatched_question() {
2181        // id + QR match but the echoed question differs (different QNAME) => rejected. This guards
2182        // against an off-path injector that guesses the id but answers a different question.
2183        let query = build_query(0x1234, &["a", "com"], 1, 1);
2184
2185        let mut wrong_question = build_query(0x1234, &["b", "com"], 1, 1);
2186        wrong_question[2] |= 0x80; // QR=1, same id
2187        assert!(
2188            !response_matches_query(&query, &wrong_question),
2189            "different QNAME must be rejected"
2190        );
2191
2192        // A different QTYPE with the same name is also rejected.
2193        let mut wrong_qtype = build_query(0x1234, &["a", "com"], 28, 1);
2194        wrong_qtype[2] |= 0x80;
2195        assert!(
2196            !response_matches_query(&query, &wrong_qtype),
2197            "different QTYPE must be rejected"
2198        );
2199
2200        // The exact echoed question with QR=1 is accepted.
2201        let mut good = query.clone();
2202        good[2] |= 0x80;
2203        assert!(
2204            response_matches_query(&query, &good),
2205            "matching question accepted"
2206        );
2207    }
2208
2209    #[test]
2210    fn suffix_matches_handles_boundaries_and_empty() {
2211        // Exact and label-boundary matches.
2212        assert!(suffix_matches("corp", "corp"));
2213        assert!(suffix_matches("a.corp", "corp"));
2214        assert!(suffix_matches("a.b.corp", "corp"));
2215        // Not a label boundary.
2216        assert!(!suffix_matches("acorp", "corp"));
2217        // Empty suffix never matches (defense-in-depth against `ends_with("")`).
2218        assert!(!suffix_matches("anything.example", ""));
2219        assert!(!suffix_matches("", ""));
2220    }
2221
2222    #[test]
2223    fn empty_search_domain_does_not_capture_everything() {
2224        // Defense-in-depth: an empty search domain must NOT make every name look like a tailnet
2225        // name (which would fail-close legitimate recursive queries / mis-route). With an empty
2226        // suffix present alongside a real resolver, an off-tailnet name still forwards.
2227        let mut view = view_with_routes(
2228            std::collections::BTreeMap::new(),
2229            vec![udp("8.8.8.8:53")],
2230            vec![],
2231        );
2232        view.cfg.search_domains = vec![String::new()];
2233        let buf = build_query(0x400, &["example", "com"], 1, 1);
2234
2235        match decide(&view, &buf).expect("decides") {
2236            Decision::Forward { upstreams, .. } => {
2237                assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
2238            }
2239            Decision::Reply(_) => {
2240                panic!("empty search domain must not treat every name as tailnet")
2241            }
2242        }
2243    }
2244
2245    #[test]
2246    fn empty_route_suffix_does_not_capture_everything() {
2247        // Defense-in-depth: an empty route suffix must not match every name (which would route all
2248        // queries to that route's upstreams). With an empty-suffix route present, an unrelated name
2249        // still falls through to the global resolver.
2250        let mut routes = std::collections::BTreeMap::new();
2251        routes.insert(String::new(), vec![udp("10.9.9.9:53")]);
2252        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2253        let buf = build_query(0x401, &["example", "com"], 1, 1);
2254
2255        match decide(&view, &buf).expect("decides") {
2256            Decision::Forward { upstreams, .. } => {
2257                assert_eq!(
2258                    upstreams,
2259                    vec!["8.8.8.8:53".parse().unwrap()],
2260                    "empty route suffix must not capture; falls through to global"
2261                );
2262            }
2263            Decision::Reply(_) => panic!("expected forward to global resolver"),
2264        }
2265    }
2266
2267    fn udp_exit(addr: &str) -> DnsResolver {
2268        DnsResolver {
2269            transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2270            use_with_exit_node: true,
2271        }
2272    }
2273
2274    #[test]
2275    fn recursive_forward_is_flagged_route_forward_is_not() {
2276        // A recursive (global/fallback) forward sets `recursive = true` (eligible for DoH
2277        // delegation); a deliberately-configured split-DNS route sets `recursive = false`.
2278        let mut routes = std::collections::BTreeMap::new();
2279        routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
2280        let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);
2281
2282        let routed = build_query(0x500, &["api", "corp", "example"], 1, 1);
2283        match decide(&view, &routed).expect("decides") {
2284            Decision::Forward { recursive, .. } => {
2285                assert!(!recursive, "split-DNS route is not a recursive forward")
2286            }
2287            Decision::Reply(_) => panic!("expected route forward"),
2288        }
2289
2290        let global = build_query(0x501, &["example", "com"], 1, 1);
2291        match decide(&view, &global).expect("decides") {
2292            Decision::Forward { recursive, .. } => {
2293                assert!(recursive, "unrouted name is a recursive forward")
2294            }
2295            Decision::Reply(_) => panic!("expected recursive forward"),
2296        }
2297    }
2298
2299    #[test]
2300    fn recursive_plan_keeps_udp_without_exit_node() {
2301        // No active exit node: a recursive forward stays on its default UDP upstreams.
2302        let view = view_with_routes(
2303            std::collections::BTreeMap::new(),
2304            vec![udp("8.8.8.8:53")],
2305            vec![],
2306        );
2307        let default = vec!["8.8.8.8:53".parse().unwrap()];
2308        assert_eq!(
2309            recursive_plan(&view, default.clone()),
2310            RecursivePlan::Udp(default)
2311        );
2312    }
2313
2314    #[test]
2315    fn recursive_plan_delegates_to_doh_with_exit_node() {
2316        // Exit node active, no kept-local resolvers: recursive queries delegate to the exit node's
2317        // DoH endpoint so resolution egresses from the exit node, not this host.
2318        let mut view = view_with_routes(
2319            std::collections::BTreeMap::new(),
2320            vec![udp("8.8.8.8:53")],
2321            vec![],
2322        );
2323        let doh: SocketAddr = "100.64.0.5:8080".parse().unwrap();
2324        view.exit_doh = Some(doh);
2325        assert_eq!(
2326            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2327            RecursivePlan::Doh(doh)
2328        );
2329    }
2330
2331    #[test]
2332    fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
2333        // Even with an exit node active, resolvers flagged `use_with_exit_node` stay local (Go keeps
2334        // UseWithExitNode resolvers). The plan forwards to those over UDP, never delegating to DoH.
2335        let mut view = view_with_routes(
2336            std::collections::BTreeMap::new(),
2337            vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")],
2338            vec![],
2339        );
2340        view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());
2341        // The default upstreams the caller computed are irrelevant when kept-local resolvers exist;
2342        // the plan must use the kept-local ones.
2343        assert_eq!(
2344            recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]),
2345            RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
2346        );
2347    }
2348}