ts_runtime/magic_dns.rs
1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//! to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//! negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//! us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//! no route are forwarded to the fallback resolvers, else the global resolvers.
//! - **Fail closed**: if no route and no resolver is configured, nothing is forwarded — an unknown
//! tailnet-suffix name is `NXDOMAIN`, an unknown off-tailnet name is `SERVFAIL`.
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//! the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//! off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//! `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//! HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//! forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//! `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//! zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//! (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37 net::{IpAddr, Ipv4Addr, SocketAddr},
38 sync::Arc,
39 time::Duration,
40};
41
42use kameo::{
43 actor::ActorRef,
44 message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48 sync::{Semaphore, watch},
49 task::JoinSet,
50 time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56 Error,
57 env::Env,
58 peer_tracker::{PeerDb, PeerState},
59};
60
/// How long (5 seconds) to wait for an upstream resolver to answer a forwarded query before
/// giving up.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Cap on concurrent in-flight forwarded queries on the local `100.100.100.100:53` responder.
///
/// Each forward is spawned onto a task that holds an overlay UDP socket until the upstream answers
/// or [`UPSTREAM_TIMEOUT`] elapses. Without a cap, a local/tailnet client spraying distinct
/// forwardable names opens unbounded concurrent overlay sockets + tasks (a resource-exhaustion DoS
/// on a slow/black-holed upstream, since each lingers for the full timeout). Bound it the same way
/// the peerAPI DoH server bounds its request handlers ([`crate::peerapi`]'s `MAX_INFLIGHT`): acquire
/// a permit before spawning and drop the query fail-closed when saturated. A dropped DNS query is a
/// benign outcome — the stub resolver simply retries or times out — and Go's resolver likewise
/// bounds outstanding forwards rather than spawning without limit.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Cap, in bytes, on a forwarded upstream response we read into memory (a single UDP datagram).
/// [`cap_response`] truncates anything longer and sets the `TC` flag.
///
/// Matches Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). The client's query is forwarded
/// verbatim, so a client advertising a large EDNS UDP size can elicit a legitimately large
/// (1300–4096 byte) UDP answer (big TXT sets, DNSSEC, many-record round-robins). Capping at the old
/// 1232 truncated those and set TC, forcing a TCP retry this fork's UDP-only forwarder can't serve —
/// so the large answer became unreachable. 4096 relays them intact.
const MAX_UPSTREAM_RESPONSE: usize = 4096;

/// The MagicDNS service IP (`100.100.100.100`). The netstack interface owns this address, so a
/// `udp_bind` here receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// The DNS service port (the standard DNS port, 53).
const MAGIC_DNS_PORT: u16 = 53;
88
/// The latest view the answer loop resolves queries against.
///
/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
/// updates) and read fresh by the answer loop for every packet. [`decide`] takes it by shared
/// reference and never mutates it.
#[derive(Clone, Default)]
pub(crate) struct DnsView {
    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing: [`decide`]
    /// answers every query `REFUSED`.
    pub(crate) cfg: DnsConfig,
    /// The current peer database, if we've seen a peer update. `None` means name/IP lookups can
    /// only match [`self_node`](DnsView::self_node).
    pub(crate) peers: Option<Arc<PeerDb>>,
    /// This node, if we've seen a self-node update.
    pub(crate) self_node: Option<Node>,
    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
    /// recursive resolution to this address over the overlay instead of forwarding to the locally
    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
    ///
    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
    pub(crate) exit_doh: Option<SocketAddr>,
    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
    ///
    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
    pub(crate) enable_ipv6: bool,
    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
    /// join for connectivity without taking over DNS.
    ///
    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
    pub(crate) accept_dns: bool,
}
129
130impl DnsView {
131 /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132 fn node_by_name(&self, name: &str) -> Option<Node> {
133 if let Some(node) = self
134 .peers
135 .as_ref()
136 .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137 {
138 return Some(node);
139 }
140
141 self.self_node
142 .as_ref()
143 .filter(|n| n.matches_name(name))
144 .cloned()
145 }
146
147 /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148 /// wins first — tried as written and then qualified by each tailnet search domain (so a
149 /// short/partially-qualified name like `host` or `host.user` still resolves to
150 /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151 /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152 /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153 /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154 /// control-pushed static record — never anything else.
155 fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156 let addr_of = |node: Node| -> IpAddr {
157 if want_v4 {
158 IpAddr::from(node.tailnet_address.ipv4.addr())
159 } else {
160 IpAddr::from(node.tailnet_address.ipv6.addr())
161 }
162 };
163
164 if let Some(node) = self.node_by_name(canon) {
165 return Some(addr_of(node));
166 }
167 for suffix in &self.cfg.search_domains {
168 if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169 return Some(addr_of(node));
170 }
171 }
172
173 // Control-pushed static records match the fully-qualified query name only.
174 self.cfg.extra_records.iter().find_map(|rec| {
175 let family_ok = matches!(
176 (rec.addr, want_v4),
177 (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178 );
179 (rec.name == canon && family_ok).then_some(rec.addr)
180 })
181 }
182
183 /// Find the node (peer or self) that owns the tailnet IP `ip`.
184 fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185 if let Some(node) = self
186 .peers
187 .as_ref()
188 .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189 {
190 return Some(node);
191 }
192
193 self.self_node
194 .as_ref()
195 .filter(|n| {
196 IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197 || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198 })
199 .cloned()
200 }
201
202 /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203 /// resolvers, returning the upstreams to forward to.
204 ///
205 /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206 /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207 /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208 /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209 /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210 /// ([`Upstreams::None`] => NXDOMAIN).
211 fn route_for(&self, name: &str) -> Upstreams<'_> {
212 let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213 for (suffix, upstreams) in &self.cfg.routes {
214 if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215 best = Some((suffix.as_str(), upstreams));
216 }
217 }
218
219 if let Some((_, upstreams)) = best {
220 return if upstreams.is_empty() {
221 Upstreams::Block
222 } else {
223 // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224 // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225 // route) must keep receiving the query directly.
226 Upstreams::Route(upstreams)
227 };
228 }
229
230 if !self.cfg.fallback_resolvers.is_empty() {
231 return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232 }
233 if !self.cfg.resolvers.is_empty() {
234 return Upstreams::Recursive(&self.cfg.resolvers);
235 }
236 Upstreams::None
237 }
238}
239
/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
///
/// Produced by [`DnsView::route_for`]; the rcode chosen for the non-forwarding variants is decided
/// by the caller ([`forward_or_nxdomain`]).
enum Upstreams<'a> {
    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
    Route(&'a [DnsResolver]),
    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
    /// exit-node DoH delegation in the client serve loop.
    Recursive(&'a [DnsResolver]),
    /// A negative split-DNS route (one with an empty upstream list) matched: do not resolve.
    /// [`forward_or_nxdomain`] answers `NXDOMAIN`.
    Block,
    /// No route and no resolver configured: nothing to forward to. [`forward_or_nxdomain`] answers
    /// `SERVFAIL` (a soft failure), not `NXDOMAIN`.
    None,
}
252
/// What the (sync) decision step concluded for a query: either a complete response to send back,
/// or a request to forward the original query to an upstream resolver.
///
/// Returned (wrapped in `Option`) by [`decide`]; a `None` there means "drop the packet without
/// answering" and has no variant here.
pub(crate) enum Decision {
    /// A fully-formed response (encoded DNS message bytes) is ready to send.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of these upstream UDP resolvers; on success
    /// relay the upstream answer, on failure/timeout answer with the prebuilt `servfail` buffer
    /// (an off-tailnet name we failed to forward is a soft failure, not a cacheable non-existence —
    /// Go forwarder.go:1297-1307).
    Forward {
        /// UDP upstreams to try, in order.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes to forward verbatim.
        query: Vec<u8>,
        /// Fallback SERVFAIL response if every upstream fails or times out.
        servfail: Vec<u8>,
        /// Whether this is a *recursive* (catch-all fallback/global resolver) forward, as opposed
        /// to a deliberately-configured split-DNS route. Only recursive forwards are eligible for
        /// exit-node DoH delegation in the client serve loop (see [`DnsView::exit_doh`]); split-DNS
        /// routes always stay on their configured upstreams (typically subnet-reachable internal
        /// resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
277
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or when `name` ends with
/// `suffix` and the character immediately before that tail is a `.` — so `"a.corp"` matches
/// `"corp"` while `"acorp"` does not.
///
/// An **empty** suffix never matches anything. This is defense-in-depth: every string ends with
/// `""`, so without the guard an empty suffix would match every name and either over-route or
/// classify everything as a tailnet name — both leak-prone.
///
/// The comparison is byte-exact; callers are expected to pass already-canonicalised names.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    match name.strip_suffix(suffix) {
        // Nothing left over: `name == suffix`. Otherwise the remainder must end on a label
        // separator for the match to sit on a label boundary.
        Some(parent) => parent.is_empty() || parent.ends_with('.'),
        None => false,
    }
}
291
292/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
293/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
294/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
295fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
296 view.cfg
297 .search_domains
298 .iter()
299 .any(|suffix| suffix_matches(name, suffix))
300}
301
302/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
303/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
304/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
305/// such queries fail closed to NXDOMAIN.
306fn is_ip6_arpa(name: &str) -> bool {
307 suffix_matches(name, "ip6.arpa")
308}
309
/// Reports whether `ip` lies in the Tailscale CGNAT range `100.64.0.0/10` (RFC 6598, the tailnet
/// IPv4 space), i.e. `100.64.0.0` through `100.127.255.255`.
///
/// Reverse (`PTR`) queries for these addresses are authoritative to MagicDNS: if no peer owns the
/// IP the caller fails closed to NXDOMAIN rather than forwarding the probe to a third-party
/// resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    /// `100.64.0.0` as a big-endian `u32`.
    const CGNAT_NETWORK: u32 = 0x6440_0000;
    /// The `/10` netmask: the top ten bits.
    const CGNAT_MASK: u32 = 0xFFC0_0000;

    (u32::from(ip) & CGNAT_MASK) == CGNAT_NETWORK
}
317
318/// Decide what to do with a single DNS query against `view`: either a complete response is ready
319/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
320/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
321///
322/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
323/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
324/// NXDOMAIN rather than leaking to an upstream resolver.
325pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
326 // Malformed / non-query input is dropped: we never answer something we can't parse.
327 let query = decode_query(buf).ok()?;
328 let q = &query.question;
329 let id = query.id;
330 // Echo the query's RD bit (and set RA when set) on the response — Go derives the response header
331 // from the query header.
332 let rd = query.recursion_desired;
333
334 let reply =
335 |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rd, rcode, answers));
336
337 // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
338 // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
339 // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
340 // config and refuses every query. This one read site covers the netstack responder, the peerAPI
341 // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
342 if !view.cfg.magic_dns || !view.accept_dns {
343 return Some(reply(Rcode::Refused, &[]));
344 }
345
346 let canon = q.name.to_canon();
347
348 // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
349 // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
350 // forwards such a query like any other name. Treat it as an unsupported authoritative type —
351 // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
352 // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
353 const CLASS_IN: u16 = 1;
354 if q.qclass != CLASS_IN {
355 return Some(forward_or_nodata(view, &canon, buf, id, q, rd));
356 }
357
358 Some(match &q.qtype {
359 QType::A => match view.resolve_addr(&canon, true) {
360 Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
361 // No overlay/extra-record answer: try split-DNS / recursive upstreams.
362 _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
363 },
364 QType::Aaaa => match view.resolve_addr(&canon, false) {
365 // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
366 // (default) the client is IPv4-only, so answering with the overlay v6 address would
367 // only hand out an unroutable address — dead connections plus a fingerprint. Return
368 // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
369 // data as historically. We never forward this name to a recursive upstream either way:
370 // a positive overlay match is authoritative.
371 Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
372 reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
373 }
374 Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
375 // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
376 // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
377 _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
378 },
379 QType::Ptr => match q.name.ptr_to_ipv4() {
380 Some(octets) => {
381 let v4: Ipv4Addr = octets.into();
382 let ip = IpAddr::V4(v4);
383 match view.node_by_ip(ip) {
384 Some(node) => {
385 let fqdn = node.fqdn(false);
386 let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
387 reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
388 }
389 // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
390 // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
391 // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
392 // genuinely off-tailnet reverse queries are forwarded.
393 None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
394 None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
395 }
396 }
397 // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
398 // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
399 // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
400 None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
401 None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
402 },
403 // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
404 // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
405 // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
406 // REFUSED as "this server won't serve me" and abandons the resolver, which would break
407 // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
408 // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
409 QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q, rd),
410 })
411}
412
413/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
414/// either forward (to UDP upstreams), answer authoritatively absent (NXDOMAIN), or fail soft
415/// (SERVFAIL) when an off-tailnet name simply can't be forwarded.
416///
417/// Rcode parity with Go's resolver (`net/dns/resolver/tsdns.go` resolution order + `forwarder.go`):
418/// - A **tailnet-authoritative** name (search-domain suffix) or a **negative split-DNS route**
419/// (`Upstreams::Block` — a route configured with no resolvers, which Go answers authoritatively
420/// from Hosts, so an unmatched name under it is authoritatively absent) → **NXDOMAIN**.
421/// - An **off-tailnet** name we cannot forward — no route and no resolver configured
422/// (`Upstreams::None`), or a route whose resolvers are all filtered out (IPv6-only under the
423/// IPv4-only egress) → **SERVFAIL**, matching Go forwarder.go:1207 ("no upstream resolvers set,
424/// returning SERVFAIL"). A cacheable NXDOMAIN on a transient/structural inability to forward would
425/// make a downstream stub cache the *non-existence* of a real name; SERVFAIL is a soft failure the
426/// stub retries.
427///
428/// Anti-leak: a tailnet-suffix name is authoritative and is never forwarded — neither the name nor
429/// the query leaks to a third-party resolver. (The CGNAT `in-addr.arpa` / `ip6.arpa` reverse-zone
430/// NXDOMAIN guards live in the PTR arm of [`decide`] and are likewise unaffected.)
431fn forward_or_nxdomain(
432 view: &DnsView,
433 canon: &str,
434 buf: &[u8],
435 id: u16,
436 q: &ts_dns_wire::Question,
437 rd: bool,
438) -> Decision {
439 // NXDOMAIN for authoritative-absent names; SERVFAIL for an off-tailnet name we can't forward.
440 let nxdomain = encode_response(id, q, rd, Rcode::NxDomain, &[]);
441 let servfail = encode_response(id, q, rd, Rcode::ServFail, &[]);
442
443 if is_tailnet_name(view, canon) {
444 return Decision::Reply(nxdomain);
445 }
446
447 let (resolvers, recursive) = match view.route_for(canon) {
448 Upstreams::Route(resolvers) => (resolvers, false),
449 Upstreams::Recursive(resolvers) => (resolvers, true),
450 // A negative split-DNS route is authoritative-absent (Go answers it from Hosts): NXDOMAIN.
451 Upstreams::Block => return Decision::Reply(nxdomain),
452 // No route and no resolver: an off-tailnet name we have nowhere to forward — SERVFAIL, not
453 // a cacheable non-existence (Go forwarder.go:1207).
454 Upstreams::None => return Decision::Reply(servfail),
455 };
456
457 let upstreams: Vec<SocketAddr> = resolvers
458 .iter()
459 .map(DnsResolver::udp_addr)
460 // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
461 .filter(SocketAddr::is_ipv4)
462 .collect();
463 if upstreams.is_empty() {
464 // We had a route but every resolver was filtered out (IPv6-only): we cannot forward this
465 // off-tailnet name, so soft-fail rather than assert non-existence.
466 Decision::Reply(servfail)
467 } else {
468 Decision::Forward {
469 upstreams,
470 query: buf.to_vec(),
471 // All upstreams failing at runtime is also an inability to forward, not a non-existence
472 // (Go forwarder.go:1297-1307): hand the forwarder a SERVFAIL fallback, not NXDOMAIN.
473 servfail,
474 recursive,
475 }
476 }
477}
478
479/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
480/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
481/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
482/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
483fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
484 matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
485}
486
487/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
488/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
489/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
490/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
491/// ([`is_unimplemented_tailnet_qtype`]).
492///
493/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
494/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
495/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
496/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
497/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
498/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
499/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
500/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
501///
502/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
503/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
504fn forward_or_nodata(
505 view: &DnsView,
506 canon: &str,
507 buf: &[u8],
508 id: u16,
509 q: &ts_dns_wire::Question,
510 rd: bool,
511) -> Decision {
512 // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
513 // name exists, we just hold no record of that type. But a small set of types Go's resolver
514 // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
515 // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
516 // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
517 // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
518 // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
519 // authoritative for.
520 if is_tailnet_name(view, canon) {
521 let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
522 Rcode::NotImpl
523 } else {
524 Rcode::NoError
525 };
526 return Decision::Reply(encode_response(id, q, rd, rcode, &[]));
527 }
528 // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
529 // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
530 // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
531 // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
532 // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
533 // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
534 if is_ip6_arpa(canon) {
535 return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
536 }
537 if let Some(octets) = q.name.ptr_to_ipv4()
538 && is_tailnet_cgnat(octets.into())
539 {
540 return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
541 }
542 // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
543 // non-tailnet names and soft-fails (SERVFAIL) when no upstream is configured/routable; reuse it
544 // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN and negative-route paths
545 // are unreachable here — this only exercises its off-tailnet forward / SERVFAIL dispositions).
546 forward_or_nxdomain(view, canon, buf, id, q, rd)
547}
548
/// Client-side plan for a *recursive* forward: keep resolving over local UDP upstreams, or delegate
/// the query to the active exit node's peerAPI DoH endpoint over the overlay.
///
/// Produced by [`recursive_plan`].
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Used when no exit node is active, or when the config
    /// has `use_with_exit_node` resolvers (kept local even with an exit node selected).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address (taken
    /// from [`DnsView::exit_doh`]).
    Doh(SocketAddr),
}
559
560/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
561/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
562///
563/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
564/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
565/// resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
566/// is selected); forward to them over UDP, do NOT delegate.
567/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
568/// then egresses from the exit node, not this host (the whole point of routing through an exit
569/// node: this node's real IP is never used to resolve the peer's public names).
570pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
571 let Some(doh) = view.exit_doh else {
572 return RecursivePlan::Udp(default_upstreams);
573 };
574 let kept: Vec<SocketAddr> = view
575 .cfg
576 .resolvers_with_exit_node()
577 .map(DnsResolver::udp_addr)
578 // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
579 .filter(SocketAddr::is_ipv4)
580 .collect();
581 if kept.is_empty() {
582 RecursivePlan::Doh(doh)
583 } else {
584 RecursivePlan::Udp(kept)
585 }
586}
587
588/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
589/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
590/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
591/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
592/// truncation actually occurs.
593fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
594 if resp.len() > MAX_UPSTREAM_RESPONSE {
595 resp.truncate(MAX_UPSTREAM_RESPONSE);
596 // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
597 // capped datagram is always >= the header length, but guard anyway to never panic.
598 if let Some(flags_hi) = resp.get_mut(2) {
599 *flags_hi |= 0x02;
600 }
601 }
602 resp
603}
604
/// The byte length of a fixed DNS header.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question section (QNAME + QTYPE + QCLASS) of `msg` and return its byte range,
/// which always starts at [`DNS_HEADER_LEN`].
///
/// Returns [`None`] when the QNAME is malformed: a length byte with either of its top two bits set
/// (a compression pointer, which is illegal in a question, or a reserved label type), a label that
/// runs past the buffer, or a missing terminating root label. Also returns [`None`] when the four
/// QTYPE/QCLASS bytes do not fit. Bytes after the question are ignored.
///
/// Used to byte-compare a forwarded query's question against the upstream response's question.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;

    // Hop from length byte to length byte until the root label (0x00) ends the QNAME.
    let name_end = loop {
        let label_len = usize::from(*msg.get(cursor)?);
        if label_len & 0xC0 != 0 {
            return None;
        }
        let after_len_byte = cursor + 1;
        if label_len == 0 {
            break after_len_byte;
        }
        // Skip the label body; it may end exactly at the buffer end (the next read then fails).
        cursor = after_len_byte
            .checked_add(label_len)
            .filter(|&next| next <= msg.len())?;
    };

    // QTYPE (2) + QCLASS (2) follow the name.
    let end = name_end
        .checked_add(4)
        .filter(|&end| end <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
637
638/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
639/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
640/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
641/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
642/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
643/// answer for a different question, so we also require the echoed question to match.
644fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
645 if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
646 return false;
647 }
648 let id_matches = query[0..2] == resp[0..2];
649 let is_response = resp[2] & 0x80 != 0;
650 if !id_matches || !is_response {
651 return false;
652 }
653 // The response must echo the exact question we asked. Parse both question sections and compare
654 // their bytes; a parse failure on either side is treated as a non-match (fail closed).
655 match (question_range(query), question_range(resp)) {
656 (Some(q), Some(r)) => query[q] == resp[r],
657 _ => false,
658 }
659}
660
661/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
662/// well-formed response, or the prebuilt `fallback` buffer if every upstream times out or errors.
663///
664/// The caller supplies `fallback` (a SERVFAIL response for a forwarded off-tailnet name — an
665/// all-upstream failure is a soft "couldn't resolve", not a cacheable non-existence, matching Go
666/// forwarder.go:1297-1307). Keeping it caller-supplied means this fn is rcode-agnostic.
667///
668/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
669/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
670/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
671/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
672pub(crate) async fn forward_query(
673 channel: &Channel,
674 upstreams: &[SocketAddr],
675 query: &[u8],
676 fallback: Vec<u8>,
677) -> Vec<u8> {
678 for upstream in upstreams {
679 let socket = match channel
680 .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
681 .await
682 {
683 Ok(s) => s,
684 Err(e) => {
685 tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
686 continue;
687 }
688 };
689
690 if let Err(e) = socket.send_to(*upstream, query).await {
691 tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
692 continue;
693 }
694
695 match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
696 Ok(Ok((from, resp))) if !resp.is_empty() => {
697 // Anti-poisoning: only accept a datagram that came from the upstream we queried
698 // and whose DNS header matches this query (same transaction id, QR=response bit
699 // set). An off-path injector racing the real answer is otherwise relayed straight
700 // back to the stub resolver as authoritative.
701 if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
702 tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
703 continue;
704 }
705 return cap_response(resp.to_vec());
706 }
707 Ok(Ok(_)) => continue,
708 Ok(Err(e)) => {
709 tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
710 continue;
711 }
712 Err(_) => {
713 tracing::debug!(%upstream, "magic dns upstream timed out");
714 continue;
715 }
716 }
717 }
718 fallback
719}
720
721/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
722///
723/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
724/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
725async fn serve(
726 socket: netstack::netsock::UdpSocket,
727 rx: watch::Receiver<Arc<DnsView>>,
728 channel: Channel,
729) {
730 let socket = Arc::new(socket);
731 let mut forwards = JoinSet::new();
732 // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
733 // lifetime of each spawned forward task and released on completion.
734 let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
735 loop {
736 let (src, buf) = match socket.recv_from_bytes().await {
737 Ok(pkt) => pkt,
738 Err(e) => {
739 tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
740 return;
741 }
742 };
743
744 // Read the freshest view per packet.
745 let view = rx.borrow().clone();
746
747 match decide(&view, &buf) {
748 // Malformed query: drop silently.
749 None => continue,
750 Some(Decision::Reply(resp)) => {
751 if let Err(e) = socket.send_to(src, &resp).await {
752 tracing::warn!(error = %e, %src, "magic dns response send failed");
753 }
754 }
755 Some(Decision::Forward {
756 upstreams,
757 query,
758 servfail,
759 recursive,
760 }) => {
761 // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
762 // always stays on its configured upstreams. Decide the plan against the current
763 // view so a query routed while an exit node is active egresses from that exit node.
764 let plan = if recursive {
765 recursive_plan(&view, upstreams)
766 } else {
767 RecursivePlan::Udp(upstreams)
768 };
769 // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
770 // times out) rather than spawn an unbounded task that pins an overlay socket for up
771 // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
772 // (NOT `let _ =`, which would drop it immediately) so it is released only when the
773 // task body completes.
774 let Ok(permit) = inflight.clone().try_acquire_owned() else {
775 tracing::warn!(
776 %src,
777 max = MAX_INFLIGHT_FORWARDS,
778 "magic dns drop: at max in-flight forwarded queries"
779 );
780 continue;
781 };
782 let socket = socket.clone();
783 let channel = channel.clone();
784 forwards.spawn(async move {
785 let _permit = permit;
786 let resp = match plan {
787 RecursivePlan::Udp(upstreams) => {
788 forward_query(&channel, &upstreams, &query, servfail).await
789 }
790 RecursivePlan::Doh(doh_addr) => {
791 crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, servfail)
792 .await
793 }
794 };
795 if let Err(e) = socket.send_to(src, &resp).await {
796 tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
797 }
798 });
799 }
800 }
801
802 // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
803 // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
804 // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
805 while forwards.try_join_next().is_some() {}
806 }
807}
808
809/// The MagicDNS responder actor.
810///
811/// Subscribes to control state (for the DNS config + self node) and peer state (for the peer
812/// database), keeping a [`DnsView`] that the spawned answer loop reads for every query.
813pub struct MagicDnsActor {
814 /// Keeps the socket-serving task alive for the lifetime of the actor.
815 _joinset: JoinSet<()>,
816 /// The latest view, shared with the answer loop.
817 view_tx: watch::Sender<Arc<DnsView>>,
818 /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
819 /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
820 /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
821 /// rebuild time — not captured once — for a toggle to reach the served view.
822 env: Env,
823 /// The overlay channel, retained so the [`Query`] handler can run a query through the same
824 /// forward path the serve loop uses ([`forward_query`] / [`forward_doh`], both binding
825 /// `0.0.0.0:0` on this channel — never a host socket).
826 channel: Channel,
827}
828
/// A DNS query issued programmatically against the live MagicDNS responder (the
/// `100.100.100.100` path), backing [`Device::query_dns`](crate::Device::query_dns).
///
/// The handler builds a query packet from these fields and runs it through the same
/// [`decide`]/forward logic as a query received on the wire, so the outcome — and its anti-leak
/// posture — is what a tailnet client would see.
pub struct Query {
    /// Name to resolve in canonical form, e.g. `example.com` (no trailing dot needed).
    pub name: String,
    /// Numeric DNS query type: `1` = A, `28` = AAAA, `12` = PTR, or any other RFC 1035 TYPE.
    pub qtype: u16,
}
839
840/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
841/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
842/// rather than parsed records — this fork's wire codec has no answer-record decoder.
843///
844/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
845/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
846#[derive(Debug, Clone, kameo::Reply)]
847pub struct DnsQueryResult {
848 /// The raw DNS response datagram (header + question + any answer records).
849 pub response: Vec<u8>,
850 /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
851 /// `5`=Refused, …).
852 pub rcode: u8,
853 /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
854 /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
855 /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
856 /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
857 pub resolvers_consulted: Vec<SocketAddr>,
858}
859
860impl kameo::Actor for MagicDnsActor {
861 type Args = (Env, Channel);
862 type Error = Error;
863
864 async fn on_start(
865 (env, channel): Self::Args,
866 slf: ActorRef<Self>,
867 ) -> Result<Self, Self::Error> {
868 env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
869 env.subscribe::<Arc<PeerState>>(&slf).await?;
870 env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
871 .await?;
872
873 // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
874 // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
875 // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
876 // runtime-settable). The seed value is moot — no query is served before the first
877 // StateUpdate — but seeding it keeps the pre-update view internally consistent.
878 let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
879 enable_ipv6: env.enable_ipv6,
880 accept_dns: env.accept_dns(),
881 ..DnsView::default()
882 }));
883
884 let mut joinset = JoinSet::new();
885
886 // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
887 // never answers anything) so a transient bind error doesn't take down the runtime.
888 let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
889 match channel.udp_bind(addr).await {
890 Ok(socket) => {
891 tracing::debug!(%addr, "magic dns responder bound");
892 joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
893 }
894 Err(e) => {
895 tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
896 }
897 }
898
899 // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
900 // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
901 // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
902 // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
903 if let Some(port) = env.peerapi_port {
904 let channel = channel.clone();
905 let view_rx = view_rx.clone();
906 let forward_exit_egress = env.forward_exit_egress;
907 let taildrop = env.taildrop_store.clone();
908 let funnel_ingress = env.funnel_ingress.clone();
909 joinset.spawn(crate::peerapi::serve(
910 channel,
911 port,
912 view_rx,
913 forward_exit_egress,
914 taildrop,
915 funnel_ingress,
916 ));
917 }
918
919 Ok(Self {
920 _joinset: joinset,
921 view_tx,
922 env,
923 channel,
924 })
925 }
926}
927
/// Build a header-only SERVFAIL response for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name longer than 255 bytes on the wire).
///
/// The result is exactly 12 bytes: QR=1 (it is a response), RCODE=2 (server failure), every
/// other bit and all section counts zero — there is no question or answer section because no
/// parseable question was ever produced. This gives `query_dns` a definite, honest RCODE to
/// report instead of an empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    const QR_RESPONSE: u8 = 0x80; // byte 2, high bit
    const RCODE_SERVFAIL: u8 = 0x02; // byte 3, low nibble

    vec![
        0, 0, // id
        QR_RESPONSE, RCODE_SERVFAIL, // flags
        0, 0, // QDCOUNT
        0, 0, // ANCOUNT
        0, 0, // NSCOUNT
        0, 0, // ARCOUNT
    ]
}
940
941impl Message<Query> for MagicDnsActor {
942 type Reply = DnsQueryResult;
943
944 async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
945 // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
946 // uses, against the freshest view — so the result and its anti-leak posture exactly match an
947 // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
948 // and `response_matches_query` validates the echoed id against this same buffer.
949 //
950 // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
951 // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
952 // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
953 // corrupting the QTYPE/QCLASS that follow.
954 let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
955 let labels: Vec<String> = trimmed
956 .split('.')
957 .filter(|label| !label.is_empty())
958 .map(str::to_owned)
959 .collect();
960 let qtype = match query.qtype {
961 1 => ts_dns_wire::QType::A,
962 28 => ts_dns_wire::QType::Aaaa,
963 12 => ts_dns_wire::QType::Ptr,
964 other => ts_dns_wire::QType::Other(other),
965 };
966 // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
967 // forwards via `forward_or_nodata`, matching the on-the-wire path).
968 let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
969
970 let view = self.view_tx.borrow().clone();
971
972 let (response, resolvers_consulted) = match decide(&view, &buf) {
973 // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
974 // the name normalized above that can still happen for a name `encode_query` accepts but
975 // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
976 // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
977 // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
978 // silently drops here (the on-wire client times out); a programmatic caller gets a
979 // definite, honest error instead.
980 None => (servfail_response(), Vec::new()),
981 Some(Decision::Reply(resp)) => (resp, Vec::new()),
982 Some(Decision::Forward {
983 upstreams,
984 query,
985 servfail,
986 recursive,
987 }) => {
988 let plan = if recursive {
989 recursive_plan(&view, upstreams)
990 } else {
991 RecursivePlan::Udp(upstreams)
992 };
993 match plan {
994 RecursivePlan::Udp(upstreams) => {
995 let resp = forward_query(&self.channel, &upstreams, &query, servfail).await;
996 (resp, upstreams)
997 }
998 RecursivePlan::Doh(doh_addr) => {
999 let resp = crate::peerapi_doh::forward_doh(
1000 &self.channel,
1001 doh_addr,
1002 &query,
1003 servfail,
1004 )
1005 .await;
1006 // The query egressed via the exit node's DoH endpoint, not a local UDP
1007 // upstream — report the DoH address as the resolver consulted.
1008 (resp, vec![doh_addr])
1009 }
1010 }
1011 }
1012 };
1013
1014 // RCODE is the low 4 bits of the second flags byte (header byte 3).
1015 let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
1016
1017 DnsQueryResult {
1018 response,
1019 rcode,
1020 resolvers_consulted,
1021 }
1022 }
1023}
1024
1025impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
1026 type Reply = ();
1027
1028 async fn handle(
1029 &mut self,
1030 update: Arc<ts_control::StateUpdate>,
1031 _ctx: &mut Context<Self, Self::Reply>,
1032 ) {
1033 // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
1034 // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1035 let accept_dns = self.env.accept_dns();
1036 self.view_tx.send_modify(|view| {
1037 let mut next = (**view).clone();
1038 next.cfg = update.dns_config.clone().unwrap_or_default();
1039 next.self_node = update.node.clone();
1040 next.accept_dns = accept_dns;
1041 *view = Arc::new(next);
1042 });
1043 }
1044}
1045
1046impl Message<Arc<PeerState>> for MagicDnsActor {
1047 type Reply = ();
1048
1049 async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1050 // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1051 // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1052 // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1053 let accept_dns = self.env.accept_dns();
1054 self.view_tx.send_modify(|view| {
1055 let mut next = (**view).clone();
1056 next.peers = Some(state.peers.clone());
1057 next.accept_dns = accept_dns;
1058 *view = Arc::new(next);
1059 });
1060 }
1061}
1062
1063impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1064 type Reply = ();
1065
1066 async fn handle(
1067 &mut self,
1068 active: crate::route_updater::ActiveExitNode,
1069 _ctx: &mut Context<Self, Self::Reply>,
1070 ) {
1071 // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1072 // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1073 // the address here — once, from the route updater's authoritative selection — means the
1074 // serve loop never re-resolves the selector.
1075 let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1076 self.view_tx.send_modify(|view| {
1077 let mut next = (**view).clone();
1078 next.exit_doh = exit_doh;
1079 *view = Arc::new(next);
1080 });
1081 }
1082}
1083
1084#[cfg(test)]
1085mod tests {
1086 use ts_control::{StableNodeId, TailnetAddress};
1087
1088 use super::*;
1089
1090 /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1091 /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1092 /// `Forward`; a `Forward` here is a bug and panics.
1093 fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1094 match decide(view, buf)? {
1095 Decision::Reply(resp) => Some(resp),
1096 Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1097 }
1098 }
1099
1100 /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1101 fn test_node() -> Node {
1102 Node {
1103 id: 1,
1104 stable_id: StableNodeId("n1".to_string()),
1105 hostname: "host".to_string(),
1106 user_id: 0,
1107 tailnet: Some("user.ts.net".to_string()),
1108 tags: vec![],
1109 tailnet_address: TailnetAddress {
1110 ipv4: "100.64.0.1/32".parse().unwrap(),
1111 ipv6: "fd7a::1/128".parse().unwrap(),
1112 },
1113 node_key: [0u8; 32].into(),
1114 node_key_expiry: None,
1115 online: None,
1116 last_seen: None,
1117 key_signature: vec![],
1118 machine_key: None,
1119 disco_key: None,
1120 accepted_routes: vec![],
1121 underlay_addresses: vec![],
1122 derp_region: None,
1123 cap: Default::default(),
1124 cap_map: Default::default(),
1125 peerapi_port: None,
1126 peerapi_dns_proxy: false,
1127 is_wireguard_only: false,
1128 exit_node_dns_resolvers: vec![],
1129 peer_relay: false,
1130 service_vips: Default::default(),
1131 }
1132 }
1133
1134 /// A view with MagicDNS on and a single peer in the db.
1135 fn view_with_peer() -> DnsView {
1136 let mut db = PeerDb::default();
1137 db.upsert(&test_node());
1138
1139 DnsView {
1140 cfg: DnsConfig {
1141 magic_dns: true,
1142 search_domains: vec!["user.ts.net".to_string()],
1143 ..Default::default()
1144 },
1145 peers: Some(Arc::new(db)),
1146 self_node: None,
1147 exit_doh: None,
1148 enable_ipv6: false,
1149 accept_dns: true,
1150 }
1151 }
1152
1153 /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1154 fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1155 let mut buf: Vec<u8> = Vec::new();
1156 buf.extend_from_slice(&id.to_be_bytes());
1157 buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1158 buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1159 buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1160 buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1161 buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1162 for label in labels {
1163 buf.push(label.len() as u8);
1164 buf.extend_from_slice(label.as_bytes());
1165 }
1166 buf.push(0); // root label
1167 buf.extend_from_slice(&qtype.to_be_bytes());
1168 buf.extend_from_slice(&qclass.to_be_bytes());
1169 buf
1170 }
1171
1172 /// Parse a response header: returns `(id, rcode, ancount)`.
1173 fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1174 let id = u16::from_be_bytes([resp[0], resp[1]]);
1175 let flags = u16::from_be_bytes([resp[2], resp[3]]);
1176 let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1177 (id, (flags & 0x000F) as u8, ancount)
1178 }
1179
1180 #[test]
1181 fn a_query_for_known_peer_answers_v4() {
1182 let view = view_with_peer();
1183 let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1184
1185 let resp = answer(&view, &buf).expect("answers");
1186 let (id, rcode, ancount) = parse_header(&resp);
1187 assert_eq!(id, 0x1234);
1188 assert_eq!(rcode, 0, "NoError");
1189 assert_eq!(ancount, 1);
1190
1191 // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1192 let tail = &resp[resp.len() - 4..];
1193 assert_eq!(tail, &[100, 64, 0, 1]);
1194 }
1195
1196 #[test]
1197 fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1198 // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1199 // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1200 // route. This is the anti-fingerprint / no-dead-connections posture.
1201 let view = view_with_peer();
1202 assert!(!view.enable_ipv6, "default gate is off");
1203 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1204
1205 let resp = answer(&view, &buf).expect("answers");
1206 let (_, rcode, ancount) = parse_header(&resp);
1207 assert_eq!(rcode, 0, "NoError (NODATA)");
1208 assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1209 }
1210
1211 #[test]
1212 fn a_query_still_resolves_when_ipv6_off() {
1213 // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1214 let view = view_with_peer();
1215 let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1216
1217 let resp = answer(&view, &buf).expect("answers");
1218 let (_, rcode, ancount) = parse_header(&resp);
1219 assert_eq!(rcode, 0, "NoError");
1220 assert_eq!(ancount, 1);
1221 let tail = &resp[resp.len() - 4..];
1222 assert_eq!(tail, &[100, 64, 0, 1]);
1223 }
1224
1225 #[test]
1226 fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1227 // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1228 let mut view = view_with_peer();
1229 view.enable_ipv6 = true;
1230 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1231
1232 let resp = answer(&view, &buf).expect("answers");
1233 let (_, rcode, ancount) = parse_header(&resp);
1234 assert_eq!(rcode, 0, "NoError");
1235 assert_eq!(ancount, 1);
1236
1237 let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1238 let tail = &resp[resp.len() - 16..];
1239 assert_eq!(tail, expected);
1240 }
1241
1242 #[test]
1243 fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1244 // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1245 // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1246 // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1247 // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1248 let mut db = PeerDb::default();
1249 db.upsert(&test_node());
1250 let view = DnsView {
1251 cfg: DnsConfig {
1252 magic_dns: true,
1253 search_domains: vec!["user.ts.net".to_string()],
1254 fallback_resolvers: vec![DnsResolver {
1255 transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1256 use_with_exit_node: false,
1257 }],
1258 ..Default::default()
1259 },
1260 peers: Some(Arc::new(db)),
1261 self_node: None,
1262 exit_doh: None,
1263 enable_ipv6: false,
1264 accept_dns: true,
1265 };
1266 let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1267
1268 match decide(&view, &buf).expect("decides") {
1269 Decision::Reply(resp) => {
1270 let (_, rcode, _) = parse_header(&resp);
1271 assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1272 }
1273 Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1274 }
1275 }
1276
1277 #[test]
1278 fn bare_hostname_resolves() {
1279 // The name index also stores the bare hostname.
1280 let view = view_with_peer();
1281 let buf = build_query(0x7, &["host"], 1, 1);
1282
1283 let resp = answer(&view, &buf).expect("answers");
1284 let (_, rcode, ancount) = parse_header(&resp);
1285 assert_eq!(rcode, 0);
1286 assert_eq!(ancount, 1);
1287 }
1288
1289 #[test]
1290 fn unknown_off_tailnet_name_with_no_upstream_is_servfail() {
1291 // An off-tailnet name with no resolver configured cannot be forwarded. Go answers SERVFAIL
1292 // (a soft "couldn't resolve"), not NXDOMAIN — asserting non-existence of a real name we
1293 // simply have no upstream for would poison a downstream stub's negative cache. (A *tailnet*
1294 // name with no overlay match stays NXDOMAIN — see `tailnet_name_is_never_forwarded` — and a
1295 // negative split-DNS route stays NXDOMAIN — see `negative_route_is_nxdomain_not_forwarded`.)
1296 let view = view_with_peer();
1297 let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1298
1299 let resp = answer(&view, &buf).expect("answers");
1300 let (_, rcode, ancount) = parse_header(&resp);
1301 assert_eq!(
1302 rcode, 2,
1303 "ServFail: off-tailnet name, nothing to forward to"
1304 );
1305 assert_eq!(ancount, 0);
1306 }
1307
1308 #[test]
1309 fn magic_dns_off_is_refused() {
1310 // Fail closed: with MagicDNS disabled, even a known name is refused.
1311 let mut view = view_with_peer();
1312 view.cfg.magic_dns = false;
1313 let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1314
1315 let resp = answer(&view, &buf).expect("answers");
1316 let (_, rcode, ancount) = parse_header(&resp);
1317 assert_eq!(rcode, 5, "Refused");
1318 assert_eq!(ancount, 0);
1319 }
1320
1321 #[test]
1322 fn accept_dns_false_refuses_otherwise_answerable_query() {
1323 // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1324 // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1325 // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1326 let mut view = view_with_peer();
1327 assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1328 view.accept_dns = false;
1329 let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1330
1331 let resp = answer(&view, &buf).expect("answers");
1332 let (_, rcode, ancount) = parse_header(&resp);
1333 assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1334 assert_eq!(ancount, 0);
1335
1336 // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1337 // answers authoritatively — proving the OFF→ON restore is automatic.
1338 view.accept_dns = true;
1339 let resp = answer(&view, &buf).expect("answers");
1340 let (_, rcode, ancount) = parse_header(&resp);
1341 assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1342 assert_eq!(ancount, 1);
1343 let tail = &resp[resp.len() - 4..];
1344 assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1345 }
1346
1347 #[test]
1348 fn default_view_serves_nothing() {
1349 // The default (no dns_config seen) has magic_dns == false: fail closed.
1350 let view = DnsView::default();
1351 let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1352
1353 let resp = answer(&view, &buf).expect("answers");
1354 let (_, rcode, _) = parse_header(&resp);
1355 assert_eq!(rcode, 5, "Refused");
1356 }
1357
1358 #[test]
1359 fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1360 // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1361 // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1362 // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1363 let view = view_with_peer();
1364 let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1365
1366 let resp = answer(&view, &buf).expect("answers");
1367 let (_, rcode, ancount) = parse_header(&resp);
1368 assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1369 assert_eq!(ancount, 0, "no answer records (NODATA)");
1370 }
1371
1372 #[test]
1373 fn unsupported_qtype_off_tailnet_forwards_or_servfails() {
1374 // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1375 // REFUSED. With no upstream configured in this view it soft-fails to SERVFAIL (the same
1376 // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1377 // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1378 let view = view_with_peer();
1379 let buf = build_query(0x1, &["example", "com"], 65, 1);
1380
1381 let resp = answer(&view, &buf).expect("answers");
1382 let (_, rcode, _) = parse_header(&resp);
1383 assert_eq!(
1384 rcode, 2,
1385 "off-tailnet, no upstream -> SERVFAIL (forwardable, not Refused)"
1386 );
1387 }
1388
/// NS (2), SOA (6), HINFO (13) and AXFR (252) for a tailnet-authoritative name answer NOTIMP.
///
/// This matches Go `resolveLocal`'s `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO:
/// return RCodeNotImplemented`. Answering NODATA (rcode 0) instead was a clean fingerprint: a
/// `dig SOA user.ts.net` answer differed from real tailscaled. The name is still never forwarded
/// (anti-leak).
#[test]
fn unimplemented_qtype_on_tailnet_name_is_notimp() {
    // NS, SOA, HINFO, AXFR — in that order.
    const NOTIMP_QTYPES: [u16; 4] = [2, 6, 13, 252];

    let dns = view_with_peer();
    for qtype in NOTIMP_QTYPES {
        let query = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
        let reply = answer(&dns, &query).expect("answers");

        let (_id, rcode, answers) = parse_header(&reply);
        assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
        assert_eq!(answers, 0, "NOTIMP carries no answer records");
    }
}
1405
/// NOTIMP applies ONLY to names we are authoritative for.
///
/// An NS query for an off-tailnet name must still take the forward path (SERVFAIL here, since no
/// upstream is configured) rather than NOTIMP, just like the off-tailnet HTTPS-record case. This
/// guards the NOTIMP disposition against over-reach.
#[test]
fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
    const QTYPE_NS: u16 = 2;

    let dns = view_with_peer();
    // Off-tailnet name, NS qtype.
    let query = build_query(0x1, &["example", "com"], QTYPE_NS, 1);
    let reply = answer(&dns, &query).expect("answers");

    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "off-tailnet NS -> SERVFAIL (forwardable), not NOTIMP"
    );
}
1420
/// A message with the QR bit set is a response, not a query, so it is dropped without an answer.
#[test]
fn malformed_query_is_dropped() {
    let dns = view_with_peer();

    let mut wire = build_query(0x1, &["host"], 1, 1);
    // Overwrite the first flags byte so that only the QR bit is set.
    wire[2] = 0x80;

    assert!(answer(&dns, &wire).is_none());
}
1428
/// A PTR query for a known peer IP is answered with that peer's FQDN.
#[test]
fn ptr_for_known_ip_answers_fqdn() {
    let dns = view_with_peer();
    // 100.64.0.1 reversed is 1.0.64.100.in-addr.arpa.
    let query = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);

    let reply = answer(&dns, &query).expect("answers");
    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError");
    assert_eq!(answers, 1);

    // Wire form of the peer's fqdn "host.user.ts.net": every label is length-prefixed and the name
    // is terminated by the zero-length root label. The PTR rdata is the tail of the reply.
    let fqdn_wire: Vec<u8> = ["host", "user", "ts", "net"]
        .iter()
        .flat_map(|label| std::iter::once(label.len() as u8).chain(label.bytes()))
        .chain(std::iter::once(0u8))
        .collect();

    let rdata_start = reply.len() - fqdn_wire.len();
    assert_eq!(&reply[rdata_start..], fqdn_wire.as_slice());
}
1453
/// A reverse lookup for a public IP is an ordinary off-tailnet name.
///
/// 9.9.9.9 is neither a known tailnet IP nor inside the CGNAT reverse zone. With no upstream to
/// forward it to, the result is SERVFAIL (soft), not NXDOMAIN. (A CGNAT/ip6.arpa reverse for an
/// unmatched tailnet IP still fails closed to NXDOMAIN as an anti-leak guard — see
/// `ptr_for_unknown_tailnet_ip_*`.)
#[test]
fn ptr_for_unknown_public_ip_off_tailnet_is_servfail() {
    const QTYPE_PTR: u16 = 12;

    let dns = view_with_peer();
    let query = build_query(
        0x34,
        &["9", "9", "9", "9", "in-addr", "arpa"],
        QTYPE_PTR,
        1,
    );
    let reply = answer(&dns, &query).expect("answers");

    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "ServFail: off-tailnet public-IP reverse, no upstream"
    );
}
1470
/// A reverse query for an unmatched IP inside the CGNAT range (100.64.0.0/10) fails closed.
///
/// The view HAS an upstream resolver, so an off-tailnet reverse query would forward. The probed
/// tailnet IP must never leak upstream, hence NXDOMAIN.
#[test]
fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let upstream = DnsResolver {
        transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
        use_with_exit_node: false,
    };
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec!["user.ts.net".to_string()],
        fallback_resolvers: vec![upstream],
        ..Default::default()
    };
    let dns = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    // 100.64.0.9 sits in the CGNAT range but belongs to no peer.
    let query = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => {
            panic!("tailnet CGNAT PTR must never be forwarded upstream")
        }
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain");
}
1507
/// Anti-leak regression for the exotic-qtype forward path.
///
/// A NON-PTR query (TXT, type 16) for a tailnet CGNAT reverse name must STILL fail closed to
/// NXDOMAIN and never forward, even with an upstream configured. The PTR arm guards this, but the
/// `QType::Other` path routes through `forward_or_nodata`, which must re-apply the reverse-zone
/// guard or the tailnet IP leaks.
#[test]
fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
    const QTYPE_TXT: u16 = 16;

    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let upstream = DnsResolver {
        transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
        use_with_exit_node: false,
    };
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec!["user.ts.net".to_string()],
        fallback_resolvers: vec![upstream],
        ..Default::default()
    };
    let dns = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    // TXT for a CGNAT reverse name: expect NXDOMAIN, never a Forward (no tailnet-IP leak).
    let query = build_query(
        0x36,
        &["9", "0", "64", "100", "in-addr", "arpa"],
        QTYPE_TXT,
        1,
    );
    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => {
            panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
        }
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain");
}
1545
/// The same anti-leak guard for an `ip6.arpa` reverse name under an exotic qtype.
///
/// The query must be answered NXDOMAIN rather than forwarded, because forwarding would reveal that
/// a tailnet ULA was probed.
#[test]
fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
    const QTYPE_TXT: u16 = 16;

    let no_routes = std::collections::BTreeMap::new();
    let dns = view_with_routes(no_routes, vec![udp("9.9.9.9:53")], Vec::new());

    // An ip6.arpa reverse name queried with a TXT qtype must fail closed.
    let labels = [
        "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
        "a", "7", "d", "f", "ip6", "arpa",
    ];
    let query = build_query(0x37, &labels, QTYPE_TXT, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain");
}
1573
/// `is_tailnet_cgnat` accepts addresses inside the /10 exercised below and rejects its neighbours.
#[test]
fn is_tailnet_cgnat_classifies_range() {
    let cases = [
        ("100.64.0.0", true),
        ("100.64.0.1", true),
        ("100.127.255.255", true),
        // Outside the /10:
        ("100.63.255.255", false),
        ("100.128.0.0", false),
        ("9.9.9.9", false),
        // The MagicDNS resolver IP 100.100.100.100 is itself inside the /10.
        ("100.100.100.100", true),
    ];

    for (addr, expected) in cases {
        assert_eq!(is_tailnet_cgnat(addr.parse().unwrap()), expected, "{addr}");
    }
}
1586
/// `response_matches_query` requires a matching transaction id, the QR bit, and sane lengths.
#[test]
fn response_matches_query_validates_id_and_qr() {
    // Transaction id 0x1234 with QR=0.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Well-formed response: identical bytes with the QR bit turned on.
    let reply = {
        let mut bytes = query.clone();
        bytes[2] |= 0x80;
        bytes
    };
    assert!(response_matches_query(&query, &reply));

    // The query itself has the right id but QR=0, so it is not a response: rejected.
    assert!(!response_matches_query(&query, &query));

    // QR=1 but a different transaction id: rejected (off-path forgery).
    let forged = {
        let mut bytes = reply.clone();
        bytes[0] ^= 0xFF;
        bytes
    };
    assert!(!response_matches_query(&query, &forged));

    // Buffers that are too short on either side: rejected.
    let short_reply = [0u8; 2];
    let short_query = [0u8; 3];
    assert!(!response_matches_query(&query, &short_reply));
    assert!(!response_matches_query(&short_query, &reply));
}
1609
/// With no peer db but a self node set, the self node answers for its own name.
#[test]
fn self_node_resolves_when_no_peer_match() {
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: Vec::new(),
        ..Default::default()
    };
    let dns = DnsView {
        cfg,
        peers: None,
        self_node: Some(test_node()),
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    let query = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0);
    assert_eq!(answers, 1);

    // The reply must end with the 4-byte A rdata 100.64.0.1.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 1]);
}
1634
/// A partially-qualified name resolves once a search domain completes it.
///
/// With the search domain overridden to "ts.net", the query "host.user" expands to
/// "host.user.ts.net" and is answered with the A record 100.64.0.1. (Under the "user.ts.net" search
/// domain the same query would expand to "host.user.user.ts.net", which matches nothing — hence
/// the override.)
#[test]
fn partially_qualified_name_resolves_via_search_domain() {
    let mut dns = view_with_peer();
    dns.cfg.search_domains = vec!["ts.net".to_string()];

    let query = build_query(0x55, &["host", "user"], 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError via search-domain expansion");
    assert_eq!(answers, 1);

    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 1]);
}
1652
/// A control-pushed static A record answers for a name that is not a peer.
#[test]
fn extra_record_a_answers_when_no_peer_match() {
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut dns = view_with_peer();
    dns.cfg.extra_records = vec![record];

    let query = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError from extra record");
    assert_eq!(answers, 1);

    // The A rdata at the end of the reply is the record's address.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 9]);
}
1670
/// A mixed-case query matches a lowercase extra record.
///
/// The query name is canonicalized (lowercased) at decode time, so case differences in the query
/// do not prevent the match.
#[test]
fn extra_record_matches_query_case_insensitively() {
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut dns = view_with_peer();
    dns.cfg.extra_records = vec![record];

    let mixed_case = ["Static", "User", "TS", "net"];
    let query = build_query(0x7A, &mixed_case, 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError: case-insensitive match");
    assert_eq!(answers, 1);

    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 9]);
}
1689
/// Extra records are matched as FQDNs only; search-domain expansion never reaches them.
///
/// Unlike peer names, a bare query that would need search-domain expansion to reach the record
/// name must NOT resolve.
#[test]
fn extra_record_not_expanded_by_search_domain() {
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut dns = view_with_peer();
    dns.cfg.extra_records = vec![record];

    // Bare "static" could only become "static.user.ts.net" via the "user.ts.net" search domain.
    let query = build_query(0x7B, &["static"], 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    // Without expansion the query is the bare off-tailnet name "static". No upstream is configured
    // here, so it soft-fails with SERVFAIL. The point of the test — the extra record is NOT
    // reachable through search expansion — holds whatever the failure rcode is.
    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "ServFail: bare 'static' is not search-expanded to the extra record"
    );
}
1712
/// An A-only extra record must NOT satisfy an AAAA query for the same name (NxDomain).
#[test]
fn extra_record_aaaa_family_is_isolated() {
    const QTYPE_AAAA: u16 = 28;

    let v4_only = ts_control::ExtraRecord {
        name: "v4only.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut dns = view_with_peer();
    dns.cfg.extra_records = vec![v4_only];

    let query = build_query(0x78, &["v4only", "user", "ts", "net"], QTYPE_AAAA, 1);
    let reply = answer(&dns, &query).expect("answers");

    let rcode = parse_header(&reply).1;
    assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
}
1727
/// Fail closed: extra records are never served while MagicDNS is disabled.
#[test]
fn extra_record_ignored_when_magic_dns_off() {
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut dns = view_with_peer();
    dns.cfg.magic_dns = false;
    dns.cfg.extra_records = vec![record];

    let query = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&dns, &query).expect("answers");

    let rcode = parse_header(&reply).1;
    assert_eq!(rcode, 5, "Refused");
}
1743
/// A CHAOS-class (3) query for a tailnet name gets NODATA.
///
/// It must NOT be answered as if it were IN (no overlay A), and must NOT be REFUSED (Go does no
/// class check on the local path). It is an unsupported authoritative class, so the reply is an
/// empty NOERROR, and the tailnet name is never forwarded.
#[test]
fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
    let qclass_chaos = 3;

    let dns = view_with_peer();
    let query = build_query(0x66, &["host", "user", "ts", "net"], 1, qclass_chaos);
    let reply = answer(&dns, &query).expect("answers");

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(
        rcode, 0,
        "NoError (NODATA), not Refused and not an IN answer"
    );
    assert_eq!(
        answers, 0,
        "must not hand out the overlay A for a non-IN class"
    );
}
1763
/// A non-IN class for an OFF-tailnet name is forwardable (Go forwards it), never REFUSED.
///
/// No upstream is configured in this view, so the outcome is SERVFAIL — which shows the class gate
/// no longer short-circuits to Refused.
#[test]
fn non_in_class_off_tailnet_forwards_or_servfails() {
    let qclass_chaos = 3;

    let dns = view_with_peer();
    let query = build_query(0x66, &["example", "com"], 1, qclass_chaos);
    let reply = answer(&dns, &query).expect("answers");

    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "off-tailnet non-IN class, no upstream -> SERVFAIL, not Refused"
    );
}
1778
1779 /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1780 /// + global resolvers.
1781 fn view_with_routes(
1782 routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1783 resolvers: Vec<DnsResolver>,
1784 fallback: Vec<DnsResolver>,
1785 ) -> DnsView {
1786 DnsView {
1787 cfg: DnsConfig {
1788 magic_dns: true,
1789 search_domains: vec!["user.ts.net".to_string()],
1790 routes,
1791 resolvers,
1792 fallback_resolvers: fallback,
1793 ..Default::default()
1794 },
1795 peers: None,
1796 self_node: None,
1797 exit_doh: None,
1798 enable_ipv6: false,
1799 accept_dns: true,
1800 }
1801 }
1802
1803 fn udp(addr: &str) -> DnsResolver {
1804 DnsResolver {
1805 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1806 use_with_exit_node: false,
1807 }
1808 }
1809
/// A name under a split-DNS route is forwarded to that route's upstream.
#[test]
fn split_dns_route_forwards_to_matching_upstream() {
    let routes = std::collections::BTreeMap::from([(
        "corp.example".to_string(),
        vec![udp("10.0.0.53:53")],
    )]);
    let dns = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x100, &["api", "corp", "example"], 1, 1);

    let upstreams = match decide(&dns, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
}
1824
/// The core of the fix: an HTTPS-record (type 65) query under a matching route is FORWARDED.
///
/// The query goes to the upstream verbatim, exactly as an A query would — it is neither REFUSED
/// nor NXDOMAIN. This is the browser HTTP/3 + ECH case the old blanket-REFUSE broke.
#[test]
fn exotic_qtype_off_tailnet_forwards_to_upstream() {
    const QTYPE_HTTPS: u16 = 65;

    let routes = std::collections::BTreeMap::from([(
        "corp.example".to_string(),
        vec![udp("10.0.0.53:53")],
    )]);
    let dns = view_with_routes(routes, Vec::new(), Vec::new());
    let wire = build_query(0x102, &["api", "corp", "example"], QTYPE_HTTPS, 1);

    let (upstreams, forwarded) = match decide(&dns, &wire).expect("decides") {
        Decision::Forward {
            upstreams, query, ..
        } => (upstreams, query),
        Decision::Reply(_) => {
            panic!("an off-tailnet HTTPS-record query must forward, not reply")
        }
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
    assert_eq!(forwarded, wire, "the exotic-qtype query is forwarded verbatim");
}
1847
/// A non-IN class for an off-tailnet routed name is forwarded as well.
///
/// Go does no class check on the local path; this shows the class gate no longer short-circuits
/// to REFUSED before routing.
#[test]
fn non_in_class_off_tailnet_forwards_to_upstream() {
    let qclass_chaos = 3;

    let routes = std::collections::BTreeMap::from([(
        "corp.example".to_string(),
        vec![udp("10.0.0.53:53")],
    )]);
    let dns = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x103, &["api", "corp", "example"], 1, qclass_chaos);

    let upstreams = match decide(&dns, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => {
            panic!("an off-tailnet non-IN-class query must forward, not reply")
        }
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
}
1866
/// Pins the semaphore gating that bounds concurrent in-flight forwards.
///
/// `serve` acquires one `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and drops the
/// query fail-closed when the pool is exhausted, so a client spraying forwardable names can't open
/// unbounded overlay sockets. The async `serve` loop has no netstack-free test seam, so the
/// semaphore is exercised directly through the same `Arc<Semaphore>::try_acquire_owned` call the
/// loop uses: a drained pool refuses a new permit, and releasing one restores capacity.
#[test]
fn forward_inflight_cap_fails_closed_when_saturated() {
    use std::sync::Arc;

    use tokio::sync::Semaphore;

    let pool = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));

    // Take every permit; each one stands for a concurrently in-flight forward.
    let mut in_flight: Vec<_> = (0..MAX_INFLIGHT_FORWARDS)
        .map(|_| {
            Arc::clone(&pool)
                .try_acquire_owned()
                .expect("permits available below the cap")
        })
        .collect();

    // Saturated: the next forward gets no permit — `serve` would drop the query, not spawn.
    let refused = Arc::clone(&pool).try_acquire_owned().is_err();
    assert!(
        refused,
        "a saturated forward pool must refuse a new permit (fail closed)"
    );

    // A forward completing releases its permit, which restores capacity.
    let finished = in_flight.pop();
    drop(finished);
    assert!(
        Arc::clone(&pool).try_acquire_owned().is_ok(),
        "releasing a permit must let the next forward proceed"
    );
}
1906
1907 /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1908 /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1909 /// only when the task completes. This guards the regression the saturation test above can't see:
1910 /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1911 /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1912 /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1913 /// while the task runs and refill once it finishes.
1914 #[tokio::test]
1915 async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1916 use std::sync::Arc;
1917
1918 use tokio::sync::Semaphore;
1919
1920 let inflight = Arc::new(Semaphore::new(1));
1921 let permit = inflight
1922 .clone()
1923 .try_acquire_owned()
1924 .expect("the sole permit is available");
1925
1926 let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1927 let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1928 let task = tokio::spawn(async move {
1929 // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1930 // task, so it lives until the body ends — not dropped at the `let`.
1931 let _permit = permit;
1932 started_tx.send(()).unwrap();
1933 // Stand in for the `.await` on the upstream forward.
1934 release_rx.await.unwrap();
1935 });
1936
1937 started_rx.await.unwrap();
1938 // While the task runs, the permit it moved in is still held — the pool is empty.
1939 assert!(
1940 inflight.clone().try_acquire_owned().is_err(),
1941 "a permit moved into a running task must stay held across its await"
1942 );
1943
1944 // Let the task finish; its permit drops with the body and capacity returns.
1945 release_tx.send(()).unwrap();
1946 task.await.unwrap();
1947 assert!(
1948 inflight.clone().try_acquire_owned().is_ok(),
1949 "the permit must be released once the task body completes"
1950 );
1951 }
1952
/// When several route suffixes match, the longest one selects the upstream.
#[test]
fn longest_suffix_route_wins() {
    let routes = std::collections::BTreeMap::from([
        ("example".to_string(), vec![udp("10.0.0.1:53")]),
        ("corp.example".to_string(), vec![udp("10.0.0.2:53")]),
    ]);
    let dns = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x101, &["api", "corp", "example"], 1, 1);

    let upstreams = match decide(&dns, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward"),
    };
    assert_eq!(
        upstreams,
        vec!["10.0.0.2:53".parse().unwrap()],
        "longer suffix wins"
    );
}
1972
/// A route with an empty upstream list is a negative route: fail closed, never forward.
#[test]
fn negative_route_is_nxdomain_not_forwarded() {
    let routes = std::collections::BTreeMap::from([("blocked.example".to_string(), Vec::new())]);
    // A global resolver is configured, so forwarding would be possible without the route.
    let dns = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());
    let query = build_query(0x102, &["x", "blocked", "example"], 1, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("negative route must not forward"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
}
1989
/// With no matching route, fallback resolvers are preferred over the global resolvers.
#[test]
fn unrouted_name_forwards_to_fallback_then_global() {
    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let dns = view_with_routes(std::collections::BTreeMap::new(), global, fallback);
    let query = build_query(0x103, &["example", "com"], 1, 1);

    let upstreams = match decide(&dns, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to fallback"),
    };
    assert_eq!(
        upstreams,
        vec!["1.1.1.1:53".parse().unwrap()],
        "fallback preferred"
    );
}
2011
/// With no matching route and no fallback resolvers, the global resolver is used.
#[test]
fn unrouted_name_forwards_to_global_when_no_fallback() {
    let global = vec![udp("8.8.8.8:53")];
    let dns = view_with_routes(std::collections::BTreeMap::new(), global, Vec::new());
    let query = build_query(0x104, &["example", "com"], 1, 1);

    let upstreams = match decide(&dns, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to global resolver"),
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
2028
/// Anti-leak: an unmatched name under a tailnet search domain fails closed to NXDOMAIN.
///
/// It must never leak to an upstream resolver, even with resolvers configured.
#[test]
fn tailnet_name_is_never_forwarded() {
    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let dns = view_with_routes(std::collections::BTreeMap::new(), global, fallback);

    // "ghost.user.ts.net" sits under the tailnet suffix but corresponds to no peer.
    let query = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
}
2049
/// With no route and no resolvers, an OFF-tailnet name cannot be forwarded and gets SERVFAIL.
///
/// Go answers SERVFAIL here (forwarder.go:1207 "no upstream resolvers set, returning SERVFAIL"),
/// NOT NXDOMAIN: a cacheable non-existence for a real name we merely couldn't forward would poison
/// downstream stub caches. The name is still never forwarded (it does not leak); the failure is
/// just soft.
#[test]
fn no_resolvers_off_tailnet_is_servfail_not_nxdomain() {
    let dns = view_with_routes(std::collections::BTreeMap::new(), Vec::new(), Vec::new());
    let query = build_query(0x106, &["example", "com"], 1, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("must not forward with no resolvers"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(
        rcode, 2,
        "ServFail: off-tailnet name with no upstream to forward to"
    );
}
2070
/// A route whose resolvers are all IPv6 leaves nowhere to forward, so the answer is SERVFAIL.
///
/// The split-DNS route exists, but every resolver is filtered out under the IPv4-only egress.
/// That is an inability to forward an off-tailnet name, so the failure is soft (SERVFAIL) rather
/// than a fabricated NXDOMAIN.
#[test]
fn route_with_only_ipv6_upstreams_off_tailnet_is_servfail() {
    let routes = std::collections::BTreeMap::from([(
        "corp.example".to_string(),
        vec![udp("[2001:db8::53]:53")],
    )]);
    let dns = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x108, &["host", "corp", "example"], 1, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("must not forward when all upstreams are filtered"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(
        rcode, 2,
        "ServFail: route's resolvers all filtered out (IPv6-only), cannot forward"
    );
}
2092
/// A known peer name resolves authoritatively even when upstream resolvers are configured.
#[test]
fn overlay_match_wins_over_forwarding() {
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec!["user.ts.net".to_string()],
        resolvers: vec![udp("8.8.8.8:53")],
        ..Default::default()
    };
    let dns = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };
    let query = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("overlay match must not forward"),
    };

    let (_id, rcode, answers) = parse_header(&reply);
    assert_eq!(rcode, 0, "authoritative answer wins");
    assert_eq!(answers, 1);
}
2122
/// Anti-leak: an `ip6.arpa` reverse PTR for a tailnet ULA (fd7a:…) fails closed to NXDOMAIN.
///
/// It must never be forwarded, even with upstream resolvers configured. This fork is IPv4-only on
/// the tailnet; forwarding would reveal that a v6 address was probed.
#[test]
fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
    const QTYPE_PTR: u16 = 12;

    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let dns = view_with_routes(std::collections::BTreeMap::new(), global, fallback);

    // Nibble-reversed name for fd7a::1 under ip6.arpa. The exact nibble labels don't matter to the
    // guard — any name ending in ip6.arpa must fail closed.
    let labels = [
        "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
        "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
        "arpa",
    ];
    let query = build_query(0x200, &labels, QTYPE_PTR, 1);

    let reply = match decide(&dns, &query).expect("decides") {
        Decision::Reply(reply) => reply,
        Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
    };

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(
        rcode, 3,
        "NxDomain: ip6.arpa reverse must not leak upstream"
    );
}
2153
/// An oversize upstream answer is capped to one datagram AND flagged truncated.
///
/// The TC bit tells the stub resolver to retry over TCP rather than trust a chopped message.
#[test]
fn cap_response_sets_tc_when_truncated() {
    let mut oversize = build_query(0x300, &["example", "com"], 1, 1);
    // Turn the query into a response (QR=1), then pad it past the cap.
    oversize[2] |= 0x80;
    oversize.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);

    let capped = cap_response(oversize);

    assert_eq!(capped.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
    let tc_bit = capped[2] & 0x02;
    assert_ne!(tc_bit, 0, "TC bit set on truncation");
}
2166
/// A response that already fits is returned verbatim, with no TC bit forced on.
#[test]
fn cap_response_leaves_small_response_untouched() {
    let mut response = build_query(0x301, &["example", "com"], 1, 1);
    response[2] |= 0x80;
    let original = response.clone();

    let capped = cap_response(response);

    assert_eq!(capped, original, "small response unchanged");
    let tc_bit = capped[2] & 0x02;
    assert_eq!(tc_bit, 0, "TC bit not set when no truncation");
}
2178
/// A response whose id and QR bit match but whose echoed question differs is rejected.
///
/// This guards against an off-path injector that guesses the id but answers a different question.
#[test]
fn response_matches_query_rejects_mismatched_question() {
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Same id and QR=1, but a different QNAME.
    let other_name = {
        let mut bytes = build_query(0x1234, &["b", "com"], 1, 1);
        bytes[2] |= 0x80;
        bytes
    };
    assert!(
        !response_matches_query(&query, &other_name),
        "different QNAME must be rejected"
    );

    // Same name, but a different QTYPE (28 instead of 1).
    let other_qtype = {
        let mut bytes = build_query(0x1234, &["a", "com"], 28, 1);
        bytes[2] |= 0x80;
        bytes
    };
    assert!(
        !response_matches_query(&query, &other_qtype),
        "different QTYPE must be rejected"
    );

    // The exact question echoed back with QR=1 is accepted.
    let echoed = {
        let mut bytes = query.clone();
        bytes[2] |= 0x80;
        bytes
    };
    assert!(
        response_matches_query(&query, &echoed),
        "matching question accepted"
    );
}
2208
#[test]
fn suffix_matches_handles_boundaries_and_empty() {
    // Table of (name, suffix, expected) checked in order:
    //   - exact and label-boundary matches succeed,
    //   - a textual suffix that does not start on a label boundary fails,
    //   - an empty suffix never matches (defense-in-depth against `ends_with("")`).
    let cases: [(&str, &str, bool); 6] = [
        ("corp", "corp", true),
        ("a.corp", "corp", true),
        ("a.b.corp", "corp", true),
        ("acorp", "corp", false),
        ("anything.example", "", false),
        ("", "", false),
    ];

    for &(name, suffix, expected) in cases.iter() {
        assert_eq!(
            suffix_matches(name, suffix),
            expected,
            "suffix_matches({:?}, {:?})",
            name,
            suffix
        );
    }
}
2221
#[test]
fn empty_search_domain_does_not_capture_everything() {
    // Defense-in-depth: an empty string in the search-domain list must not make every name look
    // like a tailnet name. With a global resolver configured and no routes, an off-tailnet name
    // is still forwarded to that resolver.
    let no_routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);
    view.cfg.search_domains = vec![String::new()];

    let query = build_query(0x400, &["example", "com"], 1, 1);
    let decision = decide(&view, &query).expect("decides");

    if let Decision::Forward { upstreams, .. } = decision {
        assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
    } else {
        panic!("empty search domain must not treat every name as tailnet")
    }
}
2244
#[test]
fn empty_route_suffix_does_not_capture_everything() {
    // Defense-in-depth: a route keyed by the empty suffix must not swallow every query and send
    // it to that route's upstreams. An unrelated name has to fall through to the global resolver.
    let routes: std::collections::BTreeMap<_, _> =
        std::iter::once((String::new(), vec![udp("10.9.9.9:53")])).collect();
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);

    let query = build_query(0x401, &["example", "com"], 1, 1);
    let decision = decide(&view, &query).expect("decides");

    match decision {
        Decision::Reply(_) => panic!("expected forward to global resolver"),
        Decision::Forward { upstreams, .. } => assert_eq!(
            upstreams,
            vec!["8.8.8.8:53".parse().unwrap()],
            "empty route suffix must not capture; falls through to global"
        ),
    }
}
2266
2267 fn udp_exit(addr: &str) -> DnsResolver {
2268 DnsResolver {
2269 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2270 use_with_exit_node: true,
2271 }
2272 }
2273
#[test]
fn recursive_forward_is_flagged_route_forward_is_not() {
    // The `recursive` flag on a forward decision separates the two forwarding paths:
    //   - a name under a configured split-DNS route => `recursive == false`
    //   - a name matching no route (global/fallback) => `recursive == true` (eligible for DoH
    //     delegation)
    let mut split_routes = std::collections::BTreeMap::new();
    split_routes.insert(String::from("corp.example"), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(split_routes, vec![udp("8.8.8.8:53")], vec![]);

    // `api.corp.example` falls under the `corp.example` route.
    let under_route = build_query(0x500, &["api", "corp", "example"], 1, 1);
    let route_decision = decide(&view, &under_route).expect("decides");
    if let Decision::Forward { recursive, .. } = route_decision {
        assert!(!recursive, "split-DNS route is not a recursive forward")
    } else {
        panic!("expected route forward")
    }

    // `example.com` matches no route, so it goes to the global resolver.
    let unrouted = build_query(0x501, &["example", "com"], 1, 1);
    let global_decision = decide(&view, &unrouted).expect("decides");
    if let Decision::Forward { recursive, .. } = global_decision {
        assert!(recursive, "unrouted name is a recursive forward")
    } else {
        panic!("expected recursive forward")
    }
}
2298
#[test]
fn recursive_plan_keeps_udp_without_exit_node() {
    // With no exit node active, the plan is simply the caller's default upstreams over UDP.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);

    let upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, upstreams.clone());

    assert_eq!(plan, RecursivePlan::Udp(upstreams));
}
2313
#[test]
fn recursive_plan_delegates_to_doh_with_exit_node() {
    // With an exit node active and no resolver flagged to stay local, recursive queries are
    // handed to the exit node's DoH endpoint, so resolution egresses from the exit node rather
    // than from this host.
    let no_routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);

    let exit_endpoint = "100.64.0.5:8080".parse::<SocketAddr>().unwrap();
    view.exit_doh = Some(exit_endpoint);

    let default_upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, default_upstreams);

    assert_eq!(plan, RecursivePlan::Doh(exit_endpoint));
}
2330
#[test]
fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
    // Resolvers flagged `use_with_exit_node` stay local even while an exit node is active (Go
    // keeps UseWithExitNode resolvers). The plan must forward to those over UDP and never
    // delegate to DoH.
    let no_routes = std::collections::BTreeMap::new();
    let configured = vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")];
    let mut view = view_with_routes(no_routes, configured, vec![]);
    view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());

    // The caller-computed default upstreams are irrelevant once kept-local resolvers exist; the
    // plan has to name the kept-local one instead.
    let caller_default = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, caller_default);
    let kept_local = vec!["10.0.0.53:53".parse().unwrap()];

    assert_eq!(plan, RecursivePlan::Udp(kept_local));
}
2348}