ts_runtime/magic_dns.rs
1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//! to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//! negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//! us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//! no route are forwarded to the fallback resolvers, else the global resolvers.
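//! - **Fail soft, never leak**: if no route and no resolver is configured, an unknown off-tailnet
//! name is `SERVFAIL` (a retryable soft failure, not a cacheable non-existence); an unknown name
//! under a tailnet search domain is `NXDOMAIN` and is never forwarded.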
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//! the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//! off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//! `dns.Config` when `CorpDNS` is off).
//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
//! HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name
//! (except NS/SOA/HINFO/AXFR, which answer NOTIMP like Go's resolver), forwarded verbatim to
//! upstream for an off-tailnet name — exactly like Go's resolver, NOT `REFUSED` (a stub reads
//! REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse zones (CGNAT
//! `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype (never
//! forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37 net::{IpAddr, Ipv4Addr, SocketAddr},
38 sync::Arc,
39 time::Duration,
40};
41
42use kameo::{
43 actor::ActorRef,
44 message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48 sync::{Semaphore, watch},
49 task::JoinSet,
50 time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56 Error,
57 env::Env,
58 peer_tracker::{PeerDb, PeerState},
59};
60
/// Deadline for one forwarded query: how long we wait for an upstream resolver's answer before
/// giving up on it.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);

/// Upper bound on forwarded queries in flight at once on the local `100.100.100.100:53` responder.
///
/// Every forward runs on its own task and keeps an overlay UDP socket open until the upstream
/// answers or [`UPSTREAM_TIMEOUT`] expires. Left unbounded, a local/tailnet client spraying distinct
/// forwardable names could pile up sockets and tasks without limit (a resource-exhaustion DoS
/// against a slow or black-holed upstream, because each one lingers for the whole timeout). The
/// bound works like the peerAPI DoH server's handler limit ([`crate::peerapi`]'s `MAX_INFLIGHT`):
/// take a permit before spawning and, when saturated, drop the query fail-closed. Dropping a DNS
/// query is benign — the stub resolver retries or times out — and Go's resolver also bounds its
/// outstanding forwards instead of spawning freely.
const MAX_INFLIGHT_FORWARDS: usize = 512;

/// Largest forwarded upstream response (one UDP datagram) we keep in memory.
///
/// Mirrors Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). Queries are forwarded verbatim,
/// so a client that advertises a large EDNS UDP size can legitimately draw a 1300–4096 byte UDP
/// answer (large TXT sets, DNSSEC, long round-robin record sets). The previous 1232-byte cap
/// truncated such answers and set TC, which demands a TCP retry that this fork's UDP-only
/// forwarder cannot serve, leaving the answer unreachable. At 4096 bytes they pass through intact.
const MAX_UPSTREAM_RESPONSE: usize = 4 * 1024;

/// The MagicDNS service address. It belongs to the netstack interface, so a `udp_bind` on it
/// receives the tailnet's DNS traffic.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);

/// The UDP port the DNS service listens on.
const MAGIC_DNS_PORT: u16 = 53;
88
/// The latest view the answer loop resolves queries against.
///
/// Updated by the actor's message handlers (from control `StateUpdate` and peer `PeerState`
/// updates) and read fresh by the answer loop for every packet. [`decide`] takes this view by
/// shared reference and performs no I/O, so one snapshot fully determines the answer to a query.
#[derive(Clone, Default)]
pub(crate) struct DnsView {
    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing: [`decide`]
    /// answers every query `REFUSED`.
    pub(crate) cfg: DnsConfig,
    /// The current peer database, if we've seen a peer update. `None` means peer lookups (by name
    /// or by IP) match nothing and only the self node / extra records can answer.
    pub(crate) peers: Option<Arc<PeerDb>>,
    /// This node, if we've seen a self-node update. Consulted after the peer database for both
    /// forward (name) and reverse (IP) lookups.
    pub(crate) self_node: Option<Node>,
    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
    /// recursive resolution to this address over the overlay instead of forwarding to the locally
    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
    ///
    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
    pub(crate) exit_doh: Option<SocketAddr>,
    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
    ///
    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
    pub(crate) enable_ipv6: bool,
    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
    /// join for connectivity without taking over DNS.
    ///
    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
    pub(crate) accept_dns: bool,
}
129
130impl DnsView {
131 /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132 fn node_by_name(&self, name: &str) -> Option<Node> {
133 if let Some(node) = self
134 .peers
135 .as_ref()
136 .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137 {
138 return Some(node);
139 }
140
141 self.self_node
142 .as_ref()
143 .filter(|n| n.matches_name(name))
144 .cloned()
145 }
146
147 /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148 /// wins first — tried as written and then qualified by each tailnet search domain (so a
149 /// short/partially-qualified name like `host` or `host.user` still resolves to
150 /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151 /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152 /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153 /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154 /// control-pushed static record — never anything else.
155 fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156 let addr_of = |node: Node| -> IpAddr {
157 if want_v4 {
158 IpAddr::from(node.tailnet_address.ipv4.addr())
159 } else {
160 IpAddr::from(node.tailnet_address.ipv6.addr())
161 }
162 };
163
164 if let Some(node) = self.node_by_name(canon) {
165 return Some(addr_of(node));
166 }
167 for suffix in &self.cfg.search_domains {
168 if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169 return Some(addr_of(node));
170 }
171 }
172
173 // Control-pushed static records match the fully-qualified query name only.
174 self.cfg.extra_records.iter().find_map(|rec| {
175 let family_ok = matches!(
176 (rec.addr, want_v4),
177 (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178 );
179 (rec.name == canon && family_ok).then_some(rec.addr)
180 })
181 }
182
183 /// Find the node (peer or self) that owns the tailnet IP `ip`.
184 fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185 if let Some(node) = self
186 .peers
187 .as_ref()
188 .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189 {
190 return Some(node);
191 }
192
193 self.self_node
194 .as_ref()
195 .filter(|n| {
196 IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197 || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198 })
199 .cloned()
200 }
201
202 /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203 /// resolvers, returning the upstreams to forward to.
204 ///
205 /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206 /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207 /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208 /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209 /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210 /// ([`Upstreams::None`] => NXDOMAIN).
211 fn route_for(&self, name: &str) -> Upstreams<'_> {
212 let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213 for (suffix, upstreams) in &self.cfg.routes {
214 if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215 best = Some((suffix.as_str(), upstreams));
216 }
217 }
218
219 if let Some((_, upstreams)) = best {
220 return if upstreams.is_empty() {
221 Upstreams::Block
222 } else {
223 // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224 // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225 // route) must keep receiving the query directly.
226 Upstreams::Route(upstreams)
227 };
228 }
229
230 if !self.cfg.fallback_resolvers.is_empty() {
231 return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232 }
233 if !self.cfg.resolvers.is_empty() {
234 return Upstreams::Recursive(&self.cfg.resolvers);
235 }
236 Upstreams::None
237 }
238}
239
/// The upstreams a non-overlay query should be forwarded to (or why it should not be forwarded).
///
/// Produced by `DnsView::route_for` and turned into a [`Decision`] by [`forward_or_nxdomain`].
enum Upstreams<'a> {
    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
    Route(&'a [DnsResolver]),
    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
    /// exit-node DoH delegation in the client serve loop.
    Recursive(&'a [DnsResolver]),
    /// A negative split-DNS route matched (a route with an empty upstream list): do not resolve;
    /// [`forward_or_nxdomain`] answers NXDOMAIN.
    Block,
    /// No route matched and no resolver is configured, so there is nowhere to forward the name.
    /// Nothing leaves the node; [`forward_or_nxdomain`] answers SERVFAIL (a soft failure the stub
    /// can retry), not NXDOMAIN.
    None,
}
252
/// What the (sync) decision step concluded for a query: either a complete response to send back,
/// or a request to forward the original query to an upstream resolver.
///
/// Returned (wrapped in `Option`) by [`decide`]; a `None` there means "drop the packet".
pub(crate) enum Decision {
    /// A fully-formed, already-encoded DNS response that is ready to send as-is.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of these upstream UDP resolvers; on success
    /// relay the upstream answer, on failure/timeout answer with the prebuilt `servfail` buffer
    /// (an off-tailnet name we failed to forward is a soft failure, not a cacheable non-existence —
    /// Go forwarder.go:1297-1307).
    Forward {
        /// UDP upstreams to try, in order. As built by [`forward_or_nxdomain`] this list is
        /// non-empty and contains IPv4 addresses only.
        upstreams: Vec<SocketAddr>,
        /// The original query bytes to forward verbatim.
        query: Vec<u8>,
        /// Fallback SERVFAIL response if every upstream fails or times out.
        servfail: Vec<u8>,
        /// Whether this is a *recursive* (catch-all fallback/global resolver) forward, as opposed
        /// to a deliberately-configured split-DNS route. Only recursive forwards are eligible for
        /// exit-node DoH delegation in the client serve loop (see [`DnsView::exit_doh`]); split-DNS
        /// routes always stay on their configured upstreams (typically subnet-reachable internal
        /// resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
277
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or ends with `suffix`
/// immediately preceded by a `.` — so `"a.corp"` is under `"corp"` while `"acorp"` is not.
///
/// An **empty** `suffix` matches nothing. This is defense-in-depth: every string ends with `""`,
/// so an empty suffix would otherwise match every name and either over-route or classify
/// everything as a tailnet name, both of which are leak-prone.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    match name.strip_suffix(suffix) {
        // Nothing left over means the two are equal; otherwise the remainder must stop at a dot.
        Some(remainder) => remainder.is_empty() || remainder.ends_with('.'),
        None => false,
    }
}
291
292/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
293/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
294/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
295fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
296 view.cfg
297 .search_domains
298 .iter()
299 .any(|suffix| suffix_matches(name, suffix))
300}
301
302/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
303/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
304/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
305/// such queries fail closed to NXDOMAIN.
306fn is_ip6_arpa(name: &str) -> bool {
307 suffix_matches(name, "ip6.arpa")
308}
309
/// Whether `ip` lies in the Tailscale CGNAT block `100.64.0.0/10` (RFC 6598, the tailnet IPv4
/// space): first octet 100, second octet 64 through 127.
///
/// MagicDNS is authoritative for reverse (`PTR`) queries on these addresses: when no peer owns the
/// IP the query fails closed to NXDOMAIN rather than being forwarded to a third-party resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    matches!(ip.octets(), [100, 64..=127, _, _])
}
317
318/// Decide what to do with a single DNS query against `view`: either a complete response is ready
319/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
320/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
321///
322/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
323/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
324/// NXDOMAIN rather than leaking to an upstream resolver.
325pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
326 // Malformed / non-query input is dropped: we never answer something we can't parse.
327 let query = decode_query(buf).ok()?;
328 let q = &query.question;
329 let id = query.id;
330 // Echo the query's RD bit (and set RA when set) on the response — Go derives the response header
331 // from the query header.
332 let rd = query.recursion_desired;
333
334 let reply =
335 |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rd, rcode, answers));
336
337 // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
338 // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
339 // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
340 // config and refuses every query. This one read site covers the netstack responder, the peerAPI
341 // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
342 if !view.cfg.magic_dns || !view.accept_dns {
343 return Some(reply(Rcode::Refused, &[]));
344 }
345
346 let canon = q.name.to_canon();
347
348 // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
349 // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
350 // forwards such a query like any other name. Treat it as an unsupported authoritative type —
351 // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
352 // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
353 const CLASS_IN: u16 = 1;
354 if q.qclass != CLASS_IN {
355 return Some(forward_or_nodata(view, &canon, buf, id, q, rd));
356 }
357
358 Some(match &q.qtype {
359 QType::A => match view.resolve_addr(&canon, true) {
360 Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
361 // No overlay/extra-record answer: try split-DNS / recursive upstreams.
362 _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
363 },
364 QType::Aaaa => match view.resolve_addr(&canon, false) {
365 // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
366 // (default) the client is IPv4-only, so answering with the overlay v6 address would
367 // only hand out an unroutable address — dead connections plus a fingerprint. Return
368 // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
369 // data as historically. We never forward this name to a recursive upstream either way:
370 // a positive overlay match is authoritative.
371 Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
372 reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
373 }
374 Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
375 // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
376 // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
377 _ => forward_or_nxdomain(view, &canon, buf, id, q, rd),
378 },
379 QType::Ptr => match q.name.ptr_to_ipv4() {
380 Some(octets) => {
381 let v4: Ipv4Addr = octets.into();
382 let ip = IpAddr::V4(v4);
383 match view.node_by_ip(ip) {
384 Some(node) => {
385 let fqdn = node.fqdn(false);
386 let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
387 reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
388 }
389 // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
390 // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
391 // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
392 // genuinely off-tailnet reverse queries are forwarded.
393 None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
394 None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
395 }
396 }
397 // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
398 // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
399 // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
400 None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
401 None => forward_or_nxdomain(view, &canon, buf, id, q, rd),
402 },
403 // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
404 // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
405 // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
406 // REFUSED as "this server won't serve me" and abandons the resolver, which would break
407 // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
408 // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
409 QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q, rd),
410 })
411}
412
413/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
414/// either forward (to UDP upstreams), answer authoritatively absent (NXDOMAIN), or fail soft
415/// (SERVFAIL) when an off-tailnet name simply can't be forwarded.
416///
417/// Rcode parity with Go's resolver (`net/dns/resolver/tsdns.go` resolution order + `forwarder.go`):
418/// - A **tailnet-authoritative** name (search-domain suffix) or a **negative split-DNS route**
419/// (`Upstreams::Block` — a route configured with no resolvers, which Go answers authoritatively
420/// from Hosts, so an unmatched name under it is authoritatively absent) → **NXDOMAIN**.
421/// - An **off-tailnet** name we cannot forward — no route and no resolver configured
422/// (`Upstreams::None`), or a route whose resolvers are all filtered out (IPv6-only under the
423/// IPv4-only egress) → **SERVFAIL**, matching Go forwarder.go:1207 ("no upstream resolvers set,
424/// returning SERVFAIL"). A cacheable NXDOMAIN on a transient/structural inability to forward would
425/// make a downstream stub cache the *non-existence* of a real name; SERVFAIL is a soft failure the
426/// stub retries.
427///
428/// Anti-leak: a tailnet-suffix name is authoritative and is never forwarded — neither the name nor
429/// the query leaks to a third-party resolver. (The CGNAT `in-addr.arpa` / `ip6.arpa` reverse-zone
430/// NXDOMAIN guards live in the PTR arm of [`decide`] and are likewise unaffected.)
431fn forward_or_nxdomain(
432 view: &DnsView,
433 canon: &str,
434 buf: &[u8],
435 id: u16,
436 q: &ts_dns_wire::Question,
437 rd: bool,
438) -> Decision {
439 // NXDOMAIN for authoritative-absent names; SERVFAIL for an off-tailnet name we can't forward.
440 let nxdomain = encode_response(id, q, rd, Rcode::NxDomain, &[]);
441 let servfail = encode_response(id, q, rd, Rcode::ServFail, &[]);
442
443 if is_tailnet_name(view, canon) {
444 return Decision::Reply(nxdomain);
445 }
446
447 let (resolvers, recursive) = match view.route_for(canon) {
448 Upstreams::Route(resolvers) => (resolvers, false),
449 Upstreams::Recursive(resolvers) => (resolvers, true),
450 // A negative split-DNS route is authoritative-absent (Go answers it from Hosts): NXDOMAIN.
451 Upstreams::Block => return Decision::Reply(nxdomain),
452 // No route and no resolver: an off-tailnet name we have nowhere to forward — SERVFAIL, not
453 // a cacheable non-existence (Go forwarder.go:1207).
454 Upstreams::None => return Decision::Reply(servfail),
455 };
456
457 let upstreams: Vec<SocketAddr> = resolvers
458 .iter()
459 .map(DnsResolver::udp_addr)
460 // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
461 .filter(SocketAddr::is_ipv4)
462 .collect();
463 if upstreams.is_empty() {
464 // We had a route but every resolver was filtered out (IPv6-only): we cannot forward this
465 // off-tailnet name, so soft-fail rather than assert non-existence.
466 Decision::Reply(servfail)
467 } else {
468 Decision::Forward {
469 upstreams,
470 query: buf.to_vec(),
471 // All upstreams failing at runtime is also an inability to forward, not a non-existence
472 // (Go forwarder.go:1297-1307): hand the forwarder a SERVFAIL fallback, not NXDOMAIN.
473 servfail,
474 recursive,
475 }
476 }
477}
478
479/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
480/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
481/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
482/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
483fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
484 matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
485}
486
487/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
488/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
489/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
490/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
491/// ([`is_unimplemented_tailnet_qtype`]).
492///
493/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
494/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
495/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
496/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
497/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
498/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
499/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
500/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
501///
502/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
503/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
504fn forward_or_nodata(
505 view: &DnsView,
506 canon: &str,
507 buf: &[u8],
508 id: u16,
509 q: &ts_dns_wire::Question,
510 rd: bool,
511) -> Decision {
512 // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
513 // name exists, we just hold no record of that type. But a small set of types Go's resolver
514 // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
515 // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
516 // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
517 // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
518 // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
519 // authoritative for.
520 if is_tailnet_name(view, canon) {
521 let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
522 Rcode::NotImpl
523 } else {
524 Rcode::NoError
525 };
526 return Decision::Reply(encode_response(id, q, rd, rcode, &[]));
527 }
528 // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
529 // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
530 // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
531 // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
532 // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
533 // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
534 if is_ip6_arpa(canon) {
535 return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
536 }
537 if let Some(octets) = q.name.ptr_to_ipv4()
538 && is_tailnet_cgnat(octets.into())
539 {
540 return Decision::Reply(encode_response(id, q, rd, Rcode::NxDomain, &[]));
541 }
542 // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
543 // non-tailnet names and soft-fails (SERVFAIL) when no upstream is configured/routable; reuse it
544 // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN and negative-route paths
545 // are unreachable here — this only exercises its off-tailnet forward / SERVFAIL dispositions).
546 forward_or_nxdomain(view, canon, buf, id, q, rd)
547}
548
/// Client-side plan for a *recursive* forward: keep resolving over local UDP upstreams, or delegate
/// the query to the active exit node's peerAPI DoH endpoint over the overlay.
///
/// Computed by [`recursive_plan`] from the current [`DnsView`].
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Used when no exit node is active, or when the config
    /// has `use_with_exit_node` resolvers (kept local even with an exit node selected).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address
    /// (the value of [`DnsView::exit_doh`]).
    Doh(SocketAddr),
}
559
560/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
561/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
562///
563/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
564/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
565/// resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
566/// is selected); forward to them over UDP, do NOT delegate.
567/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
568/// then egresses from the exit node, not this host (the whole point of routing through an exit
569/// node: this node's real IP is never used to resolve the peer's public names).
570pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
571 let Some(doh) = view.exit_doh else {
572 return RecursivePlan::Udp(default_upstreams);
573 };
574 let kept: Vec<SocketAddr> = view
575 .cfg
576 .resolvers_with_exit_node()
577 .map(DnsResolver::udp_addr)
578 // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
579 .filter(SocketAddr::is_ipv4)
580 .collect();
581 if kept.is_empty() {
582 RecursivePlan::Doh(doh)
583 } else {
584 RecursivePlan::Udp(kept)
585 }
586}
587
588/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
589/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
590/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
591/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
592/// truncation actually occurs.
593fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
594 if resp.len() > MAX_UPSTREAM_RESPONSE {
595 resp.truncate(MAX_UPSTREAM_RESPONSE);
596 // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
597 // capped datagram is always >= the header length, but guard anyway to never panic.
598 if let Some(flags_hi) = resp.get_mut(2) {
599 *flags_hi |= 0x02;
600 }
601 }
602 resp
603}
604
/// Size in bytes of the fixed DNS message header that precedes the question section.
const DNS_HEADER_LEN: usize = 12;

/// Locate the first question (QNAME + QTYPE + QCLASS) inside `msg` and return its byte range,
/// which always begins right after the 12-byte header.
///
/// Returns [`None`] when the header is missing, when a length byte has either of its top two bits
/// set (a compression pointer, which is illegal in a question, or a reserved label type), or when
/// the name or the 4 trailing QTYPE/QCLASS bytes would run past the end of the buffer. Used to
/// byte-compare a forwarded query's question against the upstream response's question.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    let mut cursor = DNS_HEADER_LEN;

    // Hop label by label until the zero-length root label that ends the QNAME.
    loop {
        let label_len = usize::from(*msg.get(cursor)?);
        if label_len & 0xC0 != 0 {
            return None;
        }
        cursor += 1; // step over the length byte
        if label_len == 0 {
            break;
        }
        // Step over the label body; it must fit inside the buffer.
        cursor = cursor
            .checked_add(label_len)
            .filter(|&next| next <= msg.len())?;
    }

    // QTYPE (2 bytes) + QCLASS (2 bytes) follow the name.
    let end = cursor.checked_add(4).filter(|&end| end <= msg.len())?;
    Some(DNS_HEADER_LEN..end)
}
637
638/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
639/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
640/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
641/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
642/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
643/// answer for a different question, so we also require the echoed question to match.
644fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
645 if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
646 return false;
647 }
648 let id_matches = query[0..2] == resp[0..2];
649 let is_response = resp[2] & 0x80 != 0;
650 if !id_matches || !is_response {
651 return false;
652 }
653 // The response must echo the exact question we asked. Parse both question sections and compare
654 // their bytes; a parse failure on either side is treated as a non-match (fail closed).
655 match (question_range(query), question_range(resp)) {
656 (Some(q), Some(r)) => query[q] == resp[r],
657 _ => false,
658 }
659}
660
661/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
662/// well-formed response, or the prebuilt `fallback` buffer if every upstream times out or errors.
663///
664/// The caller supplies `fallback` (a SERVFAIL response for a forwarded off-tailnet name — an
665/// all-upstream failure is a soft "couldn't resolve", not a cacheable non-existence, matching Go
666/// forwarder.go:1297-1307). Keeping it caller-supplied means this fn is rcode-agnostic.
667///
668/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
669/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
670/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
671/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
672pub(crate) async fn forward_query(
673 channel: &Channel,
674 upstreams: &[SocketAddr],
675 query: &[u8],
676 fallback: Vec<u8>,
677) -> Vec<u8> {
678 for upstream in upstreams {
679 let socket = match channel
680 .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
681 .await
682 {
683 Ok(s) => s,
684 Err(e) => {
685 tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
686 continue;
687 }
688 };
689
690 if let Err(e) = socket.send_to(*upstream, query).await {
691 tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
692 continue;
693 }
694
695 match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
696 Ok(Ok((from, resp))) if !resp.is_empty() => {
697 // Anti-poisoning: only accept a datagram that came from the upstream we queried
698 // and whose DNS header matches this query (same transaction id, QR=response bit
699 // set). An off-path injector racing the real answer is otherwise relayed straight
700 // back to the stub resolver as authoritative.
701 if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
702 tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
703 continue;
704 }
705 return cap_response(resp.to_vec());
706 }
707 Ok(Ok(_)) => continue,
708 Ok(Err(e)) => {
709 tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
710 continue;
711 }
712 Err(_) => {
713 tracing::debug!(%upstream, "magic dns upstream timed out");
714 continue;
715 }
716 }
717 }
718 fallback
719}
720
721/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
722///
723/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
724/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
725async fn serve(
726 socket: netstack::netsock::UdpSocket,
727 rx: watch::Receiver<Arc<DnsView>>,
728 channel: Channel,
729) {
730 let socket = Arc::new(socket);
731 let mut forwards = JoinSet::new();
732 // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
733 // lifetime of each spawned forward task and released on completion.
734 let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
735 loop {
736 let (src, buf) = match socket.recv_from_bytes().await {
737 Ok(pkt) => pkt,
738 Err(e) => {
739 tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
740 return;
741 }
742 };
743
744 // Read the freshest view per packet.
745 let view = rx.borrow().clone();
746
747 match decide(&view, &buf) {
748 // Malformed query: drop silently.
749 None => continue,
750 Some(Decision::Reply(resp)) => {
751 if let Err(e) = socket.send_to(src, &resp).await {
752 tracing::warn!(error = %e, %src, "magic dns response send failed");
753 }
754 }
755 Some(Decision::Forward {
756 upstreams,
757 query,
758 servfail,
759 recursive,
760 }) => {
761 // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
762 // always stays on its configured upstreams. Decide the plan against the current
763 // view so a query routed while an exit node is active egresses from that exit node.
764 let plan = if recursive {
765 recursive_plan(&view, upstreams)
766 } else {
767 RecursivePlan::Udp(upstreams)
768 };
769 // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
770 // times out) rather than spawn an unbounded task that pins an overlay socket for up
771 // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
772 // (NOT `let _ =`, which would drop it immediately) so it is released only when the
773 // task body completes.
774 let Ok(permit) = inflight.clone().try_acquire_owned() else {
775 tracing::warn!(
776 %src,
777 max = MAX_INFLIGHT_FORWARDS,
778 "magic dns drop: at max in-flight forwarded queries"
779 );
780 continue;
781 };
782 let socket = socket.clone();
783 let channel = channel.clone();
784 forwards.spawn(async move {
785 let _permit = permit;
786 let resp = match plan {
787 RecursivePlan::Udp(upstreams) => {
788 forward_query(&channel, &upstreams, &query, servfail).await
789 }
790 RecursivePlan::Doh(doh_addr) => {
791 crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, servfail)
792 .await
793 }
794 };
795 if let Err(e) = socket.send_to(src, &resp).await {
796 tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
797 }
798 });
799 }
800 }
801
802 // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
803 // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
804 // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
805 while forwards.try_join_next().is_some() {}
806 }
807}
808
/// The MagicDNS responder actor.
///
/// Subscribes to control state (for the DNS config + self node) and peer state (for the peer
/// database), keeping a [`DnsView`] that the spawned answer loop reads for every query.
pub struct MagicDnsActor {
    /// Keeps the socket-serving task alive for the lifetime of the actor. The same set also owns
    /// the peerAPI server task when the node advertises a peerAPI port. The field is never read;
    /// it exists only to tie those tasks' lifetime to the actor's.
    _joinset: JoinSet<()>,
    /// The latest view, shared with the answer loop (and with the peerAPI server, which receives
    /// a clone of the same watch receiver). Message handlers publish a rebuilt view through it.
    view_tx: watch::Sender<Arc<DnsView>>,
    /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
    /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
    /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
    /// rebuild time — not captured once — for a toggle to reach the served view.
    env: Env,
    /// The overlay channel, retained so the [`Query`] handler can run a query through the same
    /// forward path the serve loop uses ([`forward_query`] / [`forward_doh`], both binding
    /// `0.0.0.0:0` on this channel — never a host socket).
    channel: Channel,
}
828
/// A programmatic DNS query routed through the live MagicDNS responder (the `100.100.100.100` path),
/// for [`Device::query_dns`](crate::Device::query_dns). The handler synthesizes a query packet and
/// drives it through the exact same [`decide`]/forward logic as an on-the-wire query, so the result
/// (and its anti-leak posture) matches what a tailnet client would observe.
///
/// Derives `Debug` and `Clone`, like the [`DnsQueryResult`] it produces, so a query can be
/// logged and retried without being rebuilt.
#[derive(Debug, Clone)]
pub struct Query {
    /// The canonical name to resolve (e.g. `example.com`, no trailing dot). The handler strips a
    /// single trailing dot and ignores empty labels before encoding.
    pub name: String,
    /// The DNS query type (`1`=A, `28`=AAAA, `12`=PTR, or any other RFC 1035 TYPE).
    pub qtype: u16,
}
839
/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
/// rather than parsed records — this fork's wire codec has no answer-record decoder.
///
/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
#[derive(Debug, Clone, kameo::Reply)]
pub struct DnsQueryResult {
    /// The raw DNS response datagram (header + question + any answer records). Callers that need
    /// individual records must decode this buffer themselves.
    pub response: Vec<u8>,
    /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
    /// `5`=Refused, …). Derived from byte 3 of `response`, so it always agrees with the raw bytes.
    pub rcode: u8,
    /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
    /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
    /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
    /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
    pub resolvers_consulted: Vec<SocketAddr>,
}
859
860impl kameo::Actor for MagicDnsActor {
861 type Args = (Env, Channel);
862 type Error = Error;
863
864 async fn on_start(
865 (env, channel): Self::Args,
866 slf: ActorRef<Self>,
867 ) -> Result<Self, Self::Error> {
868 env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
869 env.subscribe::<Arc<PeerState>>(&slf).await?;
870 env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
871 .await?;
872
873 // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
874 // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
875 // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
876 // runtime-settable). The seed value is moot — no query is served before the first
877 // StateUpdate — but seeding it keeps the pre-update view internally consistent.
878 let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
879 enable_ipv6: env.enable_ipv6,
880 accept_dns: env.accept_dns(),
881 ..DnsView::default()
882 }));
883
884 let mut joinset = JoinSet::new();
885
886 // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
887 // never answers anything) so a transient bind error doesn't take down the runtime.
888 let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
889 match channel.udp_bind(addr).await {
890 Ok(socket) => {
891 tracing::debug!(%addr, "magic dns responder bound");
892 joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
893 }
894 Err(e) => {
895 tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
896 }
897 }
898
899 // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
900 // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
901 // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
902 // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
903 if let Some(port) = env.peerapi_port {
904 let channel = channel.clone();
905 let view_rx = view_rx.clone();
906 let forward_exit_egress = env.forward_exit_egress;
907 let taildrop = env.taildrop_store.clone();
908 let funnel_ingress = env.funnel_ingress.clone();
909 joinset.spawn(crate::peerapi::serve(
910 channel,
911 port,
912 view_rx,
913 forward_exit_egress,
914 taildrop,
915 funnel_ingress,
916 ));
917 }
918
919 Ok(Self {
920 _joinset: joinset,
921 view_tx,
922 env,
923 channel,
924 })
925 }
926}
927
/// Build a header-only SERVFAIL response for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name over 255 bytes on the wire).
///
/// The result is exactly the 12-byte DNS header: id 0, QR=1 (it is a response), RCODE=2 (server
/// failure), every other flag clear and all section counts zero. No question is echoed because
/// none was ever parseable. This gives `query_dns` a definite, honest RCODE to report instead of
/// an empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    // Byte 2 carries QR (0x80); the low nibble of byte 3 carries RCODE (2 = SERVFAIL).
    const SERVFAIL_HEADER: [u8; 12] = [0, 0, 0x80, 0x02, 0, 0, 0, 0, 0, 0, 0, 0];
    SERVFAIL_HEADER.to_vec()
}
940
941impl Message<Query> for MagicDnsActor {
942 type Reply = DnsQueryResult;
943
944 async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
945 // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
946 // uses, against the freshest view — so the result and its anti-leak posture exactly match an
947 // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
948 // and `response_matches_query` validates the echoed id against this same buffer.
949 //
950 // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
951 // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
952 // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
953 // corrupting the QTYPE/QCLASS that follow.
954 let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
955 let labels: Vec<String> = trimmed
956 .split('.')
957 .filter(|label| !label.is_empty())
958 .map(str::to_owned)
959 .collect();
960 let qtype = match query.qtype {
961 1 => ts_dns_wire::QType::A,
962 28 => ts_dns_wire::QType::Aaaa,
963 12 => ts_dns_wire::QType::Ptr,
964 other => ts_dns_wire::QType::Other(other),
965 };
966 // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
967 // forwards via `forward_or_nodata`, matching the on-the-wire path).
968 let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
969
970 let view = self.view_tx.borrow().clone();
971
972 let (response, resolvers_consulted) = match decide(&view, &buf) {
973 // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
974 // the name normalized above that can still happen for a name `encode_query` accepts but
975 // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
976 // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
977 // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
978 // silently drops here (the on-wire client times out); a programmatic caller gets a
979 // definite, honest error instead.
980 None => (servfail_response(), Vec::new()),
981 Some(Decision::Reply(resp)) => (resp, Vec::new()),
982 Some(Decision::Forward {
983 upstreams,
984 query,
985 servfail,
986 recursive,
987 }) => {
988 let plan = if recursive {
989 recursive_plan(&view, upstreams)
990 } else {
991 RecursivePlan::Udp(upstreams)
992 };
993 match plan {
994 RecursivePlan::Udp(upstreams) => {
995 let resp = forward_query(&self.channel, &upstreams, &query, servfail).await;
996 (resp, upstreams)
997 }
998 RecursivePlan::Doh(doh_addr) => {
999 let resp = crate::peerapi_doh::forward_doh(
1000 &self.channel,
1001 doh_addr,
1002 &query,
1003 servfail,
1004 )
1005 .await;
1006 // The query egressed via the exit node's DoH endpoint, not a local UDP
1007 // upstream — report the DoH address as the resolver consulted.
1008 (resp, vec![doh_addr])
1009 }
1010 }
1011 }
1012 };
1013
1014 // RCODE is the low 4 bits of the second flags byte (header byte 3).
1015 let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
1016
1017 DnsQueryResult {
1018 response,
1019 rcode,
1020 resolvers_consulted,
1021 }
1022 }
1023}
1024
1025impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
1026 type Reply = ();
1027
1028 async fn handle(
1029 &mut self,
1030 update: Arc<ts_control::StateUpdate>,
1031 _ctx: &mut Context<Self, Self::Reply>,
1032 ) {
1033 // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
1034 // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1035 let accept_dns = self.env.accept_dns();
1036 self.view_tx.send_modify(|view| {
1037 let mut next = (**view).clone();
1038 next.cfg = update.dns_config.clone().unwrap_or_default();
1039 next.self_node = update.node.clone();
1040 next.accept_dns = accept_dns;
1041 *view = Arc::new(next);
1042 });
1043 }
1044}
1045
1046impl Message<Arc<PeerState>> for MagicDnsActor {
1047 type Reply = ();
1048
1049 async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1050 // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1051 // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1052 // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1053 let accept_dns = self.env.accept_dns();
1054 self.view_tx.send_modify(|view| {
1055 let mut next = (**view).clone();
1056 next.peers = Some(state.peers.clone());
1057 next.accept_dns = accept_dns;
1058 *view = Arc::new(next);
1059 });
1060 }
1061}
1062
1063impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1064 type Reply = ();
1065
1066 async fn handle(
1067 &mut self,
1068 active: crate::route_updater::ActiveExitNode,
1069 _ctx: &mut Context<Self, Self::Reply>,
1070 ) {
1071 // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1072 // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1073 // the address here — once, from the route updater's authoritative selection — means the
1074 // serve loop never re-resolves the selector.
1075 let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1076 self.view_tx.send_modify(|view| {
1077 let mut next = (**view).clone();
1078 next.exit_doh = exit_doh;
1079 *view = Arc::new(next);
1080 });
1081 }
1082}
1083
1084#[cfg(test)]
1085mod tests {
1086 use ts_control::{StableNodeId, TailnetAddress};
1087
1088 use super::*;
1089
1090 /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1091 /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1092 /// `Forward`; a `Forward` here is a bug and panics.
1093 fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1094 match decide(view, buf)? {
1095 Decision::Reply(resp) => Some(resp),
1096 Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1097 }
1098 }
1099
1100 /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1101 fn test_node() -> Node {
1102 Node {
1103 id: 1,
1104 stable_id: StableNodeId("n1".to_string()),
1105 hostname: "host".to_string(),
1106 user_id: 0,
1107 tailnet: Some("user.ts.net".to_string()),
1108 tags: vec![],
1109 tailnet_address: TailnetAddress {
1110 ipv4: "100.64.0.1/32".parse().unwrap(),
1111 ipv6: "fd7a::1/128".parse().unwrap(),
1112 },
1113 node_key: [0u8; 32].into(),
1114 node_key_expiry: None,
1115 online: None,
1116 last_seen: None,
1117 key_signature: vec![],
1118 machine_key: None,
1119 disco_key: None,
1120 accepted_routes: vec![],
1121 underlay_addresses: vec![],
1122 derp_region: None,
1123 cap: Default::default(),
1124 cap_map: Default::default(),
1125 peerapi_port: None,
1126 peerapi_dns_proxy: false,
1127 is_wireguard_only: false,
1128 exit_node_dns_resolvers: vec![],
1129 peer_relay: false,
1130 ssh_host_keys: vec![],
1131 service_vips: Default::default(),
1132 }
1133 }
1134
1135 /// A view with MagicDNS on and a single peer in the db.
1136 fn view_with_peer() -> DnsView {
1137 let mut db = PeerDb::default();
1138 db.upsert(&test_node());
1139
1140 DnsView {
1141 cfg: DnsConfig {
1142 magic_dns: true,
1143 search_domains: vec!["user.ts.net".to_string()],
1144 ..Default::default()
1145 },
1146 peers: Some(Arc::new(db)),
1147 self_node: None,
1148 exit_doh: None,
1149 enable_ipv6: false,
1150 accept_dns: true,
1151 }
1152 }
1153
1154 /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1155 fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1156 let mut buf: Vec<u8> = Vec::new();
1157 buf.extend_from_slice(&id.to_be_bytes());
1158 buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1159 buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1160 buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1161 buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1162 buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1163 for label in labels {
1164 buf.push(label.len() as u8);
1165 buf.extend_from_slice(label.as_bytes());
1166 }
1167 buf.push(0); // root label
1168 buf.extend_from_slice(&qtype.to_be_bytes());
1169 buf.extend_from_slice(&qclass.to_be_bytes());
1170 buf
1171 }
1172
1173 /// Parse a response header: returns `(id, rcode, ancount)`.
1174 fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1175 let id = u16::from_be_bytes([resp[0], resp[1]]);
1176 let flags = u16::from_be_bytes([resp[2], resp[3]]);
1177 let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1178 (id, (flags & 0x000F) as u8, ancount)
1179 }
1180
1181 #[test]
1182 fn a_query_for_known_peer_answers_v4() {
1183 let view = view_with_peer();
1184 let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1185
1186 let resp = answer(&view, &buf).expect("answers");
1187 let (id, rcode, ancount) = parse_header(&resp);
1188 assert_eq!(id, 0x1234);
1189 assert_eq!(rcode, 0, "NoError");
1190 assert_eq!(ancount, 1);
1191
1192 // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1193 let tail = &resp[resp.len() - 4..];
1194 assert_eq!(tail, &[100, 64, 0, 1]);
1195 }
1196
1197 #[test]
1198 fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1199 // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1200 // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1201 // route. This is the anti-fingerprint / no-dead-connections posture.
1202 let view = view_with_peer();
1203 assert!(!view.enable_ipv6, "default gate is off");
1204 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1205
1206 let resp = answer(&view, &buf).expect("answers");
1207 let (_, rcode, ancount) = parse_header(&resp);
1208 assert_eq!(rcode, 0, "NoError (NODATA)");
1209 assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1210 }
1211
1212 #[test]
1213 fn a_query_still_resolves_when_ipv6_off() {
1214 // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1215 let view = view_with_peer();
1216 let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1217
1218 let resp = answer(&view, &buf).expect("answers");
1219 let (_, rcode, ancount) = parse_header(&resp);
1220 assert_eq!(rcode, 0, "NoError");
1221 assert_eq!(ancount, 1);
1222 let tail = &resp[resp.len() - 4..];
1223 assert_eq!(tail, &[100, 64, 0, 1]);
1224 }
1225
1226 #[test]
1227 fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1228 // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1229 let mut view = view_with_peer();
1230 view.enable_ipv6 = true;
1231 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1232
1233 let resp = answer(&view, &buf).expect("answers");
1234 let (_, rcode, ancount) = parse_header(&resp);
1235 assert_eq!(rcode, 0, "NoError");
1236 assert_eq!(ancount, 1);
1237
1238 let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1239 let tail = &resp[resp.len() - 16..];
1240 assert_eq!(tail, expected);
1241 }
1242
1243 #[test]
1244 fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1245 // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1246 // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1247 // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1248 // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1249 let mut db = PeerDb::default();
1250 db.upsert(&test_node());
1251 let view = DnsView {
1252 cfg: DnsConfig {
1253 magic_dns: true,
1254 search_domains: vec!["user.ts.net".to_string()],
1255 fallback_resolvers: vec![DnsResolver {
1256 transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1257 use_with_exit_node: false,
1258 }],
1259 ..Default::default()
1260 },
1261 peers: Some(Arc::new(db)),
1262 self_node: None,
1263 exit_doh: None,
1264 enable_ipv6: false,
1265 accept_dns: true,
1266 };
1267 let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1268
1269 match decide(&view, &buf).expect("decides") {
1270 Decision::Reply(resp) => {
1271 let (_, rcode, _) = parse_header(&resp);
1272 assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1273 }
1274 Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1275 }
1276 }
1277
1278 #[test]
1279 fn bare_hostname_resolves() {
1280 // The name index also stores the bare hostname.
1281 let view = view_with_peer();
1282 let buf = build_query(0x7, &["host"], 1, 1);
1283
1284 let resp = answer(&view, &buf).expect("answers");
1285 let (_, rcode, ancount) = parse_header(&resp);
1286 assert_eq!(rcode, 0);
1287 assert_eq!(ancount, 1);
1288 }
1289
1290 #[test]
1291 fn unknown_off_tailnet_name_with_no_upstream_is_servfail() {
1292 // An off-tailnet name with no resolver configured cannot be forwarded. Go answers SERVFAIL
1293 // (a soft "couldn't resolve"), not NXDOMAIN — asserting non-existence of a real name we
1294 // simply have no upstream for would poison a downstream stub's negative cache. (A *tailnet*
1295 // name with no overlay match stays NXDOMAIN — see `tailnet_name_is_never_forwarded` — and a
1296 // negative split-DNS route stays NXDOMAIN — see `negative_route_is_nxdomain_not_forwarded`.)
1297 let view = view_with_peer();
1298 let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1299
1300 let resp = answer(&view, &buf).expect("answers");
1301 let (_, rcode, ancount) = parse_header(&resp);
1302 assert_eq!(
1303 rcode, 2,
1304 "ServFail: off-tailnet name, nothing to forward to"
1305 );
1306 assert_eq!(ancount, 0);
1307 }
1308
1309 #[test]
1310 fn magic_dns_off_is_refused() {
1311 // Fail closed: with MagicDNS disabled, even a known name is refused.
1312 let mut view = view_with_peer();
1313 view.cfg.magic_dns = false;
1314 let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1315
1316 let resp = answer(&view, &buf).expect("answers");
1317 let (_, rcode, ancount) = parse_header(&resp);
1318 assert_eq!(rcode, 5, "Refused");
1319 assert_eq!(ancount, 0);
1320 }
1321
1322 #[test]
1323 fn accept_dns_false_refuses_otherwise_answerable_query() {
1324 // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1325 // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1326 // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1327 let mut view = view_with_peer();
1328 assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1329 view.accept_dns = false;
1330 let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1331
1332 let resp = answer(&view, &buf).expect("answers");
1333 let (_, rcode, ancount) = parse_header(&resp);
1334 assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1335 assert_eq!(ancount, 0);
1336
1337 // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1338 // answers authoritatively — proving the OFF→ON restore is automatic.
1339 view.accept_dns = true;
1340 let resp = answer(&view, &buf).expect("answers");
1341 let (_, rcode, ancount) = parse_header(&resp);
1342 assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1343 assert_eq!(ancount, 1);
1344 let tail = &resp[resp.len() - 4..];
1345 assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1346 }
1347
1348 #[test]
1349 fn default_view_serves_nothing() {
1350 // The default (no dns_config seen) has magic_dns == false: fail closed.
1351 let view = DnsView::default();
1352 let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1353
1354 let resp = answer(&view, &buf).expect("answers");
1355 let (_, rcode, _) = parse_header(&resp);
1356 assert_eq!(rcode, 5, "Refused");
1357 }
1358
1359 #[test]
1360 fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1361 // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1362 // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1363 // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1364 let view = view_with_peer();
1365 let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1366
1367 let resp = answer(&view, &buf).expect("answers");
1368 let (_, rcode, ancount) = parse_header(&resp);
1369 assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1370 assert_eq!(ancount, 0, "no answer records (NODATA)");
1371 }
1372
1373 #[test]
1374 fn unsupported_qtype_off_tailnet_forwards_or_servfails() {
1375 // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1376 // REFUSED. With no upstream configured in this view it soft-fails to SERVFAIL (the same
1377 // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1378 // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1379 let view = view_with_peer();
1380 let buf = build_query(0x1, &["example", "com"], 65, 1);
1381
1382 let resp = answer(&view, &buf).expect("answers");
1383 let (_, rcode, _) = parse_header(&resp);
1384 assert_eq!(
1385 rcode, 2,
1386 "off-tailnet, no upstream -> SERVFAIL (forwardable, not Refused)"
1387 );
1388 }
1389
#[test]
fn unimplemented_qtype_on_tailnet_name_is_notimp() {
    // Go `resolveLocal` has `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return
    // RCodeNotImplemented`, so these qtypes on a tailnet-authoritative name must answer NOTIMP
    // (rcode 4). Answering NODATA (rcode 0) was a clean fingerprint: a `dig SOA user.ts.net`
    // reply differed from real tailscaled. The name is still never forwarded (anti-leak).
    //
    // NS (2), SOA (6), HINFO (13), AXFR (252).
    const UNIMPLEMENTED_QTYPES: [u16; 4] = [2, 6, 13, 252];

    let view = view_with_peer();
    for qtype in UNIMPLEMENTED_QTYPES {
        let query = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
        let reply = answer(&view, &query).expect("answers");
        let (_, rcode, ancount) = parse_header(&reply);
        assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
        assert_eq!(ancount, 0, "NOTIMP carries no answer records");
    }
}
1406
#[test]
fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
    // NOTIMP applies ONLY to names we are authoritative for. An NS query for an off-tailnet
    // name must still take the forwarding path (here: SERVFAIL, because no upstream is
    // configured), exactly like the off-tailnet HTTPS/SVCB case. This guards the NOTIMP
    // handling against over-reach.
    const QTYPE_NS: u16 = 2;
    let view = view_with_peer();
    let ns_query = build_query(0x1, &["example", "com"], QTYPE_NS, 1);

    let reply = answer(&view, &ns_query).expect("answers");
    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "off-tailnet NS -> SERVFAIL (forwardable), not NOTIMP"
    );
}
1421
#[test]
fn malformed_query_is_dropped() {
    // A message with the QR bit set is a response, not a query: it gets no answer at all.
    let mut packet = build_query(0x1, &["host"], 1, 1);
    packet[2] = 0x80; // set QR bit

    let view = view_with_peer();
    let outcome = answer(&view, &packet);
    assert!(outcome.is_none());
}
1429
#[test]
fn ptr_for_known_ip_answers_fqdn() {
    // The PTR rdata carries the peer's fqdn "host.user.ts.net" as length-prefixed labels,
    // terminated by the zero-length root label.
    let expected_rdata: &[u8] = b"\x04host\x04user\x02ts\x03net\x00";

    let view = view_with_peer();
    // 100.64.0.1 reversed => 1.0.64.100.in-addr.arpa
    let query = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);

    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError");
    assert_eq!(ancount, 1);

    // The rdata is the last thing in the message, so compare against the reply's tail.
    let rdata_start = reply.len() - expected_rdata.len();
    let tail = &reply[rdata_start..];
    assert_eq!(tail, expected_rdata);
}
1454
#[test]
fn ptr_for_unknown_public_ip_off_tailnet_is_servfail() {
    // 9.9.9.9 is a public address: not a known tailnet IP and outside the CGNAT reverse zone,
    // so its reverse lookup is an ordinary off-tailnet name. With no upstream to forward to,
    // the result is SERVFAIL (soft), not NXDOMAIN. (A CGNAT/ip6.arpa reverse for an unmatched
    // tailnet IP still fails closed to NXDOMAIN as an anti-leak guard — see
    // `ptr_for_unknown_tailnet_ip_*`.)
    let reverse_name = ["9", "9", "9", "9", "in-addr", "arpa"];
    let query = build_query(0x34, &reverse_name, 12, 1);
    let view = view_with_peer();

    let reply = answer(&view, &query).expect("answers");
    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "ServFail: off-tailnet public-IP reverse, no upstream"
    );
}
1471
#[test]
fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
    // This view HAS an upstream resolver, so an off-tailnet reverse query would be forwarded.
    // A reverse query for an unmatched address inside the CGNAT range (100.64.0.0/10) must
    // instead fail closed to NXDOMAIN: the probed tailnet IP must never leak upstream.
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec![String::from("user.ts.net")],
        fallback_resolvers: vec![udp("9.9.9.9:53")],
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    // 100.64.0.9 lies in the CGNAT range but belongs to no peer => NXDOMAIN, never a Forward.
    let query = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => {
            panic!("tailnet CGNAT PTR must never be forwarded upstream")
        }
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(rcode, 3, "NxDomain");
        }
    }
}
1508
/// Anti-leak regression covering the exotic-qtype forward path. A query that is NOT a PTR (here
/// TXT, type 16) for a tailnet CGNAT reverse name must fail closed to NXDOMAIN even though an
/// upstream is configured. The PTR arm already guards this; the `QType::Other` path goes through
/// `forward_or_nodata`, which has to re-apply the reverse-zone guard or the tailnet IP leaks.
#[test]
fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec![String::from("user.ts.net")],
        fallback_resolvers: vec![udp("9.9.9.9:53")],
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    // TXT (16) on a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
    let query = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => {
            panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
        }
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(rcode, 3, "NxDomain");
        }
    }
}
1546
/// The `ip6.arpa` counterpart of the exotic-qtype anti-leak guard: a reverse name under
/// `ip6.arpa` queried with an exotic qtype must be NXDOMAIN, not forwarded (forwarding would
/// reveal that a tailnet ULA was probed).
#[test]
fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
    use std::collections::BTreeMap;

    // No routes, one global resolver, no fallback: forwarding would be possible if allowed.
    let view = view_with_routes(BTreeMap::new(), vec![udp("9.9.9.9:53")], Vec::new());

    // An ip6.arpa reverse name asked for with TXT (16) must fail closed.
    let reverse_name = [
        "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
        "a", "7", "d", "f", "ip6", "arpa",
    ];
    let query = build_query(0x37, &reverse_name, 16, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(rcode, 3, "NxDomain");
        }
    }
}
1574
#[test]
fn is_tailnet_cgnat_classifies_range() {
    // Parse an address literal and ask whether it falls in the tailnet CGNAT /10.
    let in_cgnat = |ip: &str| is_tailnet_cgnat(ip.parse().unwrap());

    // Inside the /10, including both ends of the range.
    assert!(in_cgnat("100.64.0.0"));
    assert!(in_cgnat("100.64.0.1"));
    assert!(in_cgnat("100.127.255.255"));

    // Just below, just above, and far outside the /10.
    assert!(!in_cgnat("100.63.255.255"));
    assert!(!in_cgnat("100.128.0.0"));
    assert!(!in_cgnat("9.9.9.9"));

    // The MagicDNS resolver address 100.100.100.100 is itself inside the /10.
    assert!(in_cgnat("100.100.100.100"));
}
1587
#[test]
fn response_matches_query_validates_id_and_qr() {
    // The outgoing query: transaction id 0x1234, QR=0.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // A well-formed response echoes the query with the QR bit turned on.
    let good = {
        let mut response = query.clone();
        response[2] |= 0x80;
        response
    };
    assert!(response_matches_query(&query, &good));

    // The query itself has the right id but QR=0, so it is not a response: rejected.
    assert!(!response_matches_query(&query, &query));

    // QR=1 with a different transaction id is rejected (off-path forgery).
    let wrong_id = {
        let mut response = good.clone();
        response[0] ^= 0xFF;
        response
    };
    assert!(!response_matches_query(&query, &wrong_id));

    // Buffers that are too short on either side are rejected.
    assert!(!response_matches_query(&query, &[0u8; 2]));
    assert!(!response_matches_query(&[0u8; 3], &good));
}
1610
#[test]
fn self_node_resolves_when_no_peer_match() {
    // No peer database at all, only a self node: the self node answers for its own name.
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: Vec::new(),
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: None,
        self_node: Some(test_node()),
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };
    let query = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);

    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0);
    assert_eq!(ancount, 1);

    // The A rdata (the last four bytes of the reply) is 100.64.0.1.
    let addr_bytes = &reply[reply.len() - 4..];
    assert_eq!(addr_bytes, &[100, 64, 0, 1]);
}
1635
#[test]
fn partially_qualified_name_resolves_via_search_domain() {
    // "host.user" is not indexed directly, and the "user.ts.net" search domain would expand it
    // to "host.user.user.ts.net", which does NOT match. So this test swaps in the search domain
    // "ts.net": the partially-qualified "host.user" then expands to "host.user.ts.net", the
    // peer's fqdn, exercising a name that genuinely needs suffix expansion.
    let mut view = view_with_peer();
    view.cfg.search_domains = vec![String::from("ts.net")];
    let query = build_query(0x55, &["host", "user"], 1, 1);

    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError via search-domain expansion");
    assert_eq!(ancount, 1);

    // The A rdata (the last four bytes of the reply) is 100.64.0.1.
    let addr_bytes = &reply[reply.len() - 4..];
    assert_eq!(addr_bytes, &[100, 64, 0, 1]);
}
1653
#[test]
fn extra_record_a_answers_when_no_peer_match() {
    // A static A record pushed by control answers for a name that is not a peer.
    let static_record = ts_control::ExtraRecord {
        name: String::from("static.user.ts.net"),
        addr: IpAddr::from(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![static_record];

    let query = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError from extra record");
    assert_eq!(ancount, 1);

    // The A rdata (the last four bytes of the reply) is the record's address.
    let addr_bytes = &reply[reply.len() - 4..];
    assert_eq!(addr_bytes, &[100, 64, 0, 9]);
}
1671
#[test]
fn extra_record_matches_query_case_insensitively() {
    // Query names are canonicalized (lowercased) when decoded, so a mixed-case query still
    // hits an extra record stored in lowercase.
    let static_record = ts_control::ExtraRecord {
        name: String::from("static.user.ts.net"),
        addr: IpAddr::from(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![static_record];

    let mixed_case_query = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
    let reply = answer(&view, &mixed_case_query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError: case-insensitive match");
    assert_eq!(ancount, 1);

    // The A rdata (the last four bytes of the reply) is the record's address.
    let addr_bytes = &reply[reply.len() - 4..];
    assert_eq!(addr_bytes, &[100, 64, 0, 9]);
}
1690
#[test]
fn extra_record_not_expanded_by_search_domain() {
    // Extra records, unlike peer names, match as FQDNs only. A bare query that could reach the
    // record name only through search-domain expansion must NOT resolve.
    let static_record = ts_control::ExtraRecord {
        name: String::from("static.user.ts.net"),
        addr: IpAddr::from(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![static_record];

    // Only the "user.ts.net" search domain could turn "static" into "static.user.ts.net".
    let bare_query = build_query(0x7B, &["static"], 1, 1);
    let reply = answer(&view, &bare_query).expect("answers");
    let rcode = parse_header(&reply).1;

    // Without expansion the query is the bare off-tailnet name "static"; there is no upstream
    // here, hence SERVFAIL (soft). What the test is really checking — that the extra record is
    // NOT reachable through search expansion — holds whichever failure rcode is used.
    assert_eq!(
        rcode, 2,
        "ServFail: bare 'static' is not search-expanded to the extra record"
    );
}
1713
#[test]
fn extra_record_aaaa_family_is_isolated() {
    // An extra record holding only an A address must NOT satisfy an AAAA (type 28) query for
    // the same name; the reply is NxDomain.
    let v4_only_record = ts_control::ExtraRecord {
        name: String::from("v4only.user.ts.net"),
        addr: IpAddr::from(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![v4_only_record];

    let aaaa_query = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
    let reply = answer(&view, &aaaa_query).expect("answers");
    let rcode = parse_header(&reply).1;
    assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
}
1728
#[test]
fn extra_record_ignored_when_magic_dns_off() {
    // Fail closed: with MagicDNS disabled, extra records are never served.
    let static_record = ts_control::ExtraRecord {
        name: String::from("static.user.ts.net"),
        addr: IpAddr::from(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![static_record];
    view.cfg.magic_dns = false;

    let query = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");
    let rcode = parse_header(&reply).1;
    assert_eq!(rcode, 5, "Refused");
}
1744
#[test]
fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
    // A query in the CHAOS class (3) for a tailnet name must NOT be treated as IN (no overlay
    // A is handed out) and must NOT be REFUSED (Go performs no class check on the local path).
    // It is an unsupported authoritative class, so the reply is NODATA (empty NOERROR); being
    // a tailnet name, it is never forwarded.
    let view = view_with_peer();
    let chaos_query = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);

    let reply = answer(&view, &chaos_query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(
        rcode, 0,
        "NoError (NODATA), not Refused and not an IN answer"
    );
    assert_eq!(
        ancount, 0,
        "must not hand out the overlay A for a non-IN class"
    );
}
1764
#[test]
fn non_in_class_off_tailnet_forwards_or_servfails() {
    // For an OFF-tailnet name a non-IN class is forwardable (Go forwards it) and never REFUSED.
    // This view has no upstream, so the outcome is SERVFAIL — evidence that the class gate no
    // longer short-circuits to Refused.
    let view = view_with_peer();
    let chaos_query = build_query(0x66, &["example", "com"], 1, 3);

    let reply = answer(&view, &chaos_query).expect("answers");
    let rcode = parse_header(&reply).1;
    assert_eq!(
        rcode, 2,
        "off-tailnet non-IN class, no upstream -> SERVFAIL, not Refused"
    );
}
1779
1780 /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1781 /// + global resolvers.
1782 fn view_with_routes(
1783 routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1784 resolvers: Vec<DnsResolver>,
1785 fallback: Vec<DnsResolver>,
1786 ) -> DnsView {
1787 DnsView {
1788 cfg: DnsConfig {
1789 magic_dns: true,
1790 search_domains: vec!["user.ts.net".to_string()],
1791 routes,
1792 resolvers,
1793 fallback_resolvers: fallback,
1794 ..Default::default()
1795 },
1796 peers: None,
1797 self_node: None,
1798 exit_doh: None,
1799 enable_ipv6: false,
1800 accept_dns: true,
1801 }
1802 }
1803
1804 fn udp(addr: &str) -> DnsResolver {
1805 DnsResolver {
1806 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1807 use_with_exit_node: false,
1808 }
1809 }
1810
#[test]
fn split_dns_route_forwards_to_matching_upstream() {
    use std::collections::BTreeMap;

    // A single split-DNS route: names under "corp.example" go to 10.0.0.53.
    let routes = BTreeMap::from([(
        String::from("corp.example"),
        vec![udp("10.0.0.53:53")],
    )]);
    let view = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x100, &["api", "corp", "example"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
        Decision::Forward { upstreams, .. } => {
            assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
        }
    }
}
1825
#[test]
fn exotic_qtype_off_tailnet_forwards_to_upstream() {
    use std::collections::BTreeMap;

    // The heart of the fix. An HTTPS/SVCB (type 65) query for an off-tailnet name that matches
    // a route must be FORWARDED to the upstream, verbatim, just as an A query would be — it must
    // neither be REFUSED nor turned into NXDOMAIN. This is the browser HTTP/3 + ECH case the
    // old blanket-REFUSE broke.
    let routes = BTreeMap::from([(
        String::from("corp.example"),
        vec![udp("10.0.0.53:53")],
    )]);
    let view = view_with_routes(routes, Vec::new(), Vec::new());
    let wire = build_query(0x102, &["api", "corp", "example"], 65, 1);

    let decision = decide(&view, &wire).expect("decides");
    match decision {
        Decision::Reply(_) => {
            panic!("an off-tailnet HTTPS-record query must forward, not reply")
        }
        Decision::Forward {
            upstreams, query, ..
        } => {
            assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
            assert_eq!(query, wire, "the exotic-qtype query is forwarded verbatim");
        }
    }
}
1848
#[test]
fn non_in_class_off_tailnet_forwards_to_upstream() {
    use std::collections::BTreeMap;

    // A non-IN class (here 3) for an off-tailnet routed name is forwarded as well; Go does no
    // class check on the local path. This shows the class gate no longer short-circuits to
    // REFUSED before routing happens.
    let routes = BTreeMap::from([(
        String::from("corp.example"),
        vec![udp("10.0.0.53:53")],
    )]);
    let view = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x103, &["api", "corp", "example"], 1, 3);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Reply(_) => {
            panic!("an off-tailnet non-IN-class query must forward, not reply")
        }
        Decision::Forward { upstreams, .. } => {
            assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
        }
    }
}
1867
/// Concurrent in-flight forwards are bounded by the local responder: `serve` takes one
/// `MAX_INFLIGHT_FORWARDS` permit per spawned forward task and, once the pool is exhausted,
/// drops the query fail-closed (so a client spraying forwardable names cannot open unbounded
/// overlay sockets). This test pins the gating semantics `serve` depends on — a drained pool
/// refuses a new permit, and releasing one restores capacity — together with the cap constant.
/// (The async `serve` loop has no netstack-free test seam, so the semaphore is exercised
/// directly here through the same `Arc<Semaphore>::try_acquire_owned` the loop uses.)
#[test]
fn forward_inflight_cap_fails_closed_when_saturated() {
    use std::sync::Arc;

    use tokio::sync::Semaphore;

    let pool = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));

    // Take every permit, one per concurrently in-flight forward.
    let mut held: Vec<_> = (0..MAX_INFLIGHT_FORWARDS)
        .map(|_| {
            Arc::clone(&pool)
                .try_acquire_owned()
                .expect("permits available below the cap")
        })
        .collect();

    // With the pool at its cap the next forward is refused: `serve` would drop the query
    // rather than spawn.
    let over_cap = Arc::clone(&pool).try_acquire_owned();
    assert!(
        over_cap.is_err(),
        "a saturated forward pool must refuse a new permit (fail closed)"
    );

    // A forward finishing gives its permit back, which restores capacity.
    drop(held.pop());
    let after_release = Arc::clone(&pool).try_acquire_owned();
    assert!(
        after_release.is_ok(),
        "releasing a permit must let the next forward proceed"
    );
}
1907
1908 /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1909 /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1910 /// only when the task completes. This guards the regression the saturation test above can't see:
1911 /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1912 /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1913 /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1914 /// while the task runs and refill once it finishes.
1915 #[tokio::test]
1916 async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1917 use std::sync::Arc;
1918
1919 use tokio::sync::Semaphore;
1920
1921 let inflight = Arc::new(Semaphore::new(1));
1922 let permit = inflight
1923 .clone()
1924 .try_acquire_owned()
1925 .expect("the sole permit is available");
1926
1927 let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1928 let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1929 let task = tokio::spawn(async move {
1930 // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1931 // task, so it lives until the body ends — not dropped at the `let`.
1932 let _permit = permit;
1933 started_tx.send(()).unwrap();
1934 // Stand in for the `.await` on the upstream forward.
1935 release_rx.await.unwrap();
1936 });
1937
1938 started_rx.await.unwrap();
1939 // While the task runs, the permit it moved in is still held — the pool is empty.
1940 assert!(
1941 inflight.clone().try_acquire_owned().is_err(),
1942 "a permit moved into a running task must stay held across its await"
1943 );
1944
1945 // Let the task finish; its permit drops with the body and capacity returns.
1946 release_tx.send(()).unwrap();
1947 task.await.unwrap();
1948 assert!(
1949 inflight.clone().try_acquire_owned().is_ok(),
1950 "the permit must be released once the task body completes"
1951 );
1952 }
1953
#[test]
fn longest_suffix_route_wins() {
    use std::collections::BTreeMap;

    // Two routes match "api.corp.example"; the more specific "corp.example" must be chosen.
    let routes = BTreeMap::from([
        (String::from("example"), vec![udp("10.0.0.1:53")]),
        (String::from("corp.example"), vec![udp("10.0.0.2:53")]),
    ]);
    let view = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x101, &["api", "corp", "example"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Reply(_) => panic!("expected forward"),
        Decision::Forward { upstreams, .. } => {
            assert_eq!(
                upstreams,
                vec!["10.0.0.2:53".parse().unwrap()],
                "longer suffix wins"
            );
        }
    }
}
1973
#[test]
fn negative_route_is_nxdomain_not_forwarded() {
    use std::collections::BTreeMap;

    // A route whose upstream list is empty is a negative route: names under it fail closed and
    // are never forwarded, even though a global resolver is configured.
    let routes = BTreeMap::from([(String::from("blocked.example"), Vec::new())]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());
    let query = build_query(0x102, &["x", "blocked", "example"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("negative route must not forward"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
        }
    }
}
1990
#[test]
fn unrouted_name_forwards_to_fallback_then_global() {
    use std::collections::BTreeMap;

    // With no matching route, the fallback resolvers take precedence over the global ones.
    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let view = view_with_routes(BTreeMap::new(), global, fallback);
    let query = build_query(0x103, &["example", "com"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Reply(_) => panic!("expected forward to fallback"),
        Decision::Forward { upstreams, .. } => {
            assert_eq!(
                upstreams,
                vec!["1.1.1.1:53".parse().unwrap()],
                "fallback preferred"
            );
        }
    }
}
2012
#[test]
fn unrouted_name_forwards_to_global_when_no_fallback() {
    use std::collections::BTreeMap;

    // No route and no fallback resolver: the global resolver is the forwarding target.
    let global = vec![udp("8.8.8.8:53")];
    let view = view_with_routes(BTreeMap::new(), global, Vec::new());
    let query = build_query(0x104, &["example", "com"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Reply(_) => panic!("expected forward to global resolver"),
        Decision::Forward { upstreams, .. } => {
            assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
        }
    }
}
2029
#[test]
fn tailnet_name_is_never_forwarded() {
    use std::collections::BTreeMap;

    // Anti-leak: a name under a tailnet search domain with no overlay match fails closed to
    // NXDOMAIN. It must not leak to an upstream resolver even when resolvers are configured.
    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let view = view_with_routes(BTreeMap::new(), global, fallback);

    // "ghost.user.ts.net" sits under the tailnet suffix yet matches no peer.
    let query = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
        }
    }
}
2050
#[test]
fn no_resolvers_off_tailnet_is_servfail_not_nxdomain() {
    use std::collections::BTreeMap;

    // Neither a route nor any resolver: an OFF-tailnet name has nowhere to be forwarded. Go
    // answers SERVFAIL here (forwarder.go:1207 "no upstream resolvers set, returning
    // SERVFAIL"), NOT NXDOMAIN, because a cacheable non-existence for a real name we merely
    // could not forward would poison downstream stub caches. The name is still never forwarded
    // (no leak); the query just soft-fails.
    let view = view_with_routes(BTreeMap::new(), Vec::new(), Vec::new());
    let query = build_query(0x106, &["example", "com"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("must not forward with no resolvers"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(
                rcode, 2,
                "ServFail: off-tailnet name with no upstream to forward to"
            );
        }
    }
}
2071
#[test]
fn route_with_only_ipv6_upstreams_off_tailnet_is_servfail() {
    use std::collections::BTreeMap;

    // The split-DNS route exists, but its only resolver is IPv6 and is filtered out under the
    // IPv4-only egress. A route with nowhere to forward is an inability to forward an
    // off-tailnet name, so the reply is SERVFAIL (soft) rather than a fabricated NXDOMAIN.
    let routes = BTreeMap::from([(
        String::from("corp.example"),
        vec![udp("[2001:db8::53]:53")],
    )]);
    let view = view_with_routes(routes, Vec::new(), Vec::new());
    let query = build_query(0x108, &["host", "corp", "example"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("must not forward when all upstreams are filtered"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(
                rcode, 2,
                "ServFail: route's resolvers all filtered out (IPv6-only), cannot forward"
            );
        }
    }
}
2093
#[test]
fn overlay_match_wins_over_forwarding() {
    // A known peer name is answered authoritatively even though an upstream resolver exists.
    let mut peer_db = PeerDb::default();
    peer_db.upsert(&test_node());

    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: vec![String::from("user.ts.net")],
        resolvers: vec![udp("8.8.8.8:53")],
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: Some(Arc::new(peer_db)),
        self_node: None,
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };
    let query = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("overlay match must not forward"),
        Decision::Reply(reply) => {
            let (_, rcode, ancount) = parse_header(&reply);
            assert_eq!(rcode, 0, "authoritative answer wins");
            assert_eq!(ancount, 1);
        }
    }
}
2123
#[test]
fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
    use std::collections::BTreeMap;

    // Anti-leak: a reverse PTR under `ip6.arpa` for a tailnet ULA (fd7a:…) fails closed to
    // NXDOMAIN and is never forwarded, upstream resolvers notwithstanding. This fork is
    // IPv4-only on the tailnet, and forwarding would give away that a v6 address was probed.
    let global = vec![udp("8.8.8.8:53")];
    let fallback = vec![udp("1.1.1.1:53")];
    let view = view_with_routes(BTreeMap::new(), global, fallback);

    // Nibble-reversed name for fd7a::1 under ip6.arpa. The guard does not depend on the exact
    // nibble labels: any name ending in ip6.arpa must fail closed.
    let reverse_name = [
        "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
        "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "a", "7", "d", "f", "ip6",
        "arpa",
    ];
    let query = build_query(0x200, &reverse_name, 12, 1);

    let decision = decide(&view, &query).expect("decides");
    match decision {
        Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
        Decision::Reply(reply) => {
            let rcode = parse_header(&reply).1;
            assert_eq!(
                rcode, 3,
                "NxDomain: ip6.arpa reverse must not leak upstream"
            );
        }
    }
}
2154
#[test]
fn cap_response_sets_tc_when_truncated() {
    // An upstream answer that is too large gets capped to a single datagram AND flagged as
    // truncated (TC bit), so the stub resolver retries over TCP instead of trusting a chopped
    // message.
    let oversize = {
        let mut msg = build_query(0x300, &["example", "com"], 1, 1);
        msg[2] |= 0x80; // turn the query into a response (QR=1)
        msg.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);
        msg
    };

    let capped = cap_response(oversize);
    assert_eq!(capped.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");
    assert_ne!(capped[2] & 0x02, 0, "TC bit set on truncation");
}
2167
#[test]
fn cap_response_leaves_small_response_untouched() {
    // When the response fits, it comes back verbatim and the TC bit is not forced on.
    let original = {
        let mut msg = build_query(0x301, &["example", "com"], 1, 1);
        msg[2] |= 0x80; // QR=1
        msg
    };

    let capped = cap_response(original.clone());
    assert_eq!(capped, original, "small response unchanged");
    assert_eq!(capped[2] & 0x02, 0, "TC bit not set when no truncation");
}
2179
#[test]
fn response_matches_query_rejects_mismatched_question() {
    // Even with a matching id and QR=1, a response that echoes a different question must be
    // rejected. This defends against an off-path injector who guesses the id but answers some
    // other question.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Same id and QR=1, but a different QNAME.
    let wrong_question = {
        let mut response = build_query(0x1234, &["b", "com"], 1, 1);
        response[2] |= 0x80;
        response
    };
    assert!(
        !response_matches_query(&query, &wrong_question),
        "different QNAME must be rejected"
    );

    // Same name, different QTYPE: rejected as well.
    let wrong_qtype = {
        let mut response = build_query(0x1234, &["a", "com"], 28, 1);
        response[2] |= 0x80;
        response
    };
    assert!(
        !response_matches_query(&query, &wrong_qtype),
        "different QTYPE must be rejected"
    );

    // The question echoed exactly, with QR=1, is accepted.
    let good = {
        let mut response = query.clone();
        response[2] |= 0x80;
        response
    };
    assert!(
        response_matches_query(&query, &good),
        "matching question accepted"
    );
}
2209
#[test]
fn suffix_matches_handles_boundaries_and_empty() {
    // (name, suffix, expected) triples, checked in order.
    let cases = [
        // Exact and label-boundary matches.
        ("corp", "corp", true),
        ("a.corp", "corp", true),
        ("a.b.corp", "corp", true),
        // The suffix text is present but does not start at a label boundary.
        ("acorp", "corp", false),
        // An empty suffix never matches (defense-in-depth against `ends_with("")`).
        ("anything.example", "", false),
        ("", "", false),
    ];

    for &(name, suffix, expected) in cases.iter() {
        assert_eq!(
            suffix_matches(name, suffix),
            expected,
            "suffix_matches({:?}, {:?})",
            name,
            suffix
        );
    }
}
2222
#[test]
fn empty_search_domain_does_not_capture_everything() {
    use std::collections::BTreeMap;

    // Defense-in-depth: an empty search domain must NOT make every name look like a tailnet
    // name (that would fail-close legitimate recursive queries or mis-route them). The view has
    // no routes, one global resolver and an empty-string search domain; an off-tailnet name is
    // still expected to be forwarded to that resolver.
    let mut view = view_with_routes(BTreeMap::new(), vec![udp("8.8.8.8:53")], Vec::new());
    view.cfg.search_domains = vec![String::new()];
    let query = build_query(0x400, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => {
            panic!("empty search domain must not treat every name as tailnet")
        }
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
2245
#[test]
fn empty_route_suffix_does_not_capture_everything() {
    use std::collections::BTreeMap;

    // Defense-in-depth: a route keyed by the empty suffix must not match every name (that would
    // send all queries to that route's upstreams). With such a route configured, an unrelated
    // name is still expected to fall through to the global resolver.
    let routes = BTreeMap::from([(String::new(), vec![udp("10.9.9.9:53")])]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());
    let query = build_query(0x401, &["example", "com"], 1, 1);

    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to global resolver"),
    };
    assert_eq!(
        upstreams,
        vec!["8.8.8.8:53".parse().unwrap()],
        "empty route suffix must not capture; falls through to global"
    );
}
2267
2268 fn udp_exit(addr: &str) -> DnsResolver {
2269 DnsResolver {
2270 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2271 use_with_exit_node: true,
2272 }
2273 }
2274
#[test]
fn recursive_forward_is_flagged_route_forward_is_not() {
    use std::collections::BTreeMap;

    // One split-DNS route (`corp.example`) plus one global resolver. A forward through the
    // deliberately-configured route must carry `recursive = false`; a forward of an unrouted
    // name to the global resolver must carry `recursive = true` (eligible for DoH delegation).
    let routes = BTreeMap::from([(String::from("corp.example"), vec![udp("10.0.0.53:53")])]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], Vec::new());

    // A name under the configured route.
    let routed_query = build_query(0x500, &["api", "corp", "example"], 1, 1);
    let routed_is_recursive = match decide(&view, &routed_query).expect("decides") {
        Decision::Forward { recursive, .. } => recursive,
        Decision::Reply(_) => panic!("expected route forward"),
    };
    assert!(
        !routed_is_recursive,
        "split-DNS route is not a recursive forward"
    );

    // A name matching no route.
    let unrouted_query = build_query(0x501, &["example", "com"], 1, 1);
    let unrouted_is_recursive = match decide(&view, &unrouted_query).expect("decides") {
        Decision::Forward { recursive, .. } => recursive,
        Decision::Reply(_) => panic!("expected recursive forward"),
    };
    assert!(
        unrouted_is_recursive,
        "unrouted name is a recursive forward"
    );
}
2299
#[test]
fn recursive_plan_keeps_udp_without_exit_node() {
    // No exit-node DoH endpoint is set on the view, so a recursive forward is expected to stay
    // on exactly the UDP upstreams the caller passed in.
    let resolvers = vec![udp("8.8.8.8:53")];
    let view = view_with_routes(std::collections::BTreeMap::new(), resolvers, Vec::new());

    let upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, upstreams.clone());
    assert_eq!(plan, RecursivePlan::Udp(upstreams));
}
2314
#[test]
fn recursive_plan_delegates_to_doh_with_exit_node() {
    // An exit-node DoH endpoint is set and none of the configured resolvers is kept local, so
    // the plan is expected to delegate to that endpoint: resolution then egresses from the exit
    // node rather than from this host.
    let resolvers = vec![udp("8.8.8.8:53")];
    let mut view = view_with_routes(std::collections::BTreeMap::new(), resolvers, Vec::new());
    let exit_doh = "100.64.0.5:8080".parse::<SocketAddr>().unwrap();
    view.exit_doh = Some(exit_doh);

    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);
    assert_eq!(plan, RecursivePlan::Doh(exit_doh));
}
2331
#[test]
fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
    // Even with an exit-node DoH endpoint set, resolvers flagged `use_with_exit_node` stay local
    // (Go keeps UseWithExitNode resolvers): the plan must forward to those over UDP and never
    // delegate to DoH.
    let resolvers = vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")];
    let mut view = view_with_routes(std::collections::BTreeMap::new(), resolvers, Vec::new());
    view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());

    // The default upstreams the caller computed are irrelevant once kept-local resolvers
    // exist; only the flagged resolver may appear in the plan.
    let caller_default = vec!["8.8.8.8:53".parse().unwrap()];
    let kept_local = vec!["10.0.0.53:53".parse().unwrap()];
    assert_eq!(
        recursive_plan(&view, caller_default),
        RecursivePlan::Udp(kept_local)
    );
}
2348 }
2349}