ts_runtime/magic_dns.rs
1//! MagicDNS responder with a split-DNS / recursive forwarder.
2//!
3//! An in-netstack DNS server bound to `100.100.100.100:53`. It is authoritative for in-tailnet
4//! peer names and control-pushed [`ExtraRecord`][ts_control::ExtraRecord]s, answering `A`/`AAAA`/
5//! `PTR` for those directly. For names it is *not* authoritative for, it brings tsnet-style
6//! split-DNS and recursive resolution:
7//!
8//! - **Split DNS** ([`DnsConfig::routes`]): the longest matching suffix route forwards the query
9//! to one of that route's upstream resolvers. A route with an **empty** upstream list is a
10//! negative route — names under it are `NXDOMAIN` (Go keeps them on the built-in resolver; for
11//! us that means fail-closed unless an overlay/extra record matched first).
12//! - **Recursive** ([`DnsConfig::fallback_resolvers`] / [`DnsConfig::resolvers`]): names matching
13//! no route are forwarded to the fallback resolvers, else the global resolvers.
14//! - **Fail closed**: if no route and no resolver is configured, an unknown name is `NXDOMAIN`.
15//!
16//! Anti-leak / IPv6-off posture: upstream forwarding binds `0.0.0.0:0` (UDP, IPv4 only) and never
17//! opens an IPv6 socket. AAAA handling is gated on [`DnsView::enable_ipv6`] (default off): with the
18//! gate OFF an AAAA query for a tailnet/overlay/self name returns NoError with an empty answer
19//! (NODATA) rather than the overlay v6 address — answering a v6 the IPv4-only client can't route
20//! would only create dead connections and a fingerprint. With the gate ON, AAAA is answered from
21//! overlay data (the v6 overlay addr), as historically. AAAA for tailnet names is never forwarded
22//! to a recursive upstream regardless of the gate.
23//!
24//! - MagicDNS disabled (`dns_config == None` or `magic_dns == false`), OR the node does not accept
25//! the tailnet DNS config ([`DnsView::accept_dns`] is `false`, i.e. `--accept-dns` / `CorpDNS`
26//! off) => `REFUSED` for every query (the responder serves nothing, mirroring Go applying an empty
27//! `dns.Config` when `CorpDNS` is off).
28//! - A qtype/class we don't serve authoritatively (anything but IN-class A/AAAA/PTR — TXT, SRV, MX,
29//! HTTPS/SVCB, a CHAOS-class query, …) => NODATA (empty NOERROR) for a tailnet-authoritative name,
30//! forwarded verbatim to upstream for an off-tailnet name — exactly like Go's resolver, NOT
31//! `REFUSED` (a stub reads REFUSED as "won't serve me" and abandons the resolver). Tailnet reverse
32//! zones (CGNAT `in-addr.arpa` / any `ip6.arpa`) still fail closed to NXDOMAIN for every qtype
33//! (never forwarded — anti-leak).
34//! - Malformed query => dropped (no response).
35
36use std::{
37 net::{IpAddr, Ipv4Addr, SocketAddr},
38 sync::Arc,
39 time::Duration,
40};
41
42use kameo::{
43 actor::ActorRef,
44 message::{Context, Message},
45};
46use netstack::{CreateSocket, netcore::Channel};
47use tokio::{
48 sync::{Semaphore, watch},
49 task::JoinSet,
50 time::timeout,
51};
52use ts_control::{DnsConfig, DnsResolver, Node};
53use ts_dns_wire::{Name, QType, RData, Rcode, decode_query, encode_response};
54
55use crate::{
56 Error,
57 env::Env,
58 peer_tracker::{PeerDb, PeerState},
59};
60
/// Per-upstream deadline for a forwarded query: if the resolver has not answered within this
/// window the forwarder gives up on it.
const UPSTREAM_TIMEOUT: Duration = Duration::from_secs(5);
/// Upper bound on forwarded queries that may be in flight at once on the local
/// `100.100.100.100:53` responder.
///
/// Every forward runs on its own task and keeps an overlay UDP socket open until the upstream
/// replies or [`UPSTREAM_TIMEOUT`] expires. Left unbounded, a client spraying distinct forwardable
/// names at a slow or black-holed upstream could pile up sockets and tasks without limit (each
/// one lingering for the full timeout) — a resource-exhaustion DoS. The bound follows the same
/// scheme as the peerAPI DoH server's `MAX_INFLIGHT` ([`crate::peerapi`]): a permit is taken
/// before spawning, and when none is available the query is dropped (fail closed). Dropping a DNS
/// query is benign — the stub resolver retries or times out — and Go's resolver also limits its
/// outstanding forwards.
const MAX_INFLIGHT_FORWARDS: usize = 512;
/// Largest forwarded upstream response (one UDP datagram) that is relayed untruncated, in bytes.
///
/// Sized like Go's forwarder read buffer (`maxResponseBytes`, ~4 KiB). Queries are forwarded
/// verbatim, so a client that advertises a large EDNS UDP size can legitimately receive a
/// 1300–4096 byte answer (large TXT sets, DNSSEC, long round-robin lists). The previous 1232-byte
/// cap truncated such answers and set TC, which asks for a TCP retry that this UDP-only forwarder
/// cannot serve — making the answer unreachable. At 4 KiB they pass through intact.
const MAX_UPSTREAM_RESPONSE: usize = 4 * 1024;
82
/// Address the MagicDNS service listens on. The netstack interface owns this IP, so binding UDP
/// to it is what delivers the tailnet's DNS traffic to the responder.
const MAGIC_DNS_IP: Ipv4Addr = Ipv4Addr::new(100, 100, 100, 100);
/// UDP port the MagicDNS service listens on (the standard DNS port).
const MAGIC_DNS_PORT: u16 = 53;
88
/// Snapshot of everything the answer loop needs to resolve a query.
///
/// The actor's message handlers rebuild it (from control `StateUpdate` and peer `PeerState`
/// updates) and the answer loop reads the current value afresh for every packet. The derived
/// [`Default`] is a view that serves nothing (`cfg.magic_dns` and `accept_dns` both `false`).
#[derive(Clone, Default)]
pub(crate) struct DnsView {
    /// The DNS configuration. `magic_dns == false` (the default) means serve nothing: [`decide`]
    /// answers every query `REFUSED`.
    pub(crate) cfg: DnsConfig,
    /// The current peer database, or `None` until the first peer update has been seen.
    pub(crate) peers: Option<Arc<PeerDb>>,
    /// This node, or `None` until the first self-node update has been seen.
    pub(crate) self_node: Option<Node>,
    /// The peerAPI DoH socket address of the currently-selected exit node, if one is active and can
    /// proxy DNS ([`Node::peerapi_doh_addr`]). When set, the MagicDNS *client* serve loop delegates
    /// recursive resolution to this address over the overlay instead of forwarding to the locally
    /// configured upstream resolvers — so recursive DNS egresses from the exit node, not this host.
    /// See [`recursive_plan`] for the exact delegation rule.
    ///
    /// Only consumed by the local MagicDNS responder's serve loop (the client side). The peerAPI
    /// DoH *server* shares this same view but ignores this field: an exit-node DNS proxy resolves
    /// recursively itself (gated by `forward_exit_egress`), it never re-delegates to its own exit
    /// node. `None` means no active exit node / no DoH delegation — recursion stays local.
    pub(crate) exit_doh: Option<SocketAddr>,
    /// Whether IPv6 is enabled on the tailnet overlay (from [`Env::enable_ipv6`], default `false`).
    ///
    /// Governs the AAAA answer path only: with the gate OFF (default) an AAAA query for a
    /// tailnet/overlay/self name is answered NoError-with-empty-answer (NODATA) instead of the
    /// overlay v6 address; with it ON, AAAA is answered from overlay data as historically. Set once
    /// from the runtime `Env` when the actor starts; never changes for the life of the runtime.
    pub(crate) enable_ipv6: bool,
    /// Whether the tailnet's DNS configuration is accepted (`--accept-dns` / `CorpDNS`, from
    /// [`Env::accept_dns`]). When `false`, [`decide`] refuses every query (the responder serves
    /// nothing), mirroring Go applying an empty `dns.Config` when `CorpDNS` is off — so a node can
    /// join for connectivity without taking over DNS.
    ///
    /// Unlike [`enable_ipv6`](DnsView::enable_ipv6) (snapshotted once at actor spawn), this is
    /// runtime-settable via `Device::set_accept_dns`, so it is re-read from the live
    /// [`Env::accept_dns`] cell on **every** view rebuild (the `StateUpdate` and `PeerState`
    /// handlers), not just at spawn — otherwise a runtime toggle would never reach the served view.
    pub(crate) accept_dns: bool,
}
129
130impl DnsView {
131 /// Find the node (peer or self) that answers to `name`, case/dot-insensitively.
132 fn node_by_name(&self, name: &str) -> Option<Node> {
133 if let Some(node) = self
134 .peers
135 .as_ref()
136 .and_then(|p| p.get(&name).map(|(_, n)| n.clone()))
137 {
138 return Some(node);
139 }
140
141 self.self_node
142 .as_ref()
143 .filter(|n| n.matches_name(name))
144 .cloned()
145 }
146
147 /// Resolve `canon` to an answer address of the requested family. A tailnet peer/self match
148 /// wins first — tried as written and then qualified by each tailnet search domain (so a
149 /// short/partially-qualified name like `host` or `host.user` still resolves to
150 /// `host.user.ts.net`). Failing that, a control-pushed [`ExtraRecord`] of the matching family
151 /// answers, matched as a fully-qualified name only (no search-domain expansion — like Go tsnet,
152 /// ExtraRecords are authoritative FQDN entries, not subject to client search-list qualification).
153 /// Still fail-closed: only ever resolves to a known tailnet peer/self or an explicitly
154 /// control-pushed static record — never anything else.
155 fn resolve_addr(&self, canon: &str, want_v4: bool) -> Option<IpAddr> {
156 let addr_of = |node: Node| -> IpAddr {
157 if want_v4 {
158 IpAddr::from(node.tailnet_address.ipv4.addr())
159 } else {
160 IpAddr::from(node.tailnet_address.ipv6.addr())
161 }
162 };
163
164 if let Some(node) = self.node_by_name(canon) {
165 return Some(addr_of(node));
166 }
167 for suffix in &self.cfg.search_domains {
168 if let Some(node) = self.node_by_name(&format!("{canon}.{suffix}")) {
169 return Some(addr_of(node));
170 }
171 }
172
173 // Control-pushed static records match the fully-qualified query name only.
174 self.cfg.extra_records.iter().find_map(|rec| {
175 let family_ok = matches!(
176 (rec.addr, want_v4),
177 (IpAddr::V4(_), true) | (IpAddr::V6(_), false)
178 );
179 (rec.name == canon && family_ok).then_some(rec.addr)
180 })
181 }
182
183 /// Find the node (peer or self) that owns the tailnet IP `ip`.
184 fn node_by_ip(&self, ip: IpAddr) -> Option<Node> {
185 if let Some(node) = self
186 .peers
187 .as_ref()
188 .and_then(|p| p.get(&ip).map(|(_, n)| n.clone()))
189 {
190 return Some(node);
191 }
192
193 self.self_node
194 .as_ref()
195 .filter(|n| {
196 IpAddr::from(n.tailnet_address.ipv4.addr()) == ip
197 || IpAddr::from(n.tailnet_address.ipv6.addr()) == ip
198 })
199 .cloned()
200 }
201
202 /// Decide how to resolve a non-overlay `name` against the split-DNS routes and recursive
203 /// resolvers, returning the upstreams to forward to.
204 ///
205 /// Longest-suffix wins among [`DnsConfig::routes`]: a route's suffix matches `name` if `name`
206 /// equals it or ends with `.suffix`. A matched route with a non-empty upstream list forwards
207 /// there; a matched route with an **empty** list is a negative route ([`Upstreams::Block`] =>
208 /// NXDOMAIN). With no route match, [`DnsConfig::fallback_resolvers`] (preferred) or
209 /// [`DnsConfig::resolvers`] resolve recursively; if neither is configured we stay fail-closed
210 /// ([`Upstreams::None`] => NXDOMAIN).
211 fn route_for(&self, name: &str) -> Upstreams<'_> {
212 let mut best: Option<(&str, &Vec<DnsResolver>)> = None;
213 for (suffix, upstreams) in &self.cfg.routes {
214 if suffix_matches(name, suffix) && best.is_none_or(|(b, _)| suffix.len() > b.len()) {
215 best = Some((suffix.as_str(), upstreams));
216 }
217 }
218
219 if let Some((_, upstreams)) = best {
220 return if upstreams.is_empty() {
221 Upstreams::Block
222 } else {
223 // A deliberately-configured split-DNS route: not eligible for exit-node DoH
224 // delegation — these upstreams (e.g. an internal resolver reachable over a subnet
225 // route) must keep receiving the query directly.
226 Upstreams::Route(upstreams)
227 };
228 }
229
230 if !self.cfg.fallback_resolvers.is_empty() {
231 return Upstreams::Recursive(&self.cfg.fallback_resolvers);
232 }
233 if !self.cfg.resolvers.is_empty() {
234 return Upstreams::Recursive(&self.cfg.resolvers);
235 }
236 Upstreams::None
237 }
238}
239
/// Outcome of routing a non-overlay query name: either the resolvers it should be forwarded to, or
/// the reason it must not be forwarded. Produced by `DnsView::route_for`; the borrowed resolver
/// slices live as long as the view they were taken from.
enum Upstreams<'a> {
    /// A split-DNS route matched: forward to these route-specific upstreams (never DoH-delegated).
    Route(&'a [DnsResolver]),
    /// No route matched: forward to these recursive (fallback/global) resolvers. Eligible for
    /// exit-node DoH delegation in the client serve loop.
    Recursive(&'a [DnsResolver]),
    /// A negative split-DNS route (one with an empty resolver list) matched: do not resolve
    /// (NXDOMAIN).
    Block,
    /// No route matched and no resolver is configured: fail closed (NXDOMAIN).
    None,
}
252
/// What the (sync) decision step ([`decide`]) concluded for a query: either a complete response to
/// send back, or a request to forward the original query to an upstream resolver. A query that
/// should be dropped without any answer is not a variant here — [`decide`] returns `None` for it.
pub(crate) enum Decision {
    /// A fully-formed, already-encoded DNS response that is ready to send.
    Reply(Vec<u8>),
    /// Forward the original query datagram to one of these upstream UDP resolvers; on success
    /// relay the upstream answer, on failure/timeout answer with the pre-encoded `nxdomain`.
    Forward {
        /// UDP upstreams to try, in order. Never empty, and IPv4-only, when built by
        /// [`forward_or_nxdomain`].
        upstreams: Vec<SocketAddr>,
        /// The original query bytes to forward verbatim.
        query: Vec<u8>,
        /// Fallback NXDOMAIN response (same id + question as the query) if every upstream fails.
        nxdomain: Vec<u8>,
        /// Whether this is a *recursive* (catch-all fallback/global resolver) forward, as opposed
        /// to a deliberately-configured split-DNS route. Only recursive forwards are eligible for
        /// exit-node DoH delegation in the client serve loop (see [`DnsView::exit_doh`]); split-DNS
        /// routes always stay on their configured upstreams (typically subnet-reachable internal
        /// resolvers). The peerAPI DoH *server* ignores this flag entirely.
        recursive: bool,
    },
}
275
/// Label-boundary suffix test: `true` when `name` is exactly `suffix`, or ends with `suffix`
/// immediately preceded by a `.` (`"a.corp"` is under `"corp"`; `"acorp"` is not).
///
/// An **empty** `suffix` never matches. This is defense-in-depth: every string ends with `""`, so
/// an empty suffix would otherwise match any name and cause over-routing or make everything look
/// like a tailnet name — both leak-prone. The comparison is byte-exact (no case folding), so
/// callers are expected to pass canonicalized names.
fn suffix_matches(name: &str, suffix: &str) -> bool {
    if suffix.is_empty() {
        return false;
    }
    // Whatever precedes the suffix must be nothing at all (exact match) or end at a label dot.
    name.strip_suffix(suffix)
        .is_some_and(|head| head.is_empty() || head.ends_with('.'))
}
289
290/// Returns `true` if `name` falls under one of the tailnet search domains. Such names are
291/// authoritative MagicDNS names and are NEVER forwarded to an upstream resolver — anti-leak: a
292/// tailnet name (and the fact that it was queried) must not escape to a third-party resolver.
293fn is_tailnet_name(view: &DnsView, name: &str) -> bool {
294 view.cfg
295 .search_domains
296 .iter()
297 .any(|suffix| suffix_matches(name, suffix))
298}
299
300/// Whether `name` is an IPv6 reverse-DNS (`PTR`) name (ends in `ip6.arpa`). This fork is IPv4-only
301/// on the tailnet; an IPv6 reverse lookup must NEVER be forwarded to a third-party resolver
302/// (anti-leak: it would reveal that a tailnet v6 address — e.g. a ULA `fd7a:…` — was probed). All
303/// such queries fail closed to NXDOMAIN.
304fn is_ip6_arpa(name: &str) -> bool {
305 suffix_matches(name, "ip6.arpa")
306}
307
/// Whether `ip` falls inside `100.64.0.0/10`, the RFC 6598 CGNAT block that the tailnet uses for
/// its IPv4 addresses.
///
/// MagicDNS is authoritative for reverse (`PTR`) lookups of these addresses: when no peer owns the
/// IP the query fails closed to NXDOMAIN instead of being forwarded to a third-party resolver.
fn is_tailnet_cgnat(ip: Ipv4Addr) -> bool {
    let [first, second, ..] = ip.octets();
    // A /10 fixes the first octet plus the top two bits of the second (0b01xx_xxxx = 64..=127).
    first == 100 && (second & 0xC0) == 0x40
}
315
316/// Decide what to do with a single DNS query against `view`: either a complete response is ready
317/// ([`Decision::Reply`]), the query should be forwarded to upstream resolvers
318/// ([`Decision::Forward`]), or the packet should be dropped without answering (`None`).
319///
320/// Pure (no I/O), factored out of the socket loop so it can be unit-tested without a netstack. It
321/// never panics and fails closed: an unknown, unroutable, or tailnet-suffix name resolves to
322/// NXDOMAIN rather than leaking to an upstream resolver.
323pub(crate) fn decide(view: &DnsView, buf: &[u8]) -> Option<Decision> {
324 // Malformed / non-query input is dropped: we never answer something we can't parse.
325 let query = decode_query(buf).ok()?;
326 let q = &query.question;
327 let id = query.id;
328
329 let reply = |rcode, answers: &[RData]| Decision::Reply(encode_response(id, q, rcode, answers));
330
331 // Fail closed: MagicDNS off, or the node doesn't accept the tailnet's DNS config
332 // (`--accept-dns` / `CorpDNS` is false) => serve nothing. The `accept_dns` gate mirrors Go
333 // applying an empty `dns.Config` when `CorpDNS` is off: the node ignores the control-pushed DNS
334 // config and refuses every query. This one read site covers the netstack responder, the peerAPI
335 // DoH server that shares the view, and (via `tun_actor::plan_intercept`) the TUN query path.
336 if !view.cfg.magic_dns || !view.accept_dns {
337 return Some(reply(Rcode::Refused, &[]));
338 }
339
340 let canon = q.name.to_canon();
341
342 // We only serve the internet (IN) class authoritatively. A non-IN class (CHAOS, HESIOD, the
343 // ANY/255 class, ...) is NOT refused outright: Go's local resolver does no class check and
344 // forwards such a query like any other name. Treat it as an unsupported authoritative type —
345 // NODATA for a tailnet name, forward for an off-tailnet name — so a `CH TXT version.bind`
346 // diagnostic or a `qclass=ANY` probe reaches upstream instead of getting REFUSED.
347 const CLASS_IN: u16 = 1;
348 if q.qclass != CLASS_IN {
349 return Some(forward_or_nodata(view, &canon, buf, id, q));
350 }
351
352 Some(match &q.qtype {
353 QType::A => match view.resolve_addr(&canon, true) {
354 Some(IpAddr::V4(v4)) => reply(Rcode::NoError, &[RData::A(v4.octets())]),
355 // No overlay/extra-record answer: try split-DNS / recursive upstreams.
356 _ => forward_or_nxdomain(view, &canon, buf, id, q),
357 },
358 QType::Aaaa => match view.resolve_addr(&canon, false) {
359 // A tailnet/overlay/self (or extra-record) AAAA match. Gate on IPv6: with IPv6 OFF
360 // (default) the client is IPv4-only, so answering with the overlay v6 address would
361 // only hand out an unroutable address — dead connections plus a fingerprint. Return
362 // NoError with an empty answer (NODATA) instead. With the gate ON, answer from overlay
363 // data as historically. We never forward this name to a recursive upstream either way:
364 // a positive overlay match is authoritative.
365 Some(IpAddr::V6(v6)) if view.enable_ipv6 => {
366 reply(Rcode::NoError, &[RData::Aaaa(v6.octets())])
367 }
368 Some(IpAddr::V6(_)) => reply(Rcode::NoError, &[]),
369 // No overlay/extra-record answer: split-DNS / recursive upstreams (off-tailnet names);
370 // tailnet names fail closed to NXDOMAIN inside `forward_or_nxdomain`.
371 _ => forward_or_nxdomain(view, &canon, buf, id, q),
372 },
373 QType::Ptr => match q.name.ptr_to_ipv4() {
374 Some(octets) => {
375 let v4: Ipv4Addr = octets.into();
376 let ip = IpAddr::V4(v4);
377 match view.node_by_ip(ip) {
378 Some(node) => {
379 let fqdn = node.fqdn(false);
380 let labels: Vec<String> = fqdn.split('.').map(str::to_owned).collect();
381 reply(Rcode::NoError, &[RData::Ptr(Name(labels))])
382 }
383 // Anti-leak: a reverse query for an IP in the tailnet CGNAT range
384 // (100.64.0.0/10) that misses the peer set is authoritative-but-unknown; fail
385 // closed to NXDOMAIN rather than leaking the probed tailnet IP upstream. Only
386 // genuinely off-tailnet reverse queries are forwarded.
387 None if is_tailnet_cgnat(v4) => reply(Rcode::NxDomain, &[]),
388 None => forward_or_nxdomain(view, &canon, buf, id, q),
389 }
390 }
391 // Anti-leak / IPv4-only-tailnet: an IPv6 reverse (`ip6.arpa`) PTR must never be
392 // forwarded — relaying it would reveal that a tailnet v6 address (e.g. a ULA `fd7a:…`)
393 // was probed. Fail closed to NXDOMAIN, exactly like the IPv4 CGNAT guard above.
394 None if is_ip6_arpa(&canon) => reply(Rcode::NxDomain, &[]),
395 None => forward_or_nxdomain(view, &canon, buf, id, q),
396 },
397 // Anything else (TXT, SRV, MX, HTTPS/SVCB, CNAME, ...): we hold no authoritative record of
398 // that type, so — like Go's resolver — forward it to upstream for an off-tailnet name and
399 // return NODATA (empty NOERROR) for a tailnet-authoritative name. NOT REFUSED: a stub reads
400 // REFUSED as "this server won't serve me" and abandons the resolver, which would break
401 // ordinary client lookups (notably HTTPS/SVCB type 65, issued routinely by browsers for
402 // HTTP/3 + ECH) for the same off-tailnet names whose A/AAAA already forward.
403 QType::Other(_) => forward_or_nodata(view, &canon, buf, id, q),
404 })
405}
406
407/// For a name with no overlay answer, consult the split-DNS routes + recursive resolvers and
408/// either forward (to UDP upstreams) or fail closed with NXDOMAIN.
409///
410/// Anti-leak: a name under a tailnet search domain is authoritative and is never forwarded — it
411/// fails closed to NXDOMAIN so neither the name nor the query leaks to a third-party resolver.
412fn forward_or_nxdomain(
413 view: &DnsView,
414 canon: &str,
415 buf: &[u8],
416 id: u16,
417 q: &ts_dns_wire::Question,
418) -> Decision {
419 let nxdomain = encode_response(id, q, Rcode::NxDomain, &[]);
420
421 if is_tailnet_name(view, canon) {
422 return Decision::Reply(nxdomain);
423 }
424
425 let (resolvers, recursive) = match view.route_for(canon) {
426 Upstreams::Route(resolvers) => (resolvers, false),
427 Upstreams::Recursive(resolvers) => (resolvers, true),
428 // Negative route or nothing configured: fail closed.
429 Upstreams::Block | Upstreams::None => return Decision::Reply(nxdomain),
430 };
431
432 let upstreams: Vec<SocketAddr> = resolvers
433 .iter()
434 .map(DnsResolver::udp_addr)
435 // Anti-leak / IPv6-off: only forward over IPv4 upstreams; never open a v6 socket.
436 .filter(SocketAddr::is_ipv4)
437 .collect();
438 if upstreams.is_empty() {
439 Decision::Reply(nxdomain)
440 } else {
441 Decision::Forward {
442 upstreams,
443 query: buf.to_vec(),
444 nxdomain,
445 recursive,
446 }
447 }
448}
449
450/// The DNS query types Go's resolver explicitly leaves unimplemented for a tailnet-authoritative
451/// name, answering `RCodeNotImplemented` (NOTIMP) rather than NODATA (`net/dns/resolver/tsdns.go`
452/// `resolveLocal`: `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO`). The numeric type
453/// codes: NS=2, SOA=6, HINFO=13, AXFR=252.
454fn is_unimplemented_tailnet_qtype(qtype: &ts_dns_wire::QType) -> bool {
455 matches!(qtype, ts_dns_wire::QType::Other(2 | 6 | 13 | 252))
456}
457
458/// For a query whose *qtype/qclass* we don't serve authoritatively (anything other than an IN-class
459/// A/AAAA/PTR — e.g. TXT, SRV, MX, HTTPS/SVCB, or a CHAOS-class query): forward it to upstream like
460/// any other name, but for a tailnet-authoritative name return an empty NOERROR (NODATA) instead of
461/// NXDOMAIN — except the NS/SOA/HINFO/AXFR types Go answers NOTIMP for
462/// ([`is_unimplemented_tailnet_qtype`]).
463///
464/// This mirrors Go's resolver: an authoritative name with no record of the requested type returns
465/// `RCodeSuccess` with no answers ("the name exists, but no records of that type"), NOT NXDOMAIN and
466/// NOT REFUSED; a non-authoritative name is forwarded verbatim regardless of qtype. The fork
467/// previously REFUSED every non-A/AAAA/PTR qtype (and every non-IN class) for *all* names, which a
468/// stub resolver reads as "this server won't serve me" — so it would abandon the resolver, breaking
469/// ordinary client lookups (HTTPS/SVCB type 65 issued routinely by browsers for HTTP/3 + ECH, plus
470/// MX/TXT/SRV) for off-tailnet names that A/AAAA queries already forward. Refusing these was never an
471/// anti-leak measure (the same name's A/AAAA already egresses); it was just broken interop.
472///
473/// Anti-leak is preserved: a tailnet-suffix name still never leaves this node (NODATA, not forward),
474/// exactly as the A/AAAA path keeps a positive overlay match authoritative.
475fn forward_or_nodata(
476 view: &DnsView,
477 canon: &str,
478 buf: &[u8],
479 id: u16,
480 q: &ts_dns_wire::Question,
481) -> Decision {
482 // Authoritative tailnet name. For most unsupported types we answer NODATA (empty NOERROR) — the
483 // name exists, we just hold no record of that type. But a small set of types Go's resolver
484 // *explicitly* leaves unimplemented (`net/dns/resolver/tsdns.go` `resolveLocal`:
485 // `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR, dns.TypeHINFO: return RCodeNotImplemented`) must
486 // answer NOTIMP, not NODATA — a `dig NS`/`SOA`/`HINFO` against the tailnet zone is otherwise a
487 // clean fingerprint distinguishing this fork from real tailscaled. Off-tailnet names are
488 // unaffected (they forward below regardless of type); this NOTIMP applies only to a name we are
489 // authoritative for.
490 if is_tailnet_name(view, canon) {
491 let rcode = if is_unimplemented_tailnet_qtype(&q.qtype) {
492 Rcode::NotImpl
493 } else {
494 Rcode::NoError
495 };
496 return Decision::Reply(encode_response(id, q, rcode, &[]));
497 }
498 // Anti-leak parity with the `QType::Ptr` arm: a reverse query for a tailnet CGNAT IPv4
499 // (100.64.0.0/10) or ANY `ip6.arpa` name must NEVER egress to an upstream resolver, regardless
500 // of qtype/class — forwarding it would reveal that a specific tailnet IP was probed. The PTR arm
501 // enforces this (NXDOMAIN) but its guards live only inside that arm; without re-checking here, an
502 // exotic-qtype (TXT/ANY/…) or non-IN-class query for a tailnet reverse name would slip through to
503 // the forward path below. Fail closed to NXDOMAIN, matching the PTR arm's disposition.
504 if is_ip6_arpa(canon) {
505 return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
506 }
507 if let Some(octets) = q.name.ptr_to_ipv4()
508 && is_tailnet_cgnat(octets.into())
509 {
510 return Decision::Reply(encode_response(id, q, Rcode::NxDomain, &[]));
511 }
512 // Off-tailnet, non-reverse-zone: forward verbatim. `forward_or_nxdomain` already forwards
513 // non-tailnet names and fails closed (NXDOMAIN) when no upstream is configured/routable; reuse it
514 // (the tailnet branch above is already handled, so its tailnet→NXDOMAIN path is unreachable here).
515 forward_or_nxdomain(view, canon, buf, id, q)
516}
517
/// Client-side plan for a *recursive* forward: keep resolving over local UDP upstreams, or delegate
/// the query to the active exit node's peerAPI DoH endpoint over the overlay. Computed by
/// [`recursive_plan`].
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum RecursivePlan {
    /// Forward over UDP to these upstreams. Used when no exit node is active (the caller's default
    /// upstreams), or when the config has `use_with_exit_node` resolvers (kept local even with an
    /// exit node selected; only their IPv4 addresses are listed).
    Udp(Vec<SocketAddr>),
    /// Delegate the query to the exit node's peerAPI DoH server at this overlay address.
    Doh(SocketAddr),
}
528
529/// Decide whether a recursive forward should stay on local UDP upstreams or be delegated to the
530/// active exit node's DoH endpoint. Pure (no I/O) so the delegation rule is unit-testable.
531///
532/// - No active exit node ([`DnsView::exit_doh`] is `None`) => keep `default_upstreams` (UDP).
533/// - Exit node active, but the config has [`use_with_exit_node`][ts_control::DnsResolver::use_with_exit_node]
534/// resolvers => those resolvers stay local (Go keeps `UseWithExitNode` resolvers when an exit node
535/// is selected); forward to them over UDP, do NOT delegate.
536/// - Exit node active, no kept-local resolvers => delegate to the exit node's DoH. Recursive DNS
537/// then egresses from the exit node, not this host (the whole point of routing through an exit
538/// node: this node's real IP is never used to resolve the peer's public names).
539pub(crate) fn recursive_plan(view: &DnsView, default_upstreams: Vec<SocketAddr>) -> RecursivePlan {
540 let Some(doh) = view.exit_doh else {
541 return RecursivePlan::Udp(default_upstreams);
542 };
543 let kept: Vec<SocketAddr> = view
544 .cfg
545 .resolvers_with_exit_node()
546 .map(DnsResolver::udp_addr)
547 // Anti-leak / IPv6-off: only ever resolve over IPv4 upstreams; never open a v6 socket.
548 .filter(SocketAddr::is_ipv4)
549 .collect();
550 if kept.is_empty() {
551 RecursivePlan::Doh(doh)
552 } else {
553 RecursivePlan::Udp(kept)
554 }
555}
556
557/// Cap a forwarded upstream response to a single UDP datagram ([`MAX_UPSTREAM_RESPONSE`]). When the
558/// response is too large it is truncated mid-message, so we set the `TC` (truncation) flag in the
559/// DNS header (byte 2, bit `0x02`) telling the stub resolver to retry over TCP — relaying a chopped
560/// answer without `TC` would surface a malformed-but-"complete" message. The flag is only set when
561/// truncation actually occurs.
562fn cap_response(mut resp: Vec<u8>) -> Vec<u8> {
563 if resp.len() > MAX_UPSTREAM_RESPONSE {
564 resp.truncate(MAX_UPSTREAM_RESPONSE);
565 // The header is 12 bytes; the TC bit lives in the second flags byte (header byte 2). A
566 // capped datagram is always >= the header length, but guard anyway to never panic.
567 if let Some(flags_hi) = resp.get_mut(2) {
568 *flags_hi |= 0x02;
569 }
570 }
571 resp
572}
573
/// The byte length of a fixed DNS header.
const DNS_HEADER_LEN: usize = 12;

/// Return the byte range of the first question section (QNAME + QTYPE + QCLASS) within `msg`,
/// starting just after the 12-byte header.
///
/// Returns [`None`] if the header is incomplete, the header declares no question (QDCOUNT,
/// header bytes 4..6, is zero), the name is malformed or uses a compression pointer (illegal in
/// a question), or the question runs past the buffer. Used to byte-compare a forwarded query's
/// question against the upstream response's question.
fn question_range(msg: &[u8]) -> Option<std::ops::Range<usize>> {
    // A message that declares zero questions has no question section: whatever bytes follow the
    // header belong to another section and must not be mistaken for (or compared as) a question.
    let qdcount = u16::from_be_bytes([*msg.get(4)?, *msg.get(5)?]);
    if qdcount == 0 {
        return None;
    }

    let mut off = DNS_HEADER_LEN;
    // Walk the QNAME label sequence to the terminating root label (0x00).
    loop {
        let len = *msg.get(off)? as usize;
        // A compression pointer (or any length byte with one of the top two bits set) is not
        // valid in a question section.
        if len & 0xC0 != 0 {
            return None;
        }
        off += 1;
        if len == 0 {
            break; // root label: QNAME complete.
        }
        off = off.checked_add(len)?;
        if off > msg.len() {
            return None;
        }
    }
    // QTYPE (2) + QCLASS (2) follow the name.
    let end = off.checked_add(4)?;
    if end > msg.len() {
        return None;
    }
    Some(DNS_HEADER_LEN..end)
}
606
607/// Whether `resp` is a plausible DNS response to `query`: same 16-bit transaction id, the QR
608/// (response) bit set, and a byte-identical question section (QNAME + QTYPE + QCLASS). Both buffers
609/// carry the DNS header in the first 12 bytes (id at [0..2], flags at [2..4], QR is the high bit of
610/// byte 2). Used to reject off-path/forged datagrams before relaying them back to the stub resolver
611/// as authoritative: matching only the id + QR lets an injector that guesses the id swap in an
612/// answer for a different question, so we also require the echoed question to match.
613fn response_matches_query(query: &[u8], resp: &[u8]) -> bool {
614 if query.len() < DNS_HEADER_LEN || resp.len() < DNS_HEADER_LEN {
615 return false;
616 }
617 let id_matches = query[0..2] == resp[0..2];
618 let is_response = resp[2] & 0x80 != 0;
619 if !id_matches || !is_response {
620 return false;
621 }
622 // The response must echo the exact question we asked. Parse both question sections and compare
623 // their bytes; a parse failure on either side is treated as a non-match (fail closed).
624 match (question_range(query), question_range(resp)) {
625 (Some(q), Some(r)) => query[q] == resp[r],
626 _ => false,
627 }
628}
629
630/// Forward `query` to each upstream in order over the **overlay** netstack, returning the first
631/// well-formed response, or `nxdomain` if every upstream times out or errors.
632///
633/// Anti-leak: forwarding goes through the overlay netstack `channel` (a fresh `0.0.0.0:0` overlay
634/// UDP socket per query), NEVER a host socket — so the real origin IP can't leak to the resolver,
635/// and split-DNS upstreams reachable only over the tailnet/subnet-router work. Each upstream is
636/// bounded by [`UPSTREAM_TIMEOUT`]; responses are capped at [`MAX_UPSTREAM_RESPONSE`].
637pub(crate) async fn forward_query(
638 channel: &Channel,
639 upstreams: &[SocketAddr],
640 query: &[u8],
641 nxdomain: Vec<u8>,
642) -> Vec<u8> {
643 for upstream in upstreams {
644 let socket = match channel
645 .udp_bind(SocketAddr::from((Ipv4Addr::UNSPECIFIED, 0)))
646 .await
647 {
648 Ok(s) => s,
649 Err(e) => {
650 tracing::warn!(error = %e, %upstream, "magic dns upstream bind failed");
651 continue;
652 }
653 };
654
655 if let Err(e) = socket.send_to(*upstream, query).await {
656 tracing::warn!(error = %e, %upstream, "magic dns upstream send failed");
657 continue;
658 }
659
660 match timeout(UPSTREAM_TIMEOUT, socket.recv_from_bytes()).await {
661 Ok(Ok((from, resp))) if !resp.is_empty() => {
662 // Anti-poisoning: only accept a datagram that came from the upstream we queried
663 // and whose DNS header matches this query (same transaction id, QR=response bit
664 // set). An off-path injector racing the real answer is otherwise relayed straight
665 // back to the stub resolver as authoritative.
666 if from.ip() != upstream.ip() || !response_matches_query(query, &resp) {
667 tracing::debug!(%upstream, %from, "magic dns dropping unsolicited/mismatched response");
668 continue;
669 }
670 return cap_response(resp.to_vec());
671 }
672 Ok(Ok(_)) => continue,
673 Ok(Err(e)) => {
674 tracing::warn!(error = %e, %upstream, "magic dns upstream recv failed");
675 continue;
676 }
677 Err(_) => {
678 tracing::debug!(%upstream, "magic dns upstream timed out");
679 continue;
680 }
681 }
682 }
683 nxdomain
684}
685
686/// Run the receive/answer loop for the bound socket until it (or the netstack) goes away.
687///
688/// Authoritative answers are sent inline. Forwarded queries are handled on spawned tasks (each
689/// cloning the overlay `channel`) so a slow upstream never blocks other queries.
690async fn serve(
691 socket: netstack::netsock::UdpSocket,
692 rx: watch::Receiver<Arc<DnsView>>,
693 channel: Channel,
694) {
695 let socket = Arc::new(socket);
696 let mut forwards = JoinSet::new();
697 // Bounds concurrent in-flight forwards (see `MAX_INFLIGHT_FORWARDS`); a permit is held for the
698 // lifetime of each spawned forward task and released on completion.
699 let inflight = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));
700 loop {
701 let (src, buf) = match socket.recv_from_bytes().await {
702 Ok(pkt) => pkt,
703 Err(e) => {
704 tracing::warn!(error = %e, "magic dns socket recv failed, stopping responder");
705 return;
706 }
707 };
708
709 // Read the freshest view per packet.
710 let view = rx.borrow().clone();
711
712 match decide(&view, &buf) {
713 // Malformed query: drop silently.
714 None => continue,
715 Some(Decision::Reply(resp)) => {
716 if let Err(e) = socket.send_to(src, &resp).await {
717 tracing::warn!(error = %e, %src, "magic dns response send failed");
718 }
719 }
720 Some(Decision::Forward {
721 upstreams,
722 query,
723 nxdomain,
724 recursive,
725 }) => {
726 // A recursive forward is eligible for exit-node DoH delegation; a split-DNS route
727 // always stays on its configured upstreams. Decide the plan against the current
728 // view so a query routed while an exit node is active egresses from that exit node.
729 let plan = if recursive {
730 recursive_plan(&view, upstreams)
731 } else {
732 RecursivePlan::Udp(upstreams)
733 };
734 // Fail closed at the in-flight cap: drop the query (the stub resolver retries or
735 // times out) rather than spawn an unbounded task that pins an overlay socket for up
736 // to UPSTREAM_TIMEOUT. The permit is moved into the task as a named `_permit` binding
737 // (NOT `let _ =`, which would drop it immediately) so it is released only when the
738 // task body completes.
739 let Ok(permit) = inflight.clone().try_acquire_owned() else {
740 tracing::warn!(
741 %src,
742 max = MAX_INFLIGHT_FORWARDS,
743 "magic dns drop: at max in-flight forwarded queries"
744 );
745 continue;
746 };
747 let socket = socket.clone();
748 let channel = channel.clone();
749 forwards.spawn(async move {
750 let _permit = permit;
751 let resp = match plan {
752 RecursivePlan::Udp(upstreams) => {
753 forward_query(&channel, &upstreams, &query, nxdomain).await
754 }
755 RecursivePlan::Doh(doh_addr) => {
756 crate::peerapi_doh::forward_doh(&channel, doh_addr, &query, nxdomain)
757 .await
758 }
759 };
760 if let Err(e) = socket.send_to(src, &resp).await {
761 tracing::warn!(error = %e, %src, "magic dns forwarded response send failed");
762 }
763 });
764 }
765 }
766
767 // Reap finished forward tasks without blocking. The unreaped completed-handle backlog is
768 // bounded by MAX_INFLIGHT_FORWARDS (a task spawns only after acquiring a permit, and there
769 // are at most that many), so this bounds JoinSet memory too — not just the reap cadence.
770 while forwards.try_join_next().is_some() {}
771 }
772}
773
774/// The MagicDNS responder actor.
775///
776/// Subscribes to control state (for the DNS config + self node) and peer state (for the peer
777/// database), keeping a [`DnsView`] that the spawned answer loop reads for every query.
778pub struct MagicDnsActor {
779 /// Keeps the socket-serving task alive for the lifetime of the actor.
780 _joinset: JoinSet<()>,
781 /// The latest view, shared with the answer loop.
782 view_tx: watch::Sender<Arc<DnsView>>,
783 /// The runtime [`Env`], retained so each view rebuild (the `StateUpdate` / `PeerState` handlers)
784 /// can re-read the live [`Env::accept_dns`] cell. Unlike `enable_ipv6` (snapshotted once at
785 /// spawn), `accept_dns` is runtime-settable via `Device::set_accept_dns`, so it must be read at
786 /// rebuild time — not captured once — for a toggle to reach the served view.
787 env: Env,
788 /// The overlay channel, retained so the [`Query`] handler can run a query through the same
789 /// forward path the serve loop uses ([`forward_query`] / [`forward_doh`], both binding
790 /// `0.0.0.0:0` on this channel — never a host socket).
791 channel: Channel,
792}
793
/// Actor message: resolve a name through the live MagicDNS responder (the `100.100.100.100`
/// path) on behalf of [`Device::query_dns`](crate::Device::query_dns).
///
/// The handler builds a query packet and runs it through the same [`decide`]/forward logic as a
/// query received on the wire, so both the result and the anti-leak posture are what a tailnet
/// client would see.
pub struct Query {
    /// Canonical name to look up, e.g. `example.com` (no trailing dot).
    pub name: String,
    /// DNS query type as a raw RFC 1035 TYPE value (`1`=A, `28`=AAAA, `12`=PTR, or any other).
    pub qtype: u16,
}
804
805/// The outcome of a `Query`: the raw DNS response bytes, the RCODE, and which upstream resolvers
806/// (if any) were consulted. The response is returned as raw bytes (matching Go `LocalClient.QueryDNS`)
807/// rather than parsed records — this fork's wire codec has no answer-record decoder.
808///
809/// (`Query` is the crate-internal actor message; not linked here as it is a private item — a
810/// `pub` doc cannot intra-doc-link to it without erroring under the doc-lint gate.)
811#[derive(Debug, Clone, kameo::Reply)]
812pub struct DnsQueryResult {
813 /// The raw DNS response datagram (header + question + any answer records).
814 pub response: Vec<u8>,
815 /// The RCODE from the response header's low 4 bits (`0`=NoError, `2`=SERVFAIL, `3`=NXDOMAIN,
816 /// `5`=Refused, …).
817 pub rcode: u8,
818 /// The upstream resolver(s) the query was forwarded to. For a UDP forward this is the candidate
819 /// list tried in order (the forwarder returns on the first that answers); for an exit-node DoH
820 /// forward it is the single DoH endpoint. Empty for a locally-answered query (an authoritative
821 /// tailnet name, a NODATA, or a fail-closed NXDOMAIN — nothing egressed).
822 pub resolvers_consulted: Vec<SocketAddr>,
823}
824
825impl kameo::Actor for MagicDnsActor {
826 type Args = (Env, Channel);
827 type Error = Error;
828
829 async fn on_start(
830 (env, channel): Self::Args,
831 slf: ActorRef<Self>,
832 ) -> Result<Self, Self::Error> {
833 env.subscribe::<Arc<ts_control::StateUpdate>>(&slf).await?;
834 env.subscribe::<Arc<PeerState>>(&slf).await?;
835 env.subscribe::<crate::route_updater::ActiveExitNode>(&slf)
836 .await?;
837
838 // Seed the view with the runtime's IPv6 gate (default off) and the current accept-dns value.
839 // Subsequent control/peer updates clone-and-modify this view: `enable_ipv6` (set once here)
840 // is preserved, while `accept_dns` is re-read live from `Env` on every rebuild (it is
841 // runtime-settable). The seed value is moot — no query is served before the first
842 // StateUpdate — but seeding it keeps the pre-update view internally consistent.
843 let (view_tx, view_rx) = watch::channel(Arc::new(DnsView {
844 enable_ipv6: env.enable_ipv6,
845 accept_dns: env.accept_dns(),
846 ..DnsView::default()
847 }));
848
849 let mut joinset = JoinSet::new();
850
851 // Bind the MagicDNS socket. If the bind fails we still start (fail closed: the actor just
852 // never answers anything) so a transient bind error doesn't take down the runtime.
853 let addr = SocketAddr::from((MAGIC_DNS_IP, MAGIC_DNS_PORT));
854 match channel.udp_bind(addr).await {
855 Ok(socket) => {
856 tracing::debug!(%addr, "magic dns responder bound");
857 joinset.spawn(serve(socket, view_rx.clone(), channel.clone()));
858 }
859 Err(e) => {
860 tracing::error!(error = %e, %addr, "magic dns udp bind failed; responder inert");
861 }
862 }
863
864 // When this node advertises a peerAPI port, run the single peerAPI server on the same shared
865 // view. It routes `/dns-query` to the exit-node DoH handler (recursive resolution gated by
866 // `forward_exit_egress`, see `peerapi_doh`) and `/v0/put/<name>` to the Taildrop receive
867 // handler when a store is configured (access-gated, fail-closed, see `peerapi`).
868 if let Some(port) = env.peerapi_port {
869 let channel = channel.clone();
870 let view_rx = view_rx.clone();
871 let forward_exit_egress = env.forward_exit_egress;
872 let taildrop = env.taildrop_store.clone();
873 let funnel_ingress = env.funnel_ingress.clone();
874 joinset.spawn(crate::peerapi::serve(
875 channel,
876 port,
877 view_rx,
878 forward_exit_egress,
879 taildrop,
880 funnel_ingress,
881 ));
882 }
883
884 Ok(Self {
885 _joinset: joinset,
886 view_tx,
887 env,
888 channel,
889 })
890 }
891}
892
/// Build a header-only SERVFAIL reply for a [`Query`] whose name could not be turned into a
/// well-formed query (a non-ASCII label or a name longer than 255 bytes on the wire).
///
/// The result is exactly 12 bytes: QR=1 (it is a response), RCODE=2 (server failure), every other
/// header bit and all section counts zero, and no question or answer section (no parseable
/// question was ever produced). This gives `query_dns` a definite, honest RCODE to report instead
/// of an empty buffer that would read back as a fabricated NoError.
fn servfail_response() -> Vec<u8> {
    // Layout: id (2) | flags (2) | qdcount (2) | ancount (2) | nscount (2) | arcount (2).
    // Flags byte 2 carries QR in its high bit (0x80); flags byte 3 carries RCODE in its low nibble.
    let header: [u8; 12] = [0, 0, 0x80, 0x02, 0, 0, 0, 0, 0, 0, 0, 0];
    header.to_vec()
}
905
906impl Message<Query> for MagicDnsActor {
907 type Reply = DnsQueryResult;
908
909 async fn handle(&mut self, query: Query, _ctx: &mut Context<Self, Self::Reply>) -> Self::Reply {
910 // Synthesize a query packet and drive it through the SAME decide/forward path the serve loop
911 // uses, against the freshest view — so the result and its anti-leak posture exactly match an
912 // on-the-wire query. The id is fixed (0): a programmatic query has no concurrent-demux need,
913 // and `response_matches_query` validates the echoed id against this same buffer.
914 //
915 // Normalize the name into labels: strip a single trailing dot (an FQDN's root marker — Go's
916 // `dnsname.ToFQDN` does the same) and drop empty labels. An empty label would otherwise encode
917 // as a lone `0x00`, identical to the QNAME root terminator, truncating the wire query and
918 // corrupting the QTYPE/QCLASS that follow.
919 let trimmed = query.name.strip_suffix('.').unwrap_or(&query.name);
920 let labels: Vec<String> = trimmed
921 .split('.')
922 .filter(|label| !label.is_empty())
923 .map(str::to_owned)
924 .collect();
925 let qtype = match query.qtype {
926 1 => ts_dns_wire::QType::A,
927 28 => ts_dns_wire::QType::Aaaa,
928 12 => ts_dns_wire::QType::Ptr,
929 other => ts_dns_wire::QType::Other(other),
930 };
931 // Class IN (1) — the only class the responder serves authoritatively (a non-IN class still
932 // forwards via `forward_or_nodata`, matching the on-the-wire path).
933 let buf = ts_dns_wire::encode_query(0, &ts_dns_wire::Name(labels), &qtype, 1);
934
935 let view = self.view_tx.borrow().clone();
936
937 let (response, resolvers_consulted) = match decide(&view, &buf) {
938 // `decide` returns `None` only when `decode_query` rejects the buffer we just built. With
939 // the name normalized above that can still happen for a name `encode_query` accepts but
940 // `decode_query` rejects — a non-ASCII/IDN label (the caller must pass punycode) or a name
941 // whose wire form exceeds 255 bytes. Surface a SERVFAIL (RCODE 2: "could not process")
942 // rather than an empty buffer that would read back as a fabricated NoError. The serve loop
943 // silently drops here (the on-wire client times out); a programmatic caller gets a
944 // definite, honest error instead.
945 None => (servfail_response(), Vec::new()),
946 Some(Decision::Reply(resp)) => (resp, Vec::new()),
947 Some(Decision::Forward {
948 upstreams,
949 query,
950 nxdomain,
951 recursive,
952 }) => {
953 let plan = if recursive {
954 recursive_plan(&view, upstreams)
955 } else {
956 RecursivePlan::Udp(upstreams)
957 };
958 match plan {
959 RecursivePlan::Udp(upstreams) => {
960 let resp = forward_query(&self.channel, &upstreams, &query, nxdomain).await;
961 (resp, upstreams)
962 }
963 RecursivePlan::Doh(doh_addr) => {
964 let resp = crate::peerapi_doh::forward_doh(
965 &self.channel,
966 doh_addr,
967 &query,
968 nxdomain,
969 )
970 .await;
971 // The query egressed via the exit node's DoH endpoint, not a local UDP
972 // upstream — report the DoH address as the resolver consulted.
973 (resp, vec![doh_addr])
974 }
975 }
976 }
977 };
978
979 // RCODE is the low 4 bits of the second flags byte (header byte 3).
980 let rcode = response.get(3).map(|b| b & 0x0F).unwrap_or(0);
981
982 DnsQueryResult {
983 response,
984 rcode,
985 resolvers_consulted,
986 }
987 }
988}
989
990impl Message<Arc<ts_control::StateUpdate>> for MagicDnsActor {
991 type Reply = ();
992
993 async fn handle(
994 &mut self,
995 update: Arc<ts_control::StateUpdate>,
996 _ctx: &mut Context<Self, Self::Reply>,
997 ) {
998 // Re-read the live accept-dns cell on every rebuild (it is runtime-settable via
999 // `Device::set_accept_dns`); `enable_ipv6` is preserved from the seed (set once at spawn).
1000 let accept_dns = self.env.accept_dns();
1001 self.view_tx.send_modify(|view| {
1002 let mut next = (**view).clone();
1003 next.cfg = update.dns_config.clone().unwrap_or_default();
1004 next.self_node = update.node.clone();
1005 next.accept_dns = accept_dns;
1006 *view = Arc::new(next);
1007 });
1008 }
1009}
1010
1011impl Message<Arc<PeerState>> for MagicDnsActor {
1012 type Reply = ();
1013
1014 async fn handle(&mut self, state: Arc<PeerState>, _ctx: &mut Context<Self, Self::Reply>) {
1015 // Re-read the live accept-dns cell on every rebuild: `Device::set_accept_dns` triggers a
1016 // `RepublishState` that lands here, so this is the path that re-applies the gate after a
1017 // runtime toggle (covers the netstack responder AND the peerAPI DoH server sharing the view).
1018 let accept_dns = self.env.accept_dns();
1019 self.view_tx.send_modify(|view| {
1020 let mut next = (**view).clone();
1021 next.peers = Some(state.peers.clone());
1022 next.accept_dns = accept_dns;
1023 *view = Arc::new(next);
1024 });
1025 }
1026}
1027
1028impl Message<crate::route_updater::ActiveExitNode> for MagicDnsActor {
1029 type Reply = ();
1030
1031 async fn handle(
1032 &mut self,
1033 active: crate::route_updater::ActiveExitNode,
1034 _ctx: &mut Context<Self, Self::Reply>,
1035 ) {
1036 // Cache the active exit node's DoH endpoint so the serve loop delegates recursive queries
1037 // to it. `None` (no exit node, or one that can't proxy DNS) keeps recursion local. Resolving
1038 // the address here — once, from the route updater's authoritative selection — means the
1039 // serve loop never re-resolves the selector.
1040 let exit_doh = active.node.as_ref().and_then(|n| n.peerapi_doh_addr());
1041 self.view_tx.send_modify(|view| {
1042 let mut next = (**view).clone();
1043 next.exit_doh = exit_doh;
1044 *view = Arc::new(next);
1045 });
1046 }
1047}
1048
1049#[cfg(test)]
1050mod tests {
1051 use ts_control::{StableNodeId, TailnetAddress};
1052
1053 use super::*;
1054
1055 /// Test wrapper: run [`decide`] and extract the reply bytes. These tests configure no
1056 /// upstream resolvers, so an unresolved name fails closed to a `Reply` (NXDOMAIN), never a
1057 /// `Forward`; a `Forward` here is a bug and panics.
1058 fn answer(view: &DnsView, buf: &[u8]) -> Option<Vec<u8>> {
1059 match decide(view, buf)? {
1060 Decision::Reply(resp) => Some(resp),
1061 Decision::Forward { .. } => panic!("unexpected forward in authoritative-only test"),
1062 }
1063 }
1064
1065 /// Build a `Node` named `host.user.ts.net` with a known v4/v6 tailnet address.
1066 fn test_node() -> Node {
1067 Node {
1068 id: 1,
1069 stable_id: StableNodeId("n1".to_string()),
1070 hostname: "host".to_string(),
1071 user_id: 0,
1072 tailnet: Some("user.ts.net".to_string()),
1073 tags: vec![],
1074 tailnet_address: TailnetAddress {
1075 ipv4: "100.64.0.1/32".parse().unwrap(),
1076 ipv6: "fd7a::1/128".parse().unwrap(),
1077 },
1078 node_key: [0u8; 32].into(),
1079 node_key_expiry: None,
1080 online: None,
1081 last_seen: None,
1082 key_signature: vec![],
1083 machine_key: None,
1084 disco_key: None,
1085 accepted_routes: vec![],
1086 underlay_addresses: vec![],
1087 derp_region: None,
1088 cap: Default::default(),
1089 cap_map: Default::default(),
1090 peerapi_port: None,
1091 peerapi_dns_proxy: false,
1092 is_wireguard_only: false,
1093 exit_node_dns_resolvers: vec![],
1094 peer_relay: false,
1095 service_vips: Default::default(),
1096 }
1097 }
1098
1099 /// A view with MagicDNS on and a single peer in the db.
1100 fn view_with_peer() -> DnsView {
1101 let mut db = PeerDb::default();
1102 db.upsert(&test_node());
1103
1104 DnsView {
1105 cfg: DnsConfig {
1106 magic_dns: true,
1107 search_domains: vec!["user.ts.net".to_string()],
1108 ..Default::default()
1109 },
1110 peers: Some(Arc::new(db)),
1111 self_node: None,
1112 exit_doh: None,
1113 enable_ipv6: false,
1114 accept_dns: true,
1115 }
1116 }
1117
1118 /// Build a raw DNS query buffer for `labels` with the given id, qtype, qclass.
1119 fn build_query(id: u16, labels: &[&str], qtype: u16, qclass: u16) -> Vec<u8> {
1120 let mut buf: Vec<u8> = Vec::new();
1121 buf.extend_from_slice(&id.to_be_bytes());
1122 buf.extend_from_slice(&0u16.to_be_bytes()); // flags: QR=0 (query)
1123 buf.extend_from_slice(&1u16.to_be_bytes()); // QDCOUNT
1124 buf.extend_from_slice(&0u16.to_be_bytes()); // ANCOUNT
1125 buf.extend_from_slice(&0u16.to_be_bytes()); // NSCOUNT
1126 buf.extend_from_slice(&0u16.to_be_bytes()); // ARCOUNT
1127 for label in labels {
1128 buf.push(label.len() as u8);
1129 buf.extend_from_slice(label.as_bytes());
1130 }
1131 buf.push(0); // root label
1132 buf.extend_from_slice(&qtype.to_be_bytes());
1133 buf.extend_from_slice(&qclass.to_be_bytes());
1134 buf
1135 }
1136
1137 /// Parse a response header: returns `(id, rcode, ancount)`.
1138 fn parse_header(resp: &[u8]) -> (u16, u8, u16) {
1139 let id = u16::from_be_bytes([resp[0], resp[1]]);
1140 let flags = u16::from_be_bytes([resp[2], resp[3]]);
1141 let ancount = u16::from_be_bytes([resp[6], resp[7]]);
1142 (id, (flags & 0x000F) as u8, ancount)
1143 }
1144
1145 #[test]
1146 fn a_query_for_known_peer_answers_v4() {
1147 let view = view_with_peer();
1148 let buf = build_query(0x1234, &["host", "user", "ts", "net"], 1, 1);
1149
1150 let resp = answer(&view, &buf).expect("answers");
1151 let (id, rcode, ancount) = parse_header(&resp);
1152 assert_eq!(id, 0x1234);
1153 assert_eq!(rcode, 0, "NoError");
1154 assert_eq!(ancount, 1);
1155
1156 // The trailing RDATA of the single A record is the peer's tailnet v4 octets.
1157 let tail = &resp[resp.len() - 4..];
1158 assert_eq!(tail, &[100, 64, 0, 1]);
1159 }
1160
1161 #[test]
1162 fn aaaa_query_for_known_peer_is_nodata_when_ipv6_off() {
1163 // Gate OFF (default): an AAAA query for a known overlay peer must return NoError with an
1164 // empty answer (NODATA) — NOT the overlay v6 address, which the IPv4-only client can't
1165 // route. This is the anti-fingerprint / no-dead-connections posture.
1166 let view = view_with_peer();
1167 assert!(!view.enable_ipv6, "default gate is off");
1168 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1169
1170 let resp = answer(&view, &buf).expect("answers");
1171 let (_, rcode, ancount) = parse_header(&resp);
1172 assert_eq!(rcode, 0, "NoError (NODATA)");
1173 assert_eq!(ancount, 0, "empty answer: no AAAA handed out with IPv6 off");
1174 }
1175
1176 #[test]
1177 fn a_query_still_resolves_when_ipv6_off() {
1178 // Gate OFF must not touch the A (v4) path: the v4 answer is byte-for-byte unchanged.
1179 let view = view_with_peer();
1180 let buf = build_query(0x6, &["host", "user", "ts", "net"], 1, 1);
1181
1182 let resp = answer(&view, &buf).expect("answers");
1183 let (_, rcode, ancount) = parse_header(&resp);
1184 assert_eq!(rcode, 0, "NoError");
1185 assert_eq!(ancount, 1);
1186 let tail = &resp[resp.len() - 4..];
1187 assert_eq!(tail, &[100, 64, 0, 1]);
1188 }
1189
1190 #[test]
1191 fn aaaa_query_for_known_peer_answers_v6_when_ipv6_on() {
1192 // Gate ON: historical behavior — answer AAAA from the overlay v6 address.
1193 let mut view = view_with_peer();
1194 view.enable_ipv6 = true;
1195 let buf = build_query(0x5, &["host", "user", "ts", "net"], 28, 1);
1196
1197 let resp = answer(&view, &buf).expect("answers");
1198 let (_, rcode, ancount) = parse_header(&resp);
1199 assert_eq!(rcode, 0, "NoError");
1200 assert_eq!(ancount, 1);
1201
1202 let expected = "fd7a::1".parse::<std::net::Ipv6Addr>().unwrap().octets();
1203 let tail = &resp[resp.len() - 16..];
1204 assert_eq!(tail, expected);
1205 }
1206
1207 #[test]
1208 fn aaaa_for_unknown_tailnet_name_is_nxdomain_not_forwarded_with_ipv6_off() {
1209 // Anti-leak, unchanged by the gate: an AAAA for a name under the tailnet suffix that has no
1210 // overlay match still fails closed to NXDOMAIN — never forwarded to a recursive upstream,
1211 // even with resolvers configured. (Gate OFF only changes the *positive* overlay match into
1212 // NODATA; a non-match still routes through `forward_or_nxdomain`.)
1213 let mut db = PeerDb::default();
1214 db.upsert(&test_node());
1215 let view = DnsView {
1216 cfg: DnsConfig {
1217 magic_dns: true,
1218 search_domains: vec!["user.ts.net".to_string()],
1219 fallback_resolvers: vec![DnsResolver {
1220 transport: ts_control::ResolverTransport::Udp("9.9.9.9:53".parse().unwrap()),
1221 use_with_exit_node: false,
1222 }],
1223 ..Default::default()
1224 },
1225 peers: Some(Arc::new(db)),
1226 self_node: None,
1227 exit_doh: None,
1228 enable_ipv6: false,
1229 accept_dns: true,
1230 };
1231 let buf = build_query(0x5A, &["ghost", "user", "ts", "net"], 28, 1);
1232
1233 match decide(&view, &buf).expect("decides") {
1234 Decision::Reply(resp) => {
1235 let (_, rcode, _) = parse_header(&resp);
1236 assert_eq!(rcode, 3, "NxDomain: tailnet AAAA not leaked upstream");
1237 }
1238 Decision::Forward { .. } => panic!("tailnet AAAA must never be forwarded"),
1239 }
1240 }
1241
1242 #[test]
1243 fn bare_hostname_resolves() {
1244 // The name index also stores the bare hostname.
1245 let view = view_with_peer();
1246 let buf = build_query(0x7, &["host"], 1, 1);
1247
1248 let resp = answer(&view, &buf).expect("answers");
1249 let (_, rcode, ancount) = parse_header(&resp);
1250 assert_eq!(rcode, 0);
1251 assert_eq!(ancount, 1);
1252 }
1253
1254 #[test]
1255 fn unknown_name_is_nxdomain() {
1256 let view = view_with_peer();
1257 let buf = build_query(0x9, &["nope", "example", "com"], 1, 1);
1258
1259 let resp = answer(&view, &buf).expect("answers");
1260 let (_, rcode, ancount) = parse_header(&resp);
1261 assert_eq!(rcode, 3, "NxDomain");
1262 assert_eq!(ancount, 0);
1263 }
1264
1265 #[test]
1266 fn magic_dns_off_is_refused() {
1267 // Fail closed: with MagicDNS disabled, even a known name is refused.
1268 let mut view = view_with_peer();
1269 view.cfg.magic_dns = false;
1270 let buf = build_query(0xAB, &["host", "user", "ts", "net"], 1, 1);
1271
1272 let resp = answer(&view, &buf).expect("answers");
1273 let (_, rcode, ancount) = parse_header(&resp);
1274 assert_eq!(rcode, 5, "Refused");
1275 assert_eq!(ancount, 0);
1276 }
1277
1278 #[test]
1279 fn accept_dns_false_refuses_otherwise_answerable_query() {
1280 // The accept-dns gate (Go `CorpDNS`): with `accept_dns == false` the node ignores the
1281 // tailnet DNS config, so even a known peer name that would normally answer authoritatively is
1282 // REFUSED (the responder serves nothing) — mirroring Go applying an empty `dns.Config`.
1283 let mut view = view_with_peer();
1284 assert!(view.cfg.magic_dns, "MagicDNS itself is on");
1285 view.accept_dns = false;
1286 let buf = build_query(0xDD, &["host", "user", "ts", "net"], 1, 1);
1287
1288 let resp = answer(&view, &buf).expect("answers");
1289 let (_, rcode, ancount) = parse_header(&resp);
1290 assert_eq!(rcode, 5, "Refused: accept_dns off ⇒ serve nothing");
1291 assert_eq!(ancount, 0);
1292
1293 // Flip accept_dns back ON (the config was never destroyed, only gated): the same query now
1294 // answers authoritatively — proving the OFF→ON restore is automatic.
1295 view.accept_dns = true;
1296 let resp = answer(&view, &buf).expect("answers");
1297 let (_, rcode, ancount) = parse_header(&resp);
1298 assert_eq!(rcode, 0, "NoError: accept_dns on ⇒ the known peer answers");
1299 assert_eq!(ancount, 1);
1300 let tail = &resp[resp.len() - 4..];
1301 assert_eq!(tail, &[100, 64, 0, 1], "the peer's tailnet v4 is served");
1302 }
1303
1304 #[test]
1305 fn default_view_serves_nothing() {
1306 // The default (no dns_config seen) has magic_dns == false: fail closed.
1307 let view = DnsView::default();
1308 let buf = build_query(0x1, &["host", "user", "ts", "net"], 1, 1);
1309
1310 let resp = answer(&view, &buf).expect("answers");
1311 let (_, rcode, _) = parse_header(&resp);
1312 assert_eq!(rcode, 5, "Refused");
1313 }
1314
1315 #[test]
1316 fn unsupported_qtype_on_tailnet_name_is_nodata_not_refused() {
1317 // TXT (type 16) for a tailnet-authoritative name: the name exists but we hold no TXT, so —
1318 // like Go — return NODATA (empty NOERROR), NOT REFUSED (which would make a stub abandon the
1319 // resolver) and NOT NXDOMAIN (the name exists). The name is never forwarded (anti-leak).
1320 let view = view_with_peer();
1321 let buf = build_query(0x1, &["host", "user", "ts", "net"], 16, 1);
1322
1323 let resp = answer(&view, &buf).expect("answers");
1324 let (_, rcode, ancount) = parse_header(&resp);
1325 assert_eq!(rcode, 0, "NoError (NODATA), not Refused");
1326 assert_eq!(ancount, 0, "no answer records (NODATA)");
1327 }
1328
1329 #[test]
1330 fn unsupported_qtype_off_tailnet_forwards_or_nxdomains() {
1331 // A non-A/AAAA/PTR qtype for an OFF-tailnet name must be forwardable like A/AAAA — never
1332 // REFUSED. With no upstream configured in this view it fails closed to NXDOMAIN (the same
1333 // disposition an off-tailnet A query gets here), proving the qtype no longer short-circuits
1334 // to REFUSED. HTTPS/SVCB is type 65 (the browser HTTP/3 + ECH case the old REFUSED broke).
1335 let view = view_with_peer();
1336 let buf = build_query(0x1, &["example", "com"], 65, 1);
1337
1338 let resp = answer(&view, &buf).expect("answers");
1339 let (_, rcode, _) = parse_header(&resp);
1340 assert_eq!(
1341 rcode, 3,
1342 "off-tailnet, no upstream -> NXDOMAIN (forwardable, not Refused)"
1343 );
1344 }
1345
1346 #[test]
1347 fn unimplemented_qtype_on_tailnet_name_is_notimp() {
1348 // NS (2), SOA (6), HINFO (13), AXFR (252) for a tailnet-authoritative name must answer NOTIMP
1349 // (rcode 4), matching Go `resolveLocal`'s `case dns.TypeNS, dns.TypeSOA, dns.TypeAXFR,
1350 // dns.TypeHINFO: return RCodeNotImplemented`. Returning NODATA (rcode 0) here was a clean
1351 // fingerprint (a `dig SOA user.ts.net` answer differs from real tailscaled). The name is
1352 // still never forwarded (anti-leak).
1353 let view = view_with_peer();
1354 for qtype in [2u16, 6, 13, 252] {
1355 let buf = build_query(0x1, &["host", "user", "ts", "net"], qtype, 1);
1356 let resp = answer(&view, &buf).expect("answers");
1357 let (_, rcode, ancount) = parse_header(&resp);
1358 assert_eq!(rcode, 4, "qtype {qtype} on a tailnet name must be NOTIMP");
1359 assert_eq!(ancount, 0, "NOTIMP carries no answer records");
1360 }
1361 }
1362
1363 #[test]
1364 fn unimplemented_qtype_off_tailnet_still_forwards_not_notimp() {
1365 // The NOTIMP disposition is ONLY for a name we are authoritative for. An NS query for an
1366 // off-tailnet name must still forward (here: NXDOMAIN, no upstream) — NOT NOTIMP — exactly
1367 // like the off-tailnet HTTPS/SVCB case above. Guards the NOTIMP change against over-reach.
1368 let view = view_with_peer();
1369 let buf = build_query(0x1, &["example", "com"], 2, 1); // NS, off-tailnet
1370 let resp = answer(&view, &buf).expect("answers");
1371 let (_, rcode, _) = parse_header(&resp);
1372 assert_eq!(
1373 rcode, 3,
1374 "off-tailnet NS -> NXDOMAIN (forwardable), not NOTIMP"
1375 );
1376 }
1377
1378 #[test]
1379 fn malformed_query_is_dropped() {
1380 // A response (QR bit set) is not a query; we drop it (no answer).
1381 let mut buf = build_query(0x1, &["host"], 1, 1);
1382 buf[2] = 0x80; // set QR bit
1383 assert!(answer(&view_with_peer(), &buf).is_none());
1384 }
1385
1386 #[test]
1387 fn ptr_for_known_ip_answers_fqdn() {
1388 let view = view_with_peer();
1389 // Reverse name for 100.64.0.1 => 1.0.64.100.in-addr.arpa
1390 let buf = build_query(0x33, &["1", "0", "64", "100", "in-addr", "arpa"], 12, 1);
1391
1392 let resp = answer(&view, &buf).expect("answers");
1393 let (_, rcode, ancount) = parse_header(&resp);
1394 assert_eq!(rcode, 0, "NoError");
1395 assert_eq!(ancount, 1);
1396
1397 // The PTR rdata encodes the peer's fqdn "host.user.ts.net" as length-prefixed labels.
1398 let expected = {
1399 let mut out = Vec::new();
1400 for label in ["host", "user", "ts", "net"] {
1401 out.push(label.len() as u8);
1402 out.extend_from_slice(label.as_bytes());
1403 }
1404 out.push(0);
1405 out
1406 };
1407 let tail = &resp[resp.len() - expected.len()..];
1408 assert_eq!(tail, expected.as_slice());
1409 }
1410
1411 #[test]
1412 fn ptr_for_unknown_ip_is_nxdomain() {
1413 let view = view_with_peer();
1414 // 9.9.9.9 is not a known tailnet IP.
1415 let buf = build_query(0x34, &["9", "9", "9", "9", "in-addr", "arpa"], 12, 1);
1416
1417 let resp = answer(&view, &buf).expect("answers");
1418 let (_, rcode, _) = parse_header(&resp);
1419 assert_eq!(rcode, 3, "NxDomain");
1420 }
1421
/// Anti-leak regression for the PTR path: with an upstream resolver configured, an off-tailnet
/// reverse query would forward, but a reverse query for an unmatched IP inside the CGNAT range
/// (100.64.0.0/10) must fail closed to NXDOMAIN so the probed tailnet IP never leaks upstream.
#[test]
fn ptr_for_unknown_tailnet_ip_is_nxdomain_not_forwarded() {
    let mut db = PeerDb::default();
    db.upsert(&test_node());

    // Reuse the shared fixture helpers instead of spelling out the `DnsView` literal: MagicDNS
    // on, the `user.ts.net` search domain, no routes, no global resolvers, and a single fallback
    // upstream. Only the peer database differs from what `view_with_routes` builds.
    let mut view = view_with_routes(
        std::collections::BTreeMap::new(),
        vec![],
        vec![udp("9.9.9.9:53")],
    );
    view.peers = Some(Arc::new(db));

    // 100.64.0.9 is in CGNAT range but owned by no peer => NXDOMAIN, never a Forward.
    let buf = build_query(0x35, &["9", "0", "64", "100", "in-addr", "arpa"], 12, 1);
    match decide(&view, &buf).expect("decides") {
        Decision::Reply(resp) => {
            let (_, rcode, _) = parse_header(&resp);
            assert_eq!(rcode, 3, "NxDomain");
        }
        Decision::Forward { .. } => {
            panic!("tailnet CGNAT PTR must never be forwarded upstream")
        }
    }
}
1458
/// Anti-leak regression for the exotic-qtype forward path: a NON-PTR query (TXT, type 16) for a
/// tailnet CGNAT reverse name, with an upstream configured, must STILL fail closed to NXDOMAIN —
/// never forward. The PTR arm guards this, but the `QType::Other` path routes through
/// `forward_or_nodata`, which must re-apply the reverse-zone guard or the tailnet IP leaks.
#[test]
fn exotic_qtype_for_tailnet_cgnat_reverse_is_nxdomain_not_forwarded() {
    let mut db = PeerDb::default();
    db.upsert(&test_node());

    // Reuse the shared fixture helpers instead of spelling out the `DnsView` literal: MagicDNS
    // on, the `user.ts.net` search domain, no routes, no global resolvers, and a single fallback
    // upstream. Only the peer database differs from what `view_with_routes` builds.
    let mut view = view_with_routes(
        std::collections::BTreeMap::new(),
        vec![],
        vec![udp("9.9.9.9:53")],
    );
    view.peers = Some(Arc::new(db));

    // TXT (16) for a CGNAT reverse name => NXDOMAIN, never a Forward (no tailnet-IP leak).
    let buf = build_query(0x36, &["9", "0", "64", "100", "in-addr", "arpa"], 16, 1);
    match decide(&view, &buf).expect("decides") {
        Decision::Reply(resp) => {
            let (_, rcode, _) = parse_header(&resp);
            assert_eq!(rcode, 3, "NxDomain");
        }
        Decision::Forward { .. } => {
            panic!("a non-PTR query for a tailnet CGNAT reverse name must never forward")
        }
    }
}
1496
/// The exotic-qtype anti-leak guard also covers `ip6.arpa`: a non-PTR query for a reverse name in
/// that zone must be answered NXDOMAIN locally rather than forwarded (which would reveal that a
/// tailnet ULA was probed).
#[test]
fn exotic_qtype_for_ip6_arpa_is_nxdomain_not_forwarded() {
    // Only a global resolver is configured, so a forwardable name would have somewhere to go.
    let view = view_with_routes(
        std::collections::BTreeMap::new(),
        vec![udp("9.9.9.9:53")],
        vec![],
    );

    // Nibble labels "1", fifteen "0"s, then "a", "7", "d", "f", under ip6.arpa.
    let name: Vec<&str> = std::iter::once("1")
        .chain(std::iter::repeat("0").take(15))
        .chain(["a", "7", "d", "f", "ip6", "arpa"])
        .collect();
    // qtype 16 is TXT: anything but PTR exercises the exotic-qtype path.
    let buf = build_query(0x37, &name, 16, 1);

    let resp = match decide(&view, &buf).expect("decides") {
        Decision::Reply(resp) => resp,
        Decision::Forward { .. } => panic!("an ip6.arpa exotic-qtype query must never forward"),
    };
    let (_, rcode, _) = parse_header(&resp);
    assert_eq!(rcode, 3, "NxDomain");
}
1524
#[test]
fn is_tailnet_cgnat_classifies_range() {
    // Parse the textual address and classify it; keeps each case below to a single literal.
    let in_range = |ip: &str| is_tailnet_cgnat(ip.parse().unwrap());

    // First address, an interior address, and the last address of 100.64.0.0/10.
    assert!(in_range("100.64.0.0"));
    assert!(in_range("100.64.0.1"));
    assert!(in_range("100.127.255.255"));

    // One below the range, one above it, and an unrelated public address.
    assert!(!in_range("100.63.255.255"));
    assert!(!in_range("100.128.0.0"));
    assert!(!in_range("9.9.9.9"));

    // 100.100.100.100 (the MagicDNS resolver address) also falls inside the /10.
    assert!(in_range("100.100.100.100"));
}
1537
#[test]
fn response_matches_query_validates_id_and_qr() {
    // The outbound query: transaction id 0x1234, QR bit clear.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Echoing the query with QR set gives a response that must be accepted.
    let mut reply = query.clone();
    reply[2] |= 0x80;
    assert!(response_matches_query(&query, &reply));

    // The untouched query still has QR=0, so it is not a response and is rejected.
    assert!(!response_matches_query(&query, &query));

    // Flipping the high id byte models an off-path forgery with the wrong transaction id.
    let mut forged = reply.clone();
    forged[0] ^= 0xFF;
    assert!(!response_matches_query(&query, &forged));

    // A buffer that is too short on either side is rejected.
    let runt_reply = [0u8; 2];
    let runt_query = [0u8; 3];
    assert!(!response_matches_query(&query, &runt_reply));
    assert!(!response_matches_query(&runt_query, &reply));
}
1560
#[test]
fn self_node_resolves_when_no_peer_match() {
    // No peer database at all, only a self node: the self node must answer for its own name.
    let cfg = DnsConfig {
        magic_dns: true,
        search_domains: Vec::new(),
        ..Default::default()
    };
    let view = DnsView {
        cfg,
        peers: None,
        self_node: Some(test_node()),
        exit_doh: None,
        enable_ipv6: false,
        accept_dns: true,
    };

    // A (qtype 1) query for the node's fqdn.
    let query = build_query(0x44, &["host", "user", "ts", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0);
    assert_eq!(ancount, 1);

    // The A rdata (the node's IPv4 address) is the last four bytes of the reply.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 1]);
}
1585
#[test]
fn partially_qualified_name_resolves_via_search_domain() {
    // The peer is known as "host.user.ts.net". The query "host.user" is neither the bare name nor
    // the fqdn, so it only resolves if the search domain is appended to it: with the search
    // domain set to "ts.net", "host.user" expands to "host.user.ts.net" and matches the peer.
    let mut view = view_with_peer();
    view.cfg.search_domains = vec!["ts.net".to_string()];

    let query = build_query(0x55, &["host", "user"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError via search-domain expansion");
    assert_eq!(ancount, 1);

    // The A rdata is the last four bytes of the reply.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 1]);
}
1603
#[test]
fn extra_record_a_answers_when_no_peer_match() {
    // "static.user.ts.net" is not a peer; it exists only as a control-pushed static A record,
    // which must be served for an exact-name A query.
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![record];

    let query = build_query(0x77, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError from extra record");
    assert_eq!(ancount, 1);

    // The A rdata is the last four bytes of the reply and must be the record's address.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 9]);
}
1621
#[test]
fn extra_record_matches_query_case_insensitively() {
    // The record name is stored lowercase while the query arrives in mixed case. Query names are
    // canonicalized (lowercased) when decoded, so the two must still match.
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![record];

    let query = build_query(0x7A, &["Static", "User", "TS", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(rcode, 0, "NoError: case-insensitive match");
    assert_eq!(ancount, 1);

    // The A rdata is the last four bytes of the reply and must be the record's address.
    let rdata = &reply[reply.len() - 4..];
    assert_eq!(rdata, &[100, 64, 0, 9]);
}
1640
#[test]
fn extra_record_not_expanded_by_search_domain() {
    // Peer names get search-domain expansion; extra records do not. They match as FQDNs only, so
    // the bare label "static" — which could reach "static.user.ts.net" only by appending the
    // "user.ts.net" search domain — must not resolve.
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![record];

    let query = build_query(0x7B, &["static"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain: extra records are not search-expanded");
}
1657
#[test]
fn extra_record_aaaa_family_is_isolated() {
    // The name has an IPv4 extra record only. Asking for AAAA (qtype 28) on that same name must
    // not be satisfied by the A record; the expected outcome is NXDOMAIN.
    let record = ts_control::ExtraRecord {
        name: "v4only.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.extra_records = vec![record];

    let query = build_query(0x78, &["v4only", "user", "ts", "net"], 28, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 3, "NxDomain: A record does not satisfy AAAA");
}
1672
#[test]
fn extra_record_ignored_when_magic_dns_off() {
    // With MagicDNS switched off the responder fails closed: even a name with a matching extra
    // record is REFUSED (rcode 5) rather than answered.
    let record = ts_control::ExtraRecord {
        name: "static.user.ts.net".to_string(),
        addr: IpAddr::V4(Ipv4Addr::new(100, 64, 0, 9)),
    };
    let mut view = view_with_peer();
    view.cfg.magic_dns = false;
    view.cfg.extra_records = vec![record];

    let query = build_query(0x79, &["static", "user", "ts", "net"], 1, 1);
    let reply = answer(&view, &query).expect("answers");

    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(rcode, 5, "Refused");
}
1688
#[test]
fn non_in_class_on_tailnet_name_is_nodata_not_answered_as_in() {
    // qclass 3 is CHAOS. For a tailnet name the responder must neither hand out the overlay A as
    // if the class were IN, nor REFUSE the query (Go does no class check on the local path). The
    // class is simply unsupported on an authoritative name: NODATA (NOERROR with no answers), and
    // the tailnet name is never forwarded.
    let query = build_query(0x66, &["host", "user", "ts", "net"], 1, 3);
    let view = view_with_peer();

    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, ancount) = parse_header(&reply);
    assert_eq!(
        rcode, 0,
        "NoError (NODATA), not Refused and not an IN answer"
    );
    assert_eq!(
        ancount, 0,
        "must not hand out the overlay A for a non-IN class"
    );
}
1708
#[test]
fn non_in_class_off_tailnet_forwards_or_nxdomains() {
    // qclass 3 (CHAOS) on a name outside the tailnet is treated as forwardable (Go forwards it)
    // and is never REFUSED. This view has no upstream, so the observable result is NXDOMAIN —
    // which shows the class gate no longer short-circuits to Refused.
    let query = build_query(0x66, &["example", "com"], 1, 3);
    let view = view_with_peer();

    let reply = answer(&view, &query).expect("answers");
    let (_, rcode, _) = parse_header(&reply);
    assert_eq!(
        rcode, 3,
        "off-tailnet non-IN class, no upstream -> NXDOMAIN, not Refused"
    );
}
1723
1724 /// A view with MagicDNS on, the `user.ts.net` search domain, and the given split-DNS routes
1725 /// + global resolvers.
1726 fn view_with_routes(
1727 routes: std::collections::BTreeMap<String, Vec<DnsResolver>>,
1728 resolvers: Vec<DnsResolver>,
1729 fallback: Vec<DnsResolver>,
1730 ) -> DnsView {
1731 DnsView {
1732 cfg: DnsConfig {
1733 magic_dns: true,
1734 search_domains: vec!["user.ts.net".to_string()],
1735 routes,
1736 resolvers,
1737 fallback_resolvers: fallback,
1738 ..Default::default()
1739 },
1740 peers: None,
1741 self_node: None,
1742 exit_doh: None,
1743 enable_ipv6: false,
1744 accept_dns: true,
1745 }
1746 }
1747
1748 fn udp(addr: &str) -> DnsResolver {
1749 DnsResolver {
1750 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
1751 use_with_exit_node: false,
1752 }
1753 }
1754
#[test]
fn split_dns_route_forwards_to_matching_upstream() {
    // One split-DNS route: everything under "corp.example" goes to 10.0.0.53.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![], vec![]);

    // "api.corp.example" falls under that route, so the decision must be a forward to it.
    let query = build_query(0x100, &["api", "corp", "example"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to the split-DNS upstream"),
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
}
1769
#[test]
fn exotic_qtype_off_tailnet_forwards_to_upstream() {
    // Regression for the old blanket-REFUSE on unknown qtypes: an HTTPS/SVCB (type 65) query for
    // an off-tailnet name covered by a route must be forwarded to that route's upstream,
    // byte-for-byte, just as an A query would be — neither REFUSED nor NXDOMAIN. Browsers issue
    // these queries for HTTP/3 and ECH.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![], vec![]);

    let buf = build_query(0x102, &["api", "corp", "example"], 65, 1);
    let (upstreams, query) = match decide(&view, &buf).expect("decides") {
        Decision::Forward {
            upstreams, query, ..
        } => (upstreams, query),
        Decision::Reply(_) => {
            panic!("an off-tailnet HTTPS-record query must forward, not reply")
        }
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
    assert_eq!(query, buf, "the exotic-qtype query is forwarded verbatim");
}
1792
#[test]
fn non_in_class_off_tailnet_forwards_to_upstream() {
    // qclass 3 (non-IN) on an off-tailnet name that a route covers: it must forward like any
    // other query (Go does no class check on the local path), which shows the class gate no
    // longer short-circuits to REFUSED ahead of routing.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![], vec![]);

    let query = build_query(0x103, &["api", "corp", "example"], 1, 3);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => {
            panic!("an off-tailnet non-IN-class query must forward, not reply")
        }
    };
    assert_eq!(upstreams, vec!["10.0.0.53:53".parse().unwrap()]);
}
1811
/// Pins the gating semantics behind the responder's cap on concurrent in-flight forwards.
///
/// `serve` takes one permit from a `MAX_INFLIGHT_FORWARDS`-sized pool per spawned forward task
/// and drops the query (fail closed) when none is left, so a client spraying forwardable names
/// cannot open unbounded overlay sockets. The async `serve` loop has no netstack-free test seam,
/// so this exercises the same `Arc<Semaphore>::try_acquire_owned` call directly: a drained pool
/// refuses a further permit, and releasing one restores capacity. It also pins the cap constant.
#[test]
fn forward_inflight_cap_fails_closed_when_saturated() {
    use std::sync::Arc;

    use tokio::sync::Semaphore;

    let pool = Arc::new(Semaphore::new(MAX_INFLIGHT_FORWARDS));

    // Take every permit, one per forward that would be in flight at the same time.
    let mut in_flight: Vec<_> = (0..MAX_INFLIGHT_FORWARDS)
        .map(|_| {
            Arc::clone(&pool)
                .try_acquire_owned()
                .expect("permits available below the cap")
        })
        .collect();

    // Saturated: the next acquisition fails, which is where `serve` drops the query.
    let over_cap = Arc::clone(&pool).try_acquire_owned();
    assert!(
        over_cap.is_err(),
        "a saturated forward pool must refuse a new permit (fail closed)"
    );

    // Dropping one held permit models a forward completing; a slot becomes available again.
    drop(in_flight.pop());
    let after_release = Arc::clone(&pool).try_acquire_owned();
    assert!(
        after_release.is_ok(),
        "releasing a permit must let the next forward proceed"
    );
}
1851
1852 /// A permit moved into a spawned forward task (the `let _permit = permit;` shape `serve` uses)
1853 /// must stay held for the *whole* task body — across the `.await` on the upstream — and release
1854 /// only when the task completes. This guards the regression the saturation test above can't see:
1855 /// "tidying" `let _permit = permit;` to `let _ = permit;` would drop the permit immediately,
1856 /// re-opening unbounded concurrency while leaving the synchronous drain/restore test green. Here a
1857 /// 1-permit pool is consumed by a task that holds it across a yield; the pool must read empty
1858 /// while the task runs and refill once it finishes.
1859 #[tokio::test]
1860 async fn forward_permit_is_held_for_the_task_lifetime_not_dropped_early() {
1861 use std::sync::Arc;
1862
1863 use tokio::sync::Semaphore;
1864
1865 let inflight = Arc::new(Semaphore::new(1));
1866 let permit = inflight
1867 .clone()
1868 .try_acquire_owned()
1869 .expect("the sole permit is available");
1870
1871 let (started_tx, started_rx) = tokio::sync::oneshot::channel();
1872 let (release_tx, release_rx) = tokio::sync::oneshot::channel();
1873 let task = tokio::spawn(async move {
1874 // Same shape as `serve`'s spawned forward: the permit is a named binding moved into the
1875 // task, so it lives until the body ends — not dropped at the `let`.
1876 let _permit = permit;
1877 started_tx.send(()).unwrap();
1878 // Stand in for the `.await` on the upstream forward.
1879 release_rx.await.unwrap();
1880 });
1881
1882 started_rx.await.unwrap();
1883 // While the task runs, the permit it moved in is still held — the pool is empty.
1884 assert!(
1885 inflight.clone().try_acquire_owned().is_err(),
1886 "a permit moved into a running task must stay held across its await"
1887 );
1888
1889 // Let the task finish; its permit drops with the body and capacity returns.
1890 release_tx.send(()).unwrap();
1891 task.await.unwrap();
1892 assert!(
1893 inflight.clone().try_acquire_owned().is_ok(),
1894 "the permit must be released once the task body completes"
1895 );
1896 }
1897
#[test]
fn longest_suffix_route_wins() {
    // Two overlapping routes: "example" and the more specific "corp.example". A name under both
    // must be sent to the upstream of the longer suffix.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("example".to_string(), vec![udp("10.0.0.1:53")]);
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.2:53")]);
    let view = view_with_routes(routes, vec![], vec![]);

    let query = build_query(0x101, &["api", "corp", "example"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward"),
    };
    assert_eq!(
        upstreams,
        vec!["10.0.0.2:53".parse().unwrap()],
        "longer suffix wins"
    );
}
1917
#[test]
fn negative_route_is_nxdomain_not_forwarded() {
    // A route whose upstream list is empty is a negative route. Names under it fail closed with
    // NXDOMAIN and must not fall through to the global resolver that is also configured here.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("blocked.example".to_string(), vec![]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);

    let query = build_query(0x102, &["x", "blocked", "example"], 1, 1);
    let resp = match decide(&view, &query).expect("decides") {
        Decision::Reply(resp) => resp,
        Decision::Forward { .. } => panic!("negative route must not forward"),
    };
    let (_, rcode, _) = parse_header(&resp);
    assert_eq!(rcode, 3, "NxDomain: negative route is not forwarded");
}
1934
#[test]
fn unrouted_name_forwards_to_fallback_then_global() {
    // Both a global resolver (8.8.8.8) and a fallback resolver (1.1.1.1) are configured and no
    // route matches: the fallback resolver takes precedence.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![udp("1.1.1.1:53")]);

    let query = build_query(0x103, &["example", "com"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to fallback"),
    };
    assert_eq!(
        upstreams,
        vec!["1.1.1.1:53".parse().unwrap()],
        "fallback preferred"
    );
}
1956
#[test]
fn unrouted_name_forwards_to_global_when_no_fallback() {
    // Only a global resolver is configured (no routes, no fallback), so an unrouted name must be
    // forwarded to it.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);

    let query = build_query(0x104, &["example", "com"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to global resolver"),
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
1973
#[test]
fn tailnet_name_is_never_forwarded() {
    // Anti-leak: "ghost.user.ts.net" sits under the tailnet search domain but matches no peer.
    // Even though global and fallback resolvers are both configured, it must be answered
    // NXDOMAIN locally and never handed to an upstream.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![udp("1.1.1.1:53")]);

    let query = build_query(0x105, &["ghost", "user", "ts", "net"], 1, 1);
    let resp = match decide(&view, &query).expect("decides") {
        Decision::Reply(resp) => resp,
        Decision::Forward { .. } => panic!("tailnet name must never be forwarded"),
    };
    let (_, rcode, _) = parse_header(&resp);
    assert_eq!(rcode, 3, "NxDomain: tailnet name not leaked upstream");
}
1994
#[test]
fn no_resolvers_fails_closed() {
    // Nothing to forward to — no routes, no global resolvers, no fallback — so an unknown name
    // must be answered NXDOMAIN rather than forwarded.
    let view = view_with_routes(std::collections::BTreeMap::new(), vec![], vec![]);

    let query = build_query(0x106, &["example", "com"], 1, 1);
    let resp = match decide(&view, &query).expect("decides") {
        Decision::Reply(resp) => resp,
        Decision::Forward { .. } => panic!("must not forward with no resolvers"),
    };
    let (_, rcode, _) = parse_header(&resp);
    assert_eq!(rcode, 3, "NxDomain");
}
2009
/// A name that matches a known peer is answered authoritatively from overlay data even when an
/// upstream resolver is configured; the presence of a resolver must not turn it into a forward.
#[test]
fn overlay_match_wins_over_forwarding() {
    let mut db = PeerDb::default();
    db.upsert(&test_node());

    // Reuse the shared fixture helper instead of spelling out the `DnsView` literal: MagicDNS
    // on, the `user.ts.net` search domain, no routes, one global resolver, no fallback. Only the
    // peer database differs from what `view_with_routes` builds.
    let mut view = view_with_routes(
        std::collections::BTreeMap::new(),
        vec![udp("8.8.8.8:53")],
        vec![],
    );
    view.peers = Some(Arc::new(db));

    // A (qtype 1) query for the peer's fqdn.
    let buf = build_query(0x107, &["host", "user", "ts", "net"], 1, 1);

    match decide(&view, &buf).expect("decides") {
        Decision::Reply(resp) => {
            let (_, rcode, ancount) = parse_header(&resp);
            assert_eq!(rcode, 0, "authoritative answer wins");
            assert_eq!(ancount, 1);
        }
        Decision::Forward { .. } => panic!("overlay match must not forward"),
    }
}
2039
#[test]
fn ipv6_reverse_ptr_is_nxdomain_not_forwarded() {
    // Anti-leak: a PTR query under `ip6.arpa` for a tailnet ULA (fd7a:…) must be answered
    // NXDOMAIN locally even though global and fallback resolvers are configured. This fork is
    // IPv4-only on the tailnet, and forwarding would disclose that a v6 address was probed.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![udp("1.1.1.1:53")]);

    // fd7a::1 as a reverse name, least-significant nibble first: "1", twenty-seven "0"s, then
    // "a", "7", "d", "f", followed by the zone. The guard keys on the `ip6.arpa` suffix alone, so
    // the precise nibbles are incidental.
    let mut labels = vec!["1"];
    labels.extend(std::iter::repeat("0").take(27));
    labels.extend(["a", "7", "d", "f", "ip6", "arpa"]);
    let query = build_query(0x200, &labels, 12, 1);

    let resp = match decide(&view, &query).expect("decides") {
        Decision::Reply(resp) => resp,
        Decision::Forward { .. } => panic!("ip6.arpa PTR must never be forwarded"),
    };
    let (_, rcode, _) = parse_header(&resp);
    assert_eq!(
        rcode, 3,
        "NxDomain: ip6.arpa reverse must not leak upstream"
    );
}
2070
#[test]
fn cap_response_sets_tc_when_truncated() {
    // Build an upstream "response" 500 bytes over the cap: a query with QR set, padded with
    // filler. Capping must cut it down to one datagram and raise the TC bit so the stub resolver
    // retries over TCP instead of trusting a chopped message.
    let mut oversized = build_query(0x300, &["example", "com"], 1, 1);
    oversized[2] |= 0x80;
    oversized.resize(MAX_UPSTREAM_RESPONSE + 500, 0xAB);

    let capped = cap_response(oversized);
    assert_eq!(capped.len(), MAX_UPSTREAM_RESPONSE, "capped to one datagram");

    // TC is bit 0x02 of the first flags byte.
    let tc = capped[2] & 0x02;
    assert_ne!(tc, 0, "TC bit set on truncation");
}
2083
#[test]
fn cap_response_leaves_small_response_untouched() {
    // A response that already fits passes through byte-for-byte, and TC is not forced on.
    let mut response = build_query(0x301, &["example", "com"], 1, 1);
    response[2] |= 0x80;
    let original = response.clone();

    let capped = cap_response(response);
    assert_eq!(capped, original, "small response unchanged");

    // TC is bit 0x02 of the first flags byte.
    let tc = capped[2] & 0x02;
    assert_eq!(tc, 0, "TC bit not set when no truncation");
}
2095
#[test]
fn response_matches_query_rejects_mismatched_question() {
    // Matching the id and the QR bit is not enough: the echoed question must match too. This is
    // the defence against an off-path injector that guesses the id but answers something else.
    let query = build_query(0x1234, &["a", "com"], 1, 1);

    // Same id, QR=1, but the question names "b.com" instead of "a.com".
    let mut other_name = build_query(0x1234, &["b", "com"], 1, 1);
    other_name[2] |= 0x80;
    assert!(
        !response_matches_query(&query, &other_name),
        "different QNAME must be rejected"
    );

    // Same id, QR=1, same name, but qtype 28 (AAAA) instead of 1 (A).
    let mut other_type = build_query(0x1234, &["a", "com"], 28, 1);
    other_type[2] |= 0x80;
    assert!(
        !response_matches_query(&query, &other_type),
        "different QTYPE must be rejected"
    );

    // The query itself with QR flipped on echoes the question exactly and is accepted.
    let mut echoed = query.clone();
    echoed[2] |= 0x80;
    assert!(
        response_matches_query(&query, &echoed),
        "matching question accepted"
    );
}
2125
#[test]
fn suffix_matches_handles_boundaries_and_empty() {
    // (name, suffix) pairs that match: the name equals the suffix, or ends with it at a label
    // boundary.
    for (name, suffix) in [("corp", "corp"), ("a.corp", "corp"), ("a.b.corp", "corp")] {
        assert!(suffix_matches(name, suffix), "{name:?} vs {suffix:?}");
    }

    // (name, suffix) pairs that must not match: "acorp" ends with "corp" but not at a label
    // boundary, and an empty suffix never matches anything (defense-in-depth against
    // `ends_with("")`), not even an empty name.
    for (name, suffix) in [("acorp", "corp"), ("anything.example", ""), ("", "")] {
        assert!(!suffix_matches(name, suffix), "{name:?} vs {suffix:?}");
    }
}
2138
#[test]
fn empty_search_domain_does_not_capture_everything() {
    // Defense-in-depth: an empty string in the search-domain list must not make every name count
    // as a tailnet name, which would fail-close or mis-route legitimate recursive queries. With
    // only that empty entry plus a global resolver, an off-tailnet name must still forward.
    let no_routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);
    view.cfg.search_domains = vec![String::new()];

    let query = build_query(0x400, &["example", "com"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => {
            panic!("empty search domain must not treat every name as tailnet")
        }
    };
    assert_eq!(upstreams, vec!["8.8.8.8:53".parse().unwrap()]);
}
2161
#[test]
fn empty_route_suffix_does_not_capture_everything() {
    // Defense-in-depth: a route keyed by the empty string must not match every name and pull all
    // traffic to its upstream (10.9.9.9). An unrelated name has to skip that route and reach the
    // global resolver (8.8.8.8) instead.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert(String::new(), vec![udp("10.9.9.9:53")]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);

    let query = build_query(0x401, &["example", "com"], 1, 1);
    let upstreams = match decide(&view, &query).expect("decides") {
        Decision::Forward { upstreams, .. } => upstreams,
        Decision::Reply(_) => panic!("expected forward to global resolver"),
    };
    assert_eq!(
        upstreams,
        vec!["8.8.8.8:53".parse().unwrap()],
        "empty route suffix must not capture; falls through to global"
    );
}
2183
2184 fn udp_exit(addr: &str) -> DnsResolver {
2185 DnsResolver {
2186 transport: ts_control::ResolverTransport::Udp(addr.parse().unwrap()),
2187 use_with_exit_node: true,
2188 }
2189 }
2190
#[test]
fn recursive_forward_is_flagged_route_forward_is_not() {
    // The `recursive` flag on a forward decision separates the two forwarding paths: a forward
    // to the global/fallback resolvers is recursive (and so eligible for DoH delegation), while
    // a forward driven by a deliberately configured split-DNS route is not.
    let mut routes = std::collections::BTreeMap::new();
    routes.insert("corp.example".to_string(), vec![udp("10.0.0.53:53")]);
    let view = view_with_routes(routes, vec![udp("8.8.8.8:53")], vec![]);

    // A name under the "corp.example" route.
    let routed = build_query(0x500, &["api", "corp", "example"], 1, 1);
    let routed_is_recursive = match decide(&view, &routed).expect("decides") {
        Decision::Forward { recursive, .. } => recursive,
        Decision::Reply(_) => panic!("expected route forward"),
    };
    assert!(
        !routed_is_recursive,
        "split-DNS route is not a recursive forward"
    );

    // A name under no route, which goes to the global resolver.
    let global = build_query(0x501, &["example", "com"], 1, 1);
    let global_is_recursive = match decide(&view, &global).expect("decides") {
        Decision::Forward { recursive, .. } => recursive,
        Decision::Reply(_) => panic!("expected recursive forward"),
    };
    assert!(global_is_recursive, "unrouted name is a recursive forward");
}
2215
#[test]
fn recursive_plan_keeps_udp_without_exit_node() {
    // `exit_doh` is unset (no active exit node), so the plan is just the caller's default UDP
    // upstreams, unchanged.
    let no_routes = std::collections::BTreeMap::new();
    let view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);

    let upstreams = vec!["8.8.8.8:53".parse().unwrap()];
    let plan = recursive_plan(&view, upstreams.clone());
    assert_eq!(plan, RecursivePlan::Udp(upstreams));
}
2230
#[test]
fn recursive_plan_delegates_to_doh_with_exit_node() {
    // An exit node is active (`exit_doh` set) and no resolver is flagged to stay local, so
    // recursive queries are delegated to the exit node's DoH endpoint; resolution then egresses
    // from the exit node rather than from this host.
    let doh: SocketAddr = "100.64.0.5:8080".parse().unwrap();
    let no_routes = std::collections::BTreeMap::new();
    let mut view = view_with_routes(no_routes, vec![udp("8.8.8.8:53")], vec![]);
    view.exit_doh = Some(doh);

    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);
    assert_eq!(plan, RecursivePlan::Doh(doh));
}
2247
#[test]
fn recursive_plan_keeps_use_with_exit_node_resolvers_local() {
    // An exit node is active, but one of the two global resolvers (10.0.0.53) is flagged
    // `use_with_exit_node`. Such resolvers stay local (Go keeps UseWithExitNode resolvers), so
    // the plan is UDP to that resolver and DoH delegation does not happen.
    let no_routes = std::collections::BTreeMap::new();
    let resolvers = vec![udp_exit("10.0.0.53:53"), udp("8.8.8.8:53")];
    let mut view = view_with_routes(no_routes, resolvers, vec![]);
    view.exit_doh = Some("100.64.0.5:8080".parse().unwrap());

    // The caller's default upstreams (8.8.8.8) are deliberately different from the expected
    // result: when kept-local resolvers exist, the plan uses those instead.
    let plan = recursive_plan(&view, vec!["8.8.8.8:53".parse().unwrap()]);
    assert_eq!(
        plan,
        RecursivePlan::Udp(vec!["10.0.0.53:53".parse().unwrap()])
    );
}
2265}