ts_runtime/capture.rs
//! Pcap stream framer for debug packet capture (`CapturePcap`).
//!
//! This is the *format* half of Tailscale's debug packet capture: a [`PcapSink`](crate::capture::PcapSink) that frames
//! captured packets into a byte stream and writes them to any [`std::io::Write`]. A separate seam
//! tees packets into the sink; this module is only concerned with producing bytes.
//!
//! The on-the-wire format is **classic pcap** (not pcapng), little-endian, byte-faithful to Go
//! Tailscale's `feature/capture` (`capture.go`):
//!
//! - a 24-byte classic pcap global header, written once on construction, using link type
//!   [`LINKTYPE_USER0`](crate::capture::LINKTYPE_USER0) (147);
//! - per packet, a 16-byte classic pcap record header, followed by Tailscale's custom 4-byte path
//!   preamble (a `u16` little-endian path code, then a SNAT length byte and a DNAT length byte),
//!   followed by the raw IP packet bytes.
//!
//! Because this fork never performs SNAT/DNAT on the captured path, both NAT length bytes in the
//! preamble are **always 0** (the no-NAT common case). A file produced here is readable in
//! Wireshark; with Tailscale's `ts-dissector.lua` the per-record path/preamble decodes, and without
//! it the records are still walkable but shown as opaque USER0 data.
20
21use std::time::{SystemTime, UNIX_EPOCH};
22
/// Link-layer type written into the pcap global header: `LINKTYPE_USER0` (147), the value Go
/// Tailscale uses for its capture stream.
///
/// Wireshark only decodes the per-record path/preamble when Tailscale's `ts-dissector.lua` is
/// installed; without the dissector the records can still be walked, but each one is displayed as
/// opaque USER0 data.
pub const LINKTYPE_USER0: u32 = 147;
27
/// A pcap stream framer that writes captured packets to a writer in Go-Tailscale-faithful classic
/// pcap (USER0 link type + a 4-byte path preamble per record). Construct with [`PcapSink::new`]
/// (which emits the global header), then call [`PcapSink::log_packet`] per packet.
///
/// Records are **not** flushed per packet (that would be a syscall on every packet on the single
/// dataplane thread). For buffering, wrap `writer` in a [`std::io::BufWriter`]; call
/// [`PcapSink::flush`] periodically if a reader needs to tail the stream promptly.
///
/// A `BufWriter` also attempts a flush when it is dropped (on capture stop), but that drop-time
/// flush silently discards any I/O error. Call [`PcapSink::flush`] before dropping the sink when a
/// failed final write must be observed.
#[derive(Debug)]
pub struct PcapSink<W> {
    /// Destination of the framed byte stream (global header first, then one record per packet).
    writer: W,
}
39
40impl<W: std::io::Write> PcapSink<W> {
41 /// Create a sink and immediately write the 24-byte pcap global header.
42 pub fn new(mut writer: W) -> std::io::Result<Self> {
43 writer.write_all(&global_header())?;
44 Ok(Self { writer })
45 }
46
47 /// Frame and write one captured packet: the 16-byte record header, the 4-byte Tailscale path
48 /// preamble, then the raw IP bytes. The timestamp is taken from the system clock now.
49 pub fn log_packet(&mut self, path_code: u16, pkt: &[u8]) -> std::io::Result<()> {
50 let (sec, usec) = now_parts();
51 self.write_record(path_code, sec, usec, pkt)
52 }
53
54 /// Pure record writer (timestamp injected), factored out so the exact byte layout is
55 /// unit-testable without the system clock.
56 fn write_record(
57 &mut self,
58 path_code: u16,
59 ts_sec: u32,
60 ts_usec: u32,
61 pkt: &[u8],
62 ) -> std::io::Result<()> {
63 // caplen == orig_len == 4 (preamble) + pkt.len(). IP packets are <= 64 KiB, so this cast
64 // can never overflow in practice; saturate defensively regardless.
65 let incl_len: u32 = 4u32.saturating_add(pkt.len() as u32);
66
67 // 16-byte classic pcap record header (little-endian).
68 self.writer.write_all(&ts_sec.to_le_bytes())?;
69 self.writer.write_all(&ts_usec.to_le_bytes())?;
70 self.writer.write_all(&incl_len.to_le_bytes())?;
71 self.writer.write_all(&incl_len.to_le_bytes())?;
72
73 // 4-byte Tailscale path preamble (path u16 LE, then no-NAT zero length bytes).
74 self.writer.write_all(&record_preamble(path_code))?;
75
76 // Raw IP packet bytes.
77 self.writer.write_all(pkt)?;
78
79 // No per-record flush: flushing on every packet is a syscall per packet on the single
80 // dataplane thread, which collapses throughput under capture. Buffered records are flushed
81 // when the writer is dropped on capture stop (see [`PcapSink::flush`] for an explicit
82 // periodic/tailing flush, and wrap `writer` in a `std::io::BufWriter` if you want buffering).
83 Ok(())
84 }
85
86 /// Flush the underlying writer. Optional: callers that need a reader tailing the stream (e.g.
87 /// `tcpdump -r` on a growing file, or a live pipe) to see packets promptly can call this
88 /// periodically — it is *not* called per record, so the hot path stays syscall-free. Buffered
89 /// records are otherwise flushed when the writer is dropped on capture stop.
90 pub fn flush(&mut self) -> std::io::Result<()> {
91 self.writer.flush()
92 }
93
94 /// Consume the sink and return the inner writer (test helper for byte assertions).
95 #[cfg(test)]
96 fn into_inner(self) -> W {
97 self.writer
98 }
99}
100
101/// Return the 24-byte classic pcap global header (little-endian), with USER0 link type.
102fn global_header() -> [u8; 24] {
103 let mut h = [0u8; 24];
104 h[0..4].copy_from_slice(&0xA1B2_C3D4u32.to_le_bytes()); // magic_number
105 h[4..6].copy_from_slice(&2u16.to_le_bytes()); // version_major
106 h[6..8].copy_from_slice(&4u16.to_le_bytes()); // version_minor
107 h[8..12].copy_from_slice(&0i32.to_le_bytes()); // thiszone
108 h[12..16].copy_from_slice(&0u32.to_le_bytes()); // sigfigs
109 h[16..20].copy_from_slice(&65535u32.to_le_bytes()); // snaplen
110 h[20..24].copy_from_slice(&LINKTYPE_USER0.to_le_bytes()); // network (linktype)
111 h
112}
113
/// Build the 4-byte Tailscale preamble that precedes the IP bytes of every record.
///
/// Bytes 0-1 hold `path_code` as a little-endian `u16`. Byte 2 (SNAT length) and byte 3 (DNAT
/// length) are left at zero, because this fork never does SNAT/DNAT.
fn record_preamble(path_code: u16) -> [u8; 4] {
    let mut preamble = [0u8; 4];
    preamble[..2].copy_from_slice(&path_code.to_le_bytes());
    preamble
}
120
/// Read the system clock and split the time since the Unix epoch into `(seconds, microseconds)`.
///
/// The seconds value is narrowed to the 32 bits a classic pcap record header can hold; the
/// microseconds value is the sub-second remainder. If the clock reports a time before the epoch,
/// `(0, 0)` is returned instead of an error.
fn now_parts() -> (u32, u32) {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|since_epoch| (since_epoch.as_secs() as u32, since_epoch.subsec_micros()))
        .unwrap_or((0, 0))
}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// The global header must match the classic pcap layout byte for byte.
    #[test]
    fn global_header_is_exact() {
        let expected: [u8; 24] = [
            0xD4, 0xC3, 0xB2, 0xA1, // magic 0xA1B2C3D4 LE
            0x02, 0x00, // version_major 2
            0x04, 0x00, // version_minor 4
            0x00, 0x00, 0x00, 0x00, // thiszone 0
            0x00, 0x00, 0x00, 0x00, // sigfigs 0
            0xFF, 0xFF, 0x00, 0x00, // snaplen 65535
            0x93, 0x00, 0x00, 0x00, // network 147 (LINKTYPE_USER0)
        ];
        assert_eq!(global_header(), expected);
    }

    /// The path code lands in the first two bytes, low byte first; the NAT length bytes stay 0.
    #[test]
    fn record_preamble_encodes_path_le() {
        let cases: [(u16, [u8; 4]); 2] = [
            (1, [0x01, 0x00, 0x00, 0x00]),
            (0x0102, [0x02, 0x01, 0x00, 0x00]),
        ];
        for &(path_code, expected) in cases.iter() {
            assert_eq!(record_preamble(path_code), expected);
        }
    }

    /// A record is the 16-byte header, the 4-byte preamble, then the payload, with both length
    /// fields counting preamble + payload.
    #[test]
    fn write_record_layout() {
        let payload = [0xAB, 0xCD, 0xEF];
        let mut sink = PcapSink::new(Vec::<u8>::new()).expect("global header");
        sink.write_record(1, 0x1122_3344, 0x0005_5AA5, &payload)
            .expect("write record");
        let bytes = sink.into_inner();

        // Everything after the 24-byte global header belongs to the single record.
        let (_global, record) = bytes.split_at(24);
        let expected: Vec<u8> = [
            &[0x44u8, 0x33, 0x22, 0x11][..], // ts_sec 0x11223344 LE
            &[0xA5, 0x5A, 0x05, 0x00][..],   // ts_usec 0x00055AA5 LE
            &[0x07, 0x00, 0x00, 0x00][..],   // caplen 4 + 3 = 7
            &[0x07, 0x00, 0x00, 0x00][..],   // orig_len 7
            &[0x01, 0x00, 0x00, 0x00][..],   // preamble: path 1, snat 0, dnat 0
            &[0xAB, 0xCD, 0xEF][..],         // payload
        ]
        .concat();
        assert_eq!(record, &expected[..]);
    }

    /// Constructing a sink writes the global header and nothing else.
    #[test]
    fn new_writes_global_header() {
        let mut out = Vec::<u8>::new();
        {
            let _sink = PcapSink::new(&mut out).expect("global header");
        }
        assert_eq!(out, global_header());
    }
}