Skip to main content

fips_core/upper/
tun.rs

1//! FIPS TUN Interface
2//!
3//! Manages the TUN device for sending and receiving IPv6 packets.
4//! The TUN interface presents FIPS addresses to the local system,
5//! allowing standard socket applications to communicate over the mesh.
6//!
7//! Platform-specific implementations:
8//! - Linux: Uses the `tun` crate with `rtnetlink` for interface configuration
9//! - macOS: Uses the `tun` crate with `ifconfig`/`route` for interface configuration
10//! - Windows: Uses the `wintun` crate for TUN device support
11
12use crate::FipsAddress;
13#[cfg(any(
14    target_os = "linux",
15    target_os = "macos",
16    not(any(target_os = "linux", target_os = "macos", windows))
17))]
18use crate::TunConfig;
19use std::collections::HashMap;
20#[cfg(any(target_os = "linux", target_os = "macos"))]
21use std::fs::File;
22#[cfg(any(target_os = "linux", target_os = "macos"))]
23use std::io::Read;
24#[cfg(not(target_os = "macos"))]
25#[cfg(any(target_os = "linux", target_os = "macos"))]
26use std::io::Write;
27use std::net::Ipv6Addr;
28#[cfg(any(target_os = "linux", target_os = "macos"))]
29use std::os::unix::io::{AsRawFd, FromRawFd};
30use std::sync::{Arc, RwLock, mpsc};
31use thiserror::Error;
32#[cfg(any(target_os = "linux", target_os = "macos"))]
33use tracing::error;
34use tracing::{debug, trace};
35#[cfg(windows)]
36use tracing::{error, warn};
37#[cfg(any(target_os = "linux", target_os = "macos"))]
38use tun::Layer;
39
40/// Read-only handle to the per-destination path MTU map. Populated by
41/// the discovery handler on `LookupResponse`; read by the TUN reader
42/// (outbound clamp) and writer (inbound clamp) at TCP MSS clamp time.
43/// Keyed by [`FipsAddress`] (16 bytes, the IPv6 form of a fips peer
44/// address).
45pub type PathMtuLookup = Arc<RwLock<HashMap<FipsAddress, u16>>>;
46
47/// Compute the effective TCP MSS ceiling for a packet given its peer
48/// address bytes (a 16-byte IPv6 destination on outbound, source on
49/// inbound). Returns `min(global_max_mss, learned_path_max_mss)` when
50/// the per-destination path MTU is known via discovery; otherwise
51/// returns `min(global_max_mss, ipv6_minimum_safe_max_mss)`, the
52/// conservative IPv6-minimum-derived ceiling.
53///
54/// The conservative empty-lookup fallback exists because there is a
55/// race window between TCP-SYN-out and discovery-completes-with-path-
56/// MTU on cold flows. Without the floor, the first SYN exits at the
57/// kernel-natural MSS (TUN MTU minus IPv6/TCP headers), which can
58/// exceed what some downstream forwarder hop is willing to carry.
59/// The drop is silent (no PTB feedback through the userspace TUN to
60/// the kernel TCP stack), so TCP retransmits at the same too-large
61/// MSS and the application's first connection wedges before discovery
62/// completes for a corrected second SYN to fire.
63///
64/// RFC 8200 mandates every IPv6 path accepts at least 1280-byte
65/// packets, so a SYN clamped to the IPv6-minimum-derived MSS fits
66/// any compliant path. Subsequent flows pick up the actual learned
67/// per-destination value, which can be larger (when path supports
68/// it) or smaller (when path is observed-tighter than the IPv6 min).
69///
70/// Path MTU bytes-on-wire to TCP MSS: subtract 77 bytes of FIPS encap
71/// overhead, then 40 bytes IPv6 + 20 bytes TCP headers.
72#[cfg(any(test, target_os = "linux", target_os = "macos", windows))]
73pub(crate) fn per_flow_max_mss(
74    lookup: &PathMtuLookup,
75    addr_bytes: &[u8],
76    global_max_mss: u16,
77) -> u16 {
78    use super::icmp::effective_ipv6_mtu;
79
80    // RFC 8200 IPv6-minimum MTU (1280) → effective FIPS-encapsulated
81    // payload (1203) → TCP segment after IPv6+TCP headers (1143).
82    // Used as the conservative ceiling for empty-lookup destinations.
83    const IPV6_MIN_MTU: u16 = 1280;
84    let conservative_max_mss = effective_ipv6_mtu(IPV6_MIN_MTU)
85        .saturating_sub(40)
86        .saturating_sub(20);
87    let empty_lookup_ceiling = std::cmp::min(global_max_mss, conservative_max_mss);
88
89    if addr_bytes.len() != 16 {
90        trace!(
91            len = addr_bytes.len(),
92            global_max_mss,
93            empty_lookup_ceiling,
94            "per_flow_max_mss: addr_bytes wrong length, fall back to conservative ceiling"
95        );
96        return empty_lookup_ceiling;
97    }
98    let Ok(fips_addr) = FipsAddress::from_slice(addr_bytes) else {
99        trace!(
100            global_max_mss,
101            empty_lookup_ceiling,
102            "per_flow_max_mss: FipsAddress::from_slice rejected (non-fd::/8 prefix), fall back to conservative ceiling"
103        );
104        return empty_lookup_ceiling;
105    };
106    let Ok(map) = lookup.read() else {
107        trace!(
108            fips_addr = %fips_addr,
109            global_max_mss,
110            empty_lookup_ceiling,
111            "per_flow_max_mss: lookup read lock poisoned, fall back to conservative ceiling"
112        );
113        return empty_lookup_ceiling;
114    };
115    let Some(&path_mtu) = map.get(&fips_addr) else {
116        trace!(
117            fips_addr = %fips_addr,
118            global_max_mss,
119            empty_lookup_ceiling,
120            map_len = map.len(),
121            "per_flow_max_mss: no path_mtu_lookup entry for destination, fall back to conservative ceiling"
122        );
123        return empty_lookup_ceiling;
124    };
125    let path_max_mss = effective_ipv6_mtu(path_mtu)
126        .saturating_sub(40)
127        .saturating_sub(20);
128    let result = std::cmp::min(global_max_mss, path_max_mss);
129    trace!(
130        fips_addr = %fips_addr,
131        path_mtu,
132        path_max_mss,
133        global_max_mss,
134        result,
135        "per_flow_max_mss: per-destination clamp applied"
136    );
137    result
138}
139
140/// Channel sender for packets to be written to TUN.
141pub type TunTx = mpsc::Sender<Vec<u8>>;
142
143/// Channel sender for outbound packets from TUN reader to Node.
144pub type TunOutboundTx = tokio::sync::mpsc::Sender<Vec<u8>>;
145/// Channel receiver for outbound packets (consumed by Node's RX loop).
146pub type TunOutboundRx = tokio::sync::mpsc::Receiver<Vec<u8>>;
147
148/// Errors that can occur with TUN operations.
149#[derive(Debug, Error)]
150pub enum TunError {
151    #[error("failed to create TUN device: {0}")]
152    Create(#[source] Box<dyn std::error::Error + Send + Sync>),
153
154    #[error("failed to configure TUN device: {0}")]
155    Configure(String),
156
157    #[cfg(target_os = "linux")]
158    #[error("netlink error: {0}")]
159    Netlink(#[from] rtnetlink::Error),
160
161    #[error("interface not found: {0}")]
162    InterfaceNotFound(String),
163
164    #[error("permission denied: {0}")]
165    PermissionDenied(String),
166
167    #[cfg(any(target_os = "linux", target_os = "macos"))]
168    #[error("IPv6 is disabled (set net.ipv6.conf.all.disable_ipv6=0)")]
169    Ipv6Disabled,
170
171    #[error("system TUN is not supported on this platform")]
172    UnsupportedPlatform,
173}
174
175#[cfg(any(target_os = "linux", target_os = "macos"))]
176impl From<tun::Error> for TunError {
177    fn from(e: tun::Error) -> Self {
178        TunError::Create(Box::new(e))
179    }
180}
181
/// Lifecycle state of the TUN device.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TunState {
    /// The configuration turns TUN off.
    Disabled,
    /// TUN is configured, but the device has not been created yet.
    Configured,
    /// The device exists and is ready for traffic.
    Active,
    /// Device initialization failed.
    Failed,
}

/// Renders the state as its lowercase name (`disabled`, `configured`,
/// `active`, `failed`).
impl std::fmt::Display for TunState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Disabled => "disabled",
            Self::Configured => "configured",
            Self::Active => "active",
            Self::Failed => "failed",
        };
        f.write_str(label)
    }
}
205
206// ============================================================================
207// Unix (Linux + macOS) TUN implementation
208// ============================================================================
209
210/// FIPS TUN device wrapper.
211#[cfg(any(target_os = "linux", target_os = "macos"))]
212pub struct TunDevice {
213    device: tun::Device,
214    name: String,
215    mtu: u16,
216    address: FipsAddress,
217}
218
219#[cfg(any(target_os = "linux", target_os = "macos"))]
220impl TunDevice {
221    /// Create or open a TUN device.
222    ///
223    /// If the interface already exists, opens it and reconfigures it.
224    /// Otherwise, creates a new TUN device.
225    ///
226    /// This requires CAP_NET_ADMIN capability (run with sudo or setcap).
227    pub async fn create(config: &TunConfig, address: FipsAddress) -> Result<Self, TunError> {
228        // Check if IPv6 is enabled
229        if platform::is_ipv6_disabled() {
230            return Err(TunError::Ipv6Disabled);
231        }
232
233        let name = config.name();
234        let mtu = config.mtu();
235
236        // Delete existing interface if present (TUN devices are exclusive)
237        if platform::interface_exists(name).await {
238            debug!(name, "Deleting existing TUN interface");
239            if let Err(e) = platform::delete_interface(name).await {
240                debug!(name, error = %e, "Failed to delete existing interface");
241            }
242        }
243
244        // Create the TUN device
245        let mut tun_config = tun::Configuration::default();
246
247        // On macOS, utun devices get kernel-assigned names (utun0, utun1, ...),
248        // so we skip setting the name and read it back after creation.
249        #[cfg(target_os = "linux")]
250        #[allow(deprecated)]
251        tun_config.name(name).layer(Layer::L3).mtu(mtu);
252
253        #[cfg(target_os = "macos")]
254        {
255            #[allow(deprecated)]
256            tun_config.layer(Layer::L3).mtu(mtu);
257        }
258
259        let device = tun::create(&tun_config)?;
260
261        // Read the actual device name (on macOS this is the kernel-assigned utun* name)
262        let actual_name = {
263            use tun::AbstractDevice;
264            device
265                .tun_name()
266                .map_err(|e| TunError::Configure(format!("failed to get device name: {}", e)))?
267        };
268
269        // Configure address and bring up via platform-specific method
270        platform::configure_interface(&actual_name, address.to_ipv6(), mtu).await?;
271
272        Ok(Self {
273            device,
274            name: actual_name,
275            mtu,
276            address,
277        })
278    }
279
280    /// Get the device name.
281    pub fn name(&self) -> &str {
282        &self.name
283    }
284
285    /// Get the configured MTU.
286    pub fn mtu(&self) -> u16 {
287        self.mtu
288    }
289
290    /// Get the FIPS address assigned to this device.
291    pub fn address(&self) -> &FipsAddress {
292        &self.address
293    }
294
295    /// Get a reference to the underlying tun::Device.
296    pub fn device(&self) -> &tun::Device {
297        &self.device
298    }
299
300    /// Get a mutable reference to the underlying tun::Device.
301    pub fn device_mut(&mut self) -> &mut tun::Device {
302        &mut self.device
303    }
304
305    /// Read a packet from the TUN device.
306    ///
307    /// Returns the number of bytes read into the buffer, or an `io::Error`.
308    /// The buffer should be at least MTU + header size (typically 1500+ bytes).
309    ///
310    /// The tun crate's `Read` impl transparently strips the macOS utun
311    /// packet information header, so this returns a raw IP packet on all
312    /// platforms.
313    ///
314    /// The raw `io::Error` is returned so callers can inspect `ErrorKind`
315    /// (e.g. `WouldBlock`) or `raw_os_error()` without string matching.
316    pub fn read_packet(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
317        self.device.read(buf)
318    }
319
320    /// Shutdown and delete the TUN device.
321    ///
322    /// This deletes the interface entirely.
323    pub async fn shutdown(&self) -> Result<(), TunError> {
324        debug!(name = %self.name, "Deleting TUN device");
325        platform::delete_interface(&self.name).await
326    }
327
328    /// Create a TunWriter for this device.
329    ///
330    /// This duplicates the underlying file descriptor so that reads and writes
331    /// can happen independently on separate threads. Returns the writer and
332    /// a channel sender for submitting packets to be written.
333    ///
334    /// `max_mss` is the global TCP MSS ceiling derived from the local
335    /// `transport_mtu()` floor. `path_mtu_lookup` is a read-only handle to
336    /// the per-destination path MTU map populated by discovery; the writer
337    /// reads it on each inbound SYN-ACK to compute a per-flow ceiling that
338    /// honors learned narrow paths through the mesh.
339    pub fn create_writer(
340        &self,
341        max_mss: u16,
342        path_mtu_lookup: PathMtuLookup,
343    ) -> Result<(TunWriter, TunTx), TunError> {
344        let fd = self.device.as_raw_fd();
345
346        // Duplicate the file descriptor for writing
347        let write_fd = unsafe { libc::dup(fd) };
348        if write_fd < 0 {
349            return Err(TunError::Configure(format!(
350                "failed to dup fd: {}",
351                std::io::Error::last_os_error()
352            )));
353        }
354
355        let write_file = unsafe { File::from_raw_fd(write_fd) };
356        let (tx, rx) = mpsc::channel();
357
358        Ok((
359            TunWriter {
360                file: write_file,
361                rx,
362                name: self.name.clone(),
363                max_mss,
364                path_mtu_lookup,
365            },
366            tx,
367        ))
368    }
369}
370
/// Protocol-family number for IPv6 on Darwin (`AF_INET6` in
/// `<sys/socket.h>`). It is the value carried by the 4-byte big-endian
/// packet-info header that precedes every utun frame.
#[cfg(target_os = "macos")]
const UTUN_AF_INET6: u32 = 30;

/// Produce the utun packet-info header for an IPv6 frame.
///
/// macOS utun devices expect each frame to start with the protocol family
/// encoded as one big-endian `u32`. FIPS only sends IPv6, so the header is
/// always `AF_INET6 = 30`, i.e. the bytes `[0x00, 0x00, 0x00, 0x1e]`.
#[cfg(target_os = "macos")]
#[inline]
fn utun_af_inet6_header() -> [u8; 4] {
    u32::to_be_bytes(UTUN_AF_INET6)
}
388
/// Decode the utun packet-info header at the start of `buf`.
///
/// Yields the address-family number stored in the first four bytes as a
/// big-endian `u32` (`AF_INET6 = 30` for IPv6 frames), or `None` when
/// `buf` holds fewer than four bytes. The `tun` crate's `Read` impl
/// already strips this header on the read path; this helper exists for
/// round-trip tests against [`utun_af_inet6_header`] and for any future
/// code that reads from the dup'd fd directly.
#[cfg(all(test, target_os = "macos"))]
#[inline]
fn parse_utun_af_prefix(buf: &[u8]) -> Option<u32> {
    let prefix: [u8; 4] = buf.get(..4)?.try_into().ok()?;
    Some(u32::from_be_bytes(prefix))
}
405
406/// Writer thread for TUN device.
407///
408/// Services a queue of outbound packets and writes them to the TUN device.
409/// Multiple producers can send packets via the TunTx channel.
410///
411/// Also performs TCP MSS clamping on inbound SYN-ACK packets.
412#[cfg(any(target_os = "linux", target_os = "macos"))]
413pub struct TunWriter {
414    file: File,
415    rx: mpsc::Receiver<Vec<u8>>,
416    name: String,
417    max_mss: u16,
418    path_mtu_lookup: PathMtuLookup,
419}
420
421#[cfg(any(target_os = "linux", target_os = "macos"))]
422impl TunWriter {
423    /// Run the writer loop.
424    ///
425    /// Blocks forever, reading packets from the channel and writing them
426    /// to the TUN device. Returns when the channel is closed (all senders dropped).
427    #[cfg_attr(target_os = "macos", allow(unused_mut))]
428    pub fn run(mut self) {
429        use super::tcp_mss::clamp_tcp_mss;
430
431        debug!(name = %self.name, max_mss = self.max_mss, "TUN writer starting");
432
433        for mut packet in self.rx {
434            // Per-destination clamp: peer IPv6 source address (bytes 8..24)
435            // identifies the flow's remote end. If discovery has learned a
436            // smaller path MTU for that peer, tighten the ceiling.
437            let effective_max_mss = if packet.len() >= 24 {
438                per_flow_max_mss(&self.path_mtu_lookup, &packet[8..24], self.max_mss)
439            } else {
440                self.max_mss
441            };
442            // Clamp TCP MSS on inbound SYN-ACK packets
443            if clamp_tcp_mss(&mut packet, effective_max_mss) {
444                trace!(
445                    name = %self.name,
446                    max_mss = effective_max_mss,
447                    "Clamped TCP MSS in inbound SYN-ACK packet"
448                );
449            }
450
451            // On macOS, utun devices require a 4-byte packet information header
452            // prepended to each packet. The tun crate handles this for its own
453            // Read/Write impl, but we use a dup'd fd directly. We use writev
454            // to avoid allocating a buffer on every packet.
455            #[cfg(target_os = "macos")]
456            let write_result = {
457                use std::os::unix::io::AsRawFd;
458                let af_header = utun_af_inet6_header();
459                let iov = [
460                    libc::iovec {
461                        iov_base: af_header.as_ptr() as *mut libc::c_void,
462                        iov_len: 4,
463                    },
464                    libc::iovec {
465                        iov_base: packet.as_ptr() as *mut libc::c_void,
466                        iov_len: packet.len(),
467                    },
468                ];
469                let ret = unsafe { libc::writev(self.file.as_raw_fd(), iov.as_ptr(), 2) };
470                if ret < 0 {
471                    Err(std::io::Error::last_os_error())
472                } else {
473                    let expected = 4 + packet.len();
474                    if (ret as usize) < expected {
475                        Err(std::io::Error::new(
476                            std::io::ErrorKind::WriteZero,
477                            format!("short writev: {} of {} bytes", ret, expected),
478                        ))
479                    } else {
480                        Ok(())
481                    }
482                }
483            };
484            #[cfg(not(target_os = "macos"))]
485            let write_result = self.file.write_all(&packet);
486
487            if let Err(e) = write_result {
488                // "Bad address" is expected during shutdown when interface is deleted
489                let err_str = e.to_string();
490                if err_str.contains("Bad address") {
491                    break;
492                }
493                error!(name = %self.name, error = %e, "TUN write error");
494            } else {
495                trace!(name = %self.name, len = packet.len(), "TUN packet written");
496            }
497        }
498    }
499}
500
501/// TUN packet reader loop (Linux).
502///
503/// Reads IPv6 packets from the TUN device. Packets destined for FIPS addresses
504/// (fd::/8) are forwarded to the Node via the outbound channel for session
505/// encapsulation and routing. Non-FIPS packets receive ICMPv6 Destination
506/// Unreachable responses.
507///
508/// Also performs TCP MSS clamping on SYN packets to prevent oversized segments.
509///
510/// This is designed to run in a dedicated thread since TUN reads are blocking.
511/// The loop exits when the TUN interface is deleted (EFAULT) or an unrecoverable
512/// error occurs.
513#[cfg(not(target_os = "macos"))]
514#[cfg(any(target_os = "linux", target_os = "macos"))]
515pub fn run_tun_reader(
516    mut device: TunDevice,
517    mtu: u16,
518    our_addr: FipsAddress,
519    tun_tx: TunTx,
520    outbound_tx: TunOutboundTx,
521    transport_mtu: u16,
522    path_mtu_lookup: PathMtuLookup,
523) {
524    let (name, mut buf, max_mss) = tun_reader_setup(device.name(), mtu, transport_mtu);
525
526    loop {
527        match device.read_packet(&mut buf) {
528            Ok(n) if n > 0 => {
529                if !handle_tun_packet(
530                    &mut buf[..n],
531                    max_mss,
532                    &name,
533                    our_addr,
534                    &tun_tx,
535                    &outbound_tx,
536                    &path_mtu_lookup,
537                ) {
538                    break;
539                }
540            }
541            Ok(_) => {}
542            Err(e) => {
543                // EFAULT ("Bad address") is expected during shutdown when the interface is deleted
544                if e.raw_os_error() != Some(libc::EFAULT) {
545                    error!(name = %name, error = %e, "TUN read error");
546                }
547                break;
548            }
549        }
550    }
551}
552
/// Owns a raw file descriptor and closes it when dropped.
///
/// `run_tun_reader` wraps the read end of the shutdown pipe in this guard
/// so the descriptor is closed on every exit path of the function.
#[cfg(target_os = "macos")]
struct ShutdownFd(std::os::unix::io::RawFd);

#[cfg(target_os = "macos")]
impl Drop for ShutdownFd {
    fn drop(&mut self) {
        let fd = self.0;
        // The return value of close is deliberately ignored.
        unsafe {
            libc::close(fd);
        }
    }
}
568
/// TUN packet reader loop (macOS).
///
/// Uses `select()` to multiplex between the TUN fd and a shutdown pipe,
/// avoiding the need to close the TUN fd externally (which would cause a
/// double-close when `TunDevice` drops).
///
/// The function takes ownership of `shutdown_fd` and closes it on return,
/// whichever exit path is taken. It returns when the shutdown pipe becomes
/// readable, when `select` or a read fails, or when `handle_tun_packet`
/// reports that a channel has closed.
///
/// The TUN fd is switched to non-blocking mode so that, after `select`
/// reports it readable, all queued packets can be drained until the read
/// returns `WouldBlock`. If that mode cannot be set, the failure is logged
/// and the loop reads only one packet per `select` round: a blocking drain
/// would sit in `read` after the last packet and never see the shutdown
/// pipe.
#[cfg(target_os = "macos")]
#[allow(clippy::too_many_arguments)]
pub fn run_tun_reader(
    mut device: TunDevice,
    mtu: u16,
    our_addr: FipsAddress,
    tun_tx: TunTx,
    outbound_tx: TunOutboundTx,
    transport_mtu: u16,
    path_mtu_lookup: PathMtuLookup,
    shutdown_fd: std::os::unix::io::RawFd,
) {
    let _shutdown_fd = ShutdownFd(shutdown_fd);
    let tun_fd = device.device().as_raw_fd();
    let (name, mut buf, max_mss) = tun_reader_setup(device.name(), mtu, transport_mtu);

    // Set TUN fd to non-blocking so we can use select + read without blocking
    // past the point where select returns readable.
    // SAFETY: `tun_fd` is owned by `device`, which lives for the whole
    // function; F_GETFL/F_SETFL do not access caller memory.
    let nonblocking = unsafe {
        let flags = libc::fcntl(tun_fd, libc::F_GETFL);
        if flags < 0 || libc::fcntl(tun_fd, libc::F_SETFL, flags | libc::O_NONBLOCK) < 0 {
            Err(std::io::Error::last_os_error())
        } else {
            Ok(())
        }
    };
    let drain_until_would_block = match nonblocking {
        Ok(()) => true,
        Err(e) => {
            error!(
                name = %name,
                error = %e,
                "Failed to set TUN fd non-blocking; reading one packet per select round"
            );
            false
        }
    };

    let nfds = tun_fd.max(shutdown_fd) + 1;

    loop {
        // Wait for either TUN data or shutdown signal
        // SAFETY: `read_fds` is a properly initialised fd_set living on this
        // stack frame; both descriptors stay open while the loop runs.
        unsafe {
            let mut read_fds: libc::fd_set = std::mem::zeroed();
            libc::FD_ZERO(&mut read_fds);
            libc::FD_SET(tun_fd, &mut read_fds);
            libc::FD_SET(shutdown_fd, &mut read_fds);

            let ret = libc::select(
                nfds,
                &mut read_fds,
                std::ptr::null_mut(),
                std::ptr::null_mut(),
                std::ptr::null_mut(),
            );
            if ret < 0 {
                let err = std::io::Error::last_os_error();
                if err.kind() == std::io::ErrorKind::Interrupted {
                    continue;
                }
                error!(name = %name, error = %err, "TUN select error");
                break;
            }

            // Shutdown signal received
            if libc::FD_ISSET(shutdown_fd, &read_fds) {
                debug!(name = %name, "TUN reader received shutdown signal");
                break;
            }
        }

        // TUN fd is readable — drain all available packets
        loop {
            match device.read_packet(&mut buf) {
                Ok(n) if n > 0 => {
                    if !handle_tun_packet(
                        &mut buf[..n],
                        max_mss,
                        &name,
                        our_addr,
                        &tun_tx,
                        &outbound_tx,
                        &path_mtu_lookup,
                    ) {
                        return; // _shutdown_fd closes on drop
                    }
                    if !drain_until_would_block {
                        // Blocking fd: select vouched for one read only.
                        break;
                    }
                }
                Ok(_) => break, // No more data
                Err(e) => {
                    if e.kind() == std::io::ErrorKind::WouldBlock {
                        break; // Done for this select round
                    }
                    // EBADF is expected during shutdown when the fd is closed
                    if e.raw_os_error() != Some(libc::EBADF) {
                        error!(name = %name, error = %e, "TUN read error");
                    }
                    return; // _shutdown_fd closes on drop
                }
            }
        }
    }
    // _shutdown_fd closes on drop
}
664
665/// Common setup for TUN reader: allocates buffer, computes max MSS.
666#[cfg(any(target_os = "linux", target_os = "macos", windows))]
667fn tun_reader_setup(device_name: &str, mtu: u16, transport_mtu: u16) -> (String, Vec<u8>, u16) {
668    use super::icmp::effective_ipv6_mtu;
669
670    let name = device_name.to_string();
671    let buf = vec![0u8; mtu as usize + 100];
672
673    const IPV6_HEADER: u16 = 40;
674    const TCP_HEADER: u16 = 20;
675    let effective_mtu = effective_ipv6_mtu(transport_mtu);
676    let max_mss = effective_mtu
677        .saturating_sub(IPV6_HEADER)
678        .saturating_sub(TCP_HEADER);
679
680    debug!(
681        name = %name,
682        tun_mtu = mtu,
683        transport_mtu = transport_mtu,
684        effective_mtu = effective_mtu,
685        max_mss = max_mss,
686        "TUN reader starting"
687    );
688
689    (name, buf, max_mss)
690}
691
692/// Process a single TUN packet. Returns `false` if the reader should exit.
693#[cfg(any(target_os = "linux", target_os = "macos", windows))]
694fn handle_tun_packet(
695    packet: &mut [u8],
696    max_mss: u16,
697    name: &str,
698    our_addr: FipsAddress,
699    tun_tx: &TunTx,
700    outbound_tx: &TunOutboundTx,
701    path_mtu_lookup: &PathMtuLookup,
702) -> bool {
703    use super::icmp::{DestUnreachableCode, build_dest_unreachable, should_send_icmp_error};
704    use super::tcp_mss::clamp_tcp_mss;
705
706    log_ipv6_packet(packet);
707
708    // Must be a valid IPv6 packet
709    if packet.len() < 40 || packet[0] >> 4 != 6 {
710        return true;
711    }
712
713    // Check if destination is a FIPS address (fd::/8 prefix)
714    if packet[24] == crate::identity::FIPS_ADDRESS_PREFIX {
715        // Per-destination clamp: if discovery has learned a smaller path
716        // MTU for this destination, tighten the ceiling for this flow.
717        let effective_max_mss = per_flow_max_mss(path_mtu_lookup, &packet[24..40], max_mss);
718        if clamp_tcp_mss(packet, effective_max_mss) {
719            trace!(name = %name, max_mss = effective_max_mss, "Clamped TCP MSS in SYN packet");
720        }
721        if outbound_tx.blocking_send(packet.to_vec()).is_err() {
722            return false; // Channel closed, shutdown
723        }
724    } else {
725        // Non-FIPS destination: send ICMPv6 Destination Unreachable
726        if should_send_icmp_error(packet)
727            && let Some(response) =
728                build_dest_unreachable(packet, DestUnreachableCode::NoRoute, our_addr.to_ipv6())
729        {
730            trace!(name = %name, len = response.len(), "Sending ICMPv6 Destination Unreachable (non-FIPS destination)");
731            if tun_tx.send(response).is_err() {
732                return false;
733            }
734        }
735    }
736    true
737}
738
739#[cfg(any(target_os = "linux", target_os = "macos"))]
740impl std::fmt::Debug for TunDevice {
741    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
742        f.debug_struct("TunDevice")
743            .field("name", &self.name)
744            .field("mtu", &self.mtu)
745            .field("address", &self.address)
746            .finish()
747    }
748}
749
750/// Log basic information about an IPv6 packet at TRACE level.
751pub fn log_ipv6_packet(packet: &[u8]) {
752    if packet.len() < 40 {
753        debug!(len = packet.len(), "Received undersized packet");
754        return;
755    }
756
757    let version = packet[0] >> 4;
758    if version != 6 {
759        debug!(version, len = packet.len(), "Received non-IPv6 packet");
760        return;
761    }
762
763    let payload_len = u16::from_be_bytes([packet[4], packet[5]]);
764    let next_header = packet[6];
765    let hop_limit = packet[7];
766
767    let src = Ipv6Addr::from(<[u8; 16]>::try_from(&packet[8..24]).unwrap());
768    let dst = Ipv6Addr::from(<[u8; 16]>::try_from(&packet[24..40]).unwrap());
769
770    let protocol = match next_header {
771        6 => "TCP",
772        17 => "UDP",
773        58 => "ICMPv6",
774        _ => "other",
775    };
776
777    trace!("TUN packet received:");
778    trace!("      src: {}", src);
779    trace!("      dst: {}", dst);
780    trace!(" protocol: {} ({})", protocol, next_header);
781    trace!("  payload: {} bytes, hop_limit: {}", payload_len, hop_limit);
782}
783
784/// Shutdown and delete a TUN interface by name.
785///
786/// This deletes the interface, which will cause any blocking reads
787/// to return an error. Use this for graceful shutdown when the TUN device
788/// has been moved to another thread.
789#[cfg(any(target_os = "linux", target_os = "macos"))]
790pub async fn shutdown_tun_interface(name: &str) -> Result<(), TunError> {
791    debug!("Shutting down TUN interface {}", name);
792    platform::delete_interface(name).await?;
793    debug!("TUN interface {} stopped", name);
794    Ok(())
795}
796
797// ============================================================================
798// Windows TUN implementation (wintun)
799// ============================================================================
800
801#[cfg(windows)]
802mod windows_tun {
803    use super::*;
804    use crate::TunConfig;
805    use std::sync::Arc;
806
807    /// The Windows adapter name visible in network settings and used in netsh commands.
808    pub(crate) const ADAPTER_NAME: &str = "FIPS";
809
810    /// Wintun ring buffer capacity in bytes. Must be a power of 2 between
811    /// 0x20000 (128 KiB) and 0x4000000 (64 MiB). 2 MiB balances memory
812    /// usage against burst tolerance.
813    const WINTUN_RING_CAPACITY: u32 = 0x200000; // 2 MiB
814
815    /// FIPS TUN device wrapper (Windows/wintun).
816    ///
817    /// Uses the wintun driver for userspace packet I/O on Windows. The wintun
818    /// DLL must be present in the executable's directory or system PATH.
819    /// Adapter creation requires Administrator privileges.
820    ///
821    /// Unlike the Linux TUN which uses a file descriptor, wintun uses a
822    /// session-based API with ring buffers for packet exchange.
823    pub struct TunDevice {
824        session: Arc<wintun::Session>,
825        _adapter: Arc<wintun::Adapter>,
826        name: String,
827        mtu: u16,
828        address: FipsAddress,
829    }
830
831    impl TunDevice {
832        /// Create a wintun TUN adapter and configure it with an IPv6 address.
833        ///
834        /// Loads the wintun DLL, creates (or reopens) a named adapter, starts
835        /// a session with a 2 MiB ring buffer, and configures the interface
836        /// via netsh. Requires Administrator privileges.
837        pub async fn create(config: &TunConfig, address: FipsAddress) -> Result<Self, TunError> {
838            let name = config.name();
839            let mtu = config.mtu();
840
841            // Load the wintun DLL
842            let wintun = unsafe { wintun::load() }.map_err(|e| {
843                TunError::Create(
844                    format!(
845                        "Failed to load wintun.dll: {}. Download from https://www.wintun.net/",
846                        e
847                    )
848                    .into(),
849                )
850            })?;
851
852            // Create or reopen the adapter.
853            // First arg: adapter name visible in Windows network settings.
854            // Second arg: tunnel type (internal identifier for wintun).
855            let adapter = match wintun::Adapter::create(&wintun, ADAPTER_NAME, name, None) {
856                Ok(a) => a,
857                Err(e) => {
858                    return Err(TunError::Create(
859                        format!(
860                            "Failed to create wintun adapter '{}': {}. Run as Administrator.",
861                            name, e
862                        )
863                        .into(),
864                    ));
865                }
866            };
867
868            // Start a session with the configured ring buffer capacity
869            let session = adapter.start_session(WINTUN_RING_CAPACITY).map_err(|e| {
870                TunError::Create(format!("Failed to start wintun session: {}", e).into())
871            })?;
872
873            let session = Arc::new(session);
874
875            // Configure the IPv6 address and route via netsh.
876            // Use the adapter name (ADAPTER_NAME) not the tunnel type name.
877            let ipv6_addr = address.to_ipv6();
878            configure_windows_interface(ADAPTER_NAME, ipv6_addr, mtu).await?;
879
880            Ok(Self {
881                session,
882                _adapter: adapter,
883                name: name.to_string(),
884                mtu,
885                address,
886            })
887        }
888
889        /// Get the device name.
890        pub fn name(&self) -> &str {
891            &self.name
892        }
893
894        /// Get the configured MTU.
895        pub fn mtu(&self) -> u16 {
896            self.mtu
897        }
898
899        /// Get the FIPS address assigned to this device.
900        pub fn address(&self) -> &FipsAddress {
901            &self.address
902        }
903
904        /// Read a packet from the TUN device.
905        ///
906        /// Blocks until a packet is available from the wintun session.
907        /// Returns the number of bytes copied into `buf`.
908        pub fn read_packet(&mut self, buf: &mut [u8]) -> Result<usize, TunError> {
909            match self.session.receive_blocking() {
910                Ok(packet) => {
911                    let bytes = packet.bytes();
912                    let len = bytes.len().min(buf.len());
913                    buf[..len].copy_from_slice(&bytes[..len]);
914                    Ok(len)
915                }
916                Err(e) => Err(TunError::Configure(format!("read failed: {}", e))),
917            }
918        }
919
920        /// Shutdown the TUN device by removing the fd00::/8 route.
921        ///
922        /// The wintun adapter and session are cleaned up when dropped.
923        pub async fn shutdown(&self) -> Result<(), TunError> {
924            debug!(name = %self.name, "Shutting down TUN device");
925            let _ = tokio::process::Command::new("netsh")
926                .args([
927                    "interface",
928                    "ipv6",
929                    "delete",
930                    "route",
931                    "fd00::/8",
932                    &format!("interface={}", ADAPTER_NAME),
933                ])
934                .output()
935                .await;
936            Ok(())
937        }
938
939        /// Create a TunWriter for this device.
940        ///
941        /// Clones the wintun session `Arc` so the writer can allocate and send
942        /// packets independently. Returns the writer and a channel sender for
943        /// submitting packets to be written.
944        ///
945        /// `max_mss` is the global TCP MSS ceiling. `path_mtu_lookup` is a
946        /// read-only handle to per-destination path MTU learned via
947        /// discovery.
948        pub fn create_writer(
949            &self,
950            max_mss: u16,
951            path_mtu_lookup: PathMtuLookup,
952        ) -> Result<(TunWriter, TunTx), TunError> {
953            let (tx, rx) = mpsc::channel();
954            Ok((
955                TunWriter {
956                    session: self.session.clone(),
957                    rx,
958                    name: self.name.clone(),
959                    max_mss,
960                    path_mtu_lookup,
961                },
962                tx,
963            ))
964        }
965    }
966
967    impl std::fmt::Debug for TunDevice {
968        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
969            f.debug_struct("TunDevice")
970                .field("name", &self.name)
971                .field("mtu", &self.mtu)
972                .field("address", &self.address)
973                .finish()
974        }
975    }
976
    /// Writer thread for TUN device (Windows).
    ///
    /// Services a queue of outbound packets and writes them to the wintun
    /// session. Uses `allocate_send_packet()` / `send_packet()` instead of
    /// file I/O.
    ///
    /// Also performs TCP MSS clamping on inbound SYN-ACK packets.
    pub struct TunWriter {
        /// Clone of the device's wintun session; packets are allocated from
        /// and sent through it.
        session: Arc<wintun::Session>,
        /// Receiving end of the packet queue; each item is one raw packet.
        rx: mpsc::Receiver<Vec<u8>>,
        /// Device name, used only as a field in log output.
        name: String,
        /// Global TCP MSS ceiling supplied by the caller of `create_writer`.
        max_mss: u16,
        /// Handle to the per-destination path MTU map consulted per packet.
        path_mtu_lookup: PathMtuLookup,
    }
991
992    impl TunWriter {
993        /// Run the writer loop.
994        ///
995        /// Blocks forever, reading packets from the channel and writing them
996        /// to the wintun session. Returns when the channel is closed.
997        pub fn run(self) {
998            use super::per_flow_max_mss;
999            use crate::upper::tcp_mss::clamp_tcp_mss;
1000
1001            debug!(name = %self.name, max_mss = self.max_mss, "TUN writer starting");
1002
1003            for mut packet in self.rx {
1004                // Per-destination clamp (peer source IPv6 = bytes 8..24)
1005                let effective_max_mss = if packet.len() >= 24 {
1006                    per_flow_max_mss(&self.path_mtu_lookup, &packet[8..24], self.max_mss)
1007                } else {
1008                    self.max_mss
1009                };
1010                // Clamp TCP MSS on inbound SYN-ACK packets
1011                if clamp_tcp_mss(&mut packet, effective_max_mss) {
1012                    trace!(
1013                        name = %self.name,
1014                        max_mss = effective_max_mss,
1015                        "Clamped TCP MSS in inbound SYN-ACK packet"
1016                    );
1017                }
1018
1019                let pkt_len = match u16::try_from(packet.len()) {
1020                    Ok(len) => len,
1021                    Err(_) => {
1022                        warn!(name = %self.name, len = packet.len(), "Dropping oversized packet for TUN");
1023                        continue;
1024                    }
1025                };
1026                match self.session.allocate_send_packet(pkt_len) {
1027                    Ok(mut send_packet) => {
1028                        send_packet.bytes_mut().copy_from_slice(&packet);
1029                        self.session.send_packet(send_packet);
1030                        trace!(name = %self.name, len = packet.len(), "TUN packet written");
1031                    }
1032                    Err(e) => {
1033                        error!(name = %self.name, error = %e, "TUN write error (allocate)");
1034                    }
1035                }
1036            }
1037        }
1038    }
1039
1040    /// TUN packet reader loop (Windows).
1041    ///
1042    /// Reads IPv6 packets from the wintun session. Packets destined for FIPS
1043    /// addresses (fd::/8) are forwarded to the Node via the outbound channel
1044    /// for session encapsulation and routing. Non-FIPS packets receive ICMPv6
1045    /// Destination Unreachable responses.
1046    ///
1047    /// Also performs TCP MSS clamping on SYN packets to prevent oversized segments.
1048    ///
1049    /// This is designed to run in a dedicated thread since wintun reads are blocking.
1050    /// The loop exits when the session is closed or an unrecoverable error occurs.
1051    pub fn run_tun_reader(
1052        mut device: TunDevice,
1053        mtu: u16,
1054        our_addr: FipsAddress,
1055        tun_tx: TunTx,
1056        outbound_tx: TunOutboundTx,
1057        transport_mtu: u16,
1058        path_mtu_lookup: PathMtuLookup,
1059    ) {
1060        let (name, mut buf, max_mss) = super::tun_reader_setup(device.name(), mtu, transport_mtu);
1061
1062        loop {
1063            match device.read_packet(&mut buf) {
1064                Ok(n) if n > 0 => {
1065                    if !super::handle_tun_packet(
1066                        &mut buf[..n],
1067                        max_mss,
1068                        &name,
1069                        our_addr,
1070                        &tun_tx,
1071                        &outbound_tx,
1072                        &path_mtu_lookup,
1073                    ) {
1074                        break;
1075                    }
1076                }
1077                Ok(_) => {}
1078                Err(e) => {
1079                    let err_str = format!("{}", e);
1080                    if !err_str.contains("Bad address") {
1081                        error!(name = %name, error = %e, "TUN read error");
1082                    }
1083                    break;
1084                }
1085            }
1086        }
1087    }
1088
1089    /// Shutdown and delete a TUN interface by name (Windows).
1090    ///
1091    /// Removes the fd00::/8 route via netsh. The wintun adapter itself
1092    /// is cleaned up when the `Adapter` handle is dropped.
1093    pub async fn shutdown_tun_interface(name: &str) -> Result<(), TunError> {
1094        debug!("Shutting down TUN interface {}", name);
1095        let _ = tokio::process::Command::new("netsh")
1096            .args([
1097                "interface",
1098                "ipv6",
1099                "delete",
1100                "route",
1101                "fd00::/8",
1102                &format!("interface={}", ADAPTER_NAME),
1103            ])
1104            .output()
1105            .await;
1106        let _ = name; // name is the tunnel type, not the adapter name
1107        debug!("TUN interface {} stopped", name);
1108        Ok(())
1109    }
1110
1111    /// Configure the Windows network interface with IPv6 address, MTU, and route.
1112    ///
1113    /// Uses `netsh` commands to configure the wintun adapter. A brief delay
1114    /// is inserted before configuration to allow Windows to fully register
1115    /// the adapter in its network stack.
1116    ///
1117    /// `adapter_name` must be the Windows adapter name (e.g. "FIPS"), not the
1118    /// wintun tunnel type name.
1119    async fn configure_windows_interface(
1120        adapter_name: &str,
1121        addr: Ipv6Addr,
1122        mtu: u16,
1123    ) -> Result<(), TunError> {
1124        // Brief delay to let Windows fully register the adapter
1125        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
1126
1127        // Set IPv6 address
1128        let output = tokio::process::Command::new("netsh")
1129            .args([
1130                "interface",
1131                "ipv6",
1132                "add",
1133                "address",
1134                adapter_name,
1135                &format!("{}/128", addr),
1136            ])
1137            .output()
1138            .await
1139            .map_err(|e| TunError::Configure(format!("netsh add address failed: {}", e)))?;
1140
1141        if !output.status.success() {
1142            let stderr = String::from_utf8_lossy(&output.stderr);
1143            let stdout = String::from_utf8_lossy(&output.stdout);
1144            if !stderr.contains("already") && !stdout.contains("already") {
1145                warn!(
1146                    "netsh add address failed: stdout={} stderr={}",
1147                    stdout.trim(),
1148                    stderr.trim()
1149                );
1150            }
1151        }
1152
1153        // Set MTU
1154        let output = tokio::process::Command::new("netsh")
1155            .args([
1156                "interface",
1157                "ipv6",
1158                "set",
1159                "subinterface",
1160                adapter_name,
1161                &format!("mtu={}", mtu),
1162            ])
1163            .output()
1164            .await
1165            .map_err(|e| TunError::Configure(format!("netsh set mtu failed: {}", e)))?;
1166
1167        if !output.status.success() {
1168            let stderr = String::from_utf8_lossy(&output.stderr);
1169            let stdout = String::from_utf8_lossy(&output.stdout);
1170            warn!(
1171                "netsh set mtu failed: stdout={} stderr={}",
1172                stdout.trim(),
1173                stderr.trim()
1174            );
1175        }
1176
1177        // Add route for fd00::/8 (FIPS address space) via this adapter
1178        let output = tokio::process::Command::new("netsh")
1179            .args([
1180                "interface",
1181                "ipv6",
1182                "add",
1183                "route",
1184                "fd00::/8",
1185                adapter_name,
1186            ])
1187            .output()
1188            .await
1189            .map_err(|e| TunError::Configure(format!("netsh add route failed: {}", e)))?;
1190
1191        if !output.status.success() {
1192            let stderr = String::from_utf8_lossy(&output.stderr);
1193            let stdout = String::from_utf8_lossy(&output.stdout);
1194            if !stderr.contains("already") && !stdout.contains("already") {
1195                warn!(
1196                    "netsh add route failed: stdout={} stderr={}",
1197                    stdout.trim(),
1198                    stderr.trim()
1199                );
1200            }
1201        }
1202
1203        Ok(())
1204    }
1205}
1206
1207// Re-export Windows TUN types at module level
1208#[cfg(windows)]
1209pub use windows_tun::{TunDevice, TunWriter, run_tun_reader, shutdown_tun_interface};
1210
1211// ============================================================================
1212// Unsupported system TUN platforms
1213// ============================================================================
1214
#[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
mod unsupported_tun {
    use super::*;

    /// Stand-in TUN device for platforms where the application, not FIPS,
    /// owns packet I/O. `create` always fails, so no value is ever built.
    pub struct TunDevice {
        name: String,
        mtu: u16,
        address: FipsAddress,
    }

    impl TunDevice {
        /// Always fails with `TunError::UnsupportedPlatform`: system TUN
        /// creation is not available on this platform.
        pub async fn create(config: &TunConfig, address: FipsAddress) -> Result<Self, TunError> {
            let _ = config;
            let _ = address;
            Err(TunError::UnsupportedPlatform)
        }

        /// Device name.
        pub fn name(&self) -> &str {
            self.name.as_str()
        }

        /// Configured MTU.
        pub fn mtu(&self) -> u16 {
            self.mtu
        }

        /// FIPS address assigned to this device.
        pub fn address(&self) -> &FipsAddress {
            &self.address
        }

        /// Always fails with `TunError::UnsupportedPlatform`: there is no
        /// system TUN to write to on this platform.
        pub fn create_writer(
            &self,
            max_mss: u16,
            path_mtu_lookup: PathMtuLookup,
        ) -> Result<(TunWriter, TunTx), TunError> {
            let _ = max_mss;
            drop(path_mtu_lookup);
            Err(TunError::UnsupportedPlatform)
        }
    }

    impl std::fmt::Debug for TunDevice {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let Self { name, mtu, address } = self;
            let mut out = f.debug_struct("TunDevice");
            out.field("name", name);
            out.field("mtu", mtu);
            out.field("address", address);
            out.finish()
        }
    }

    /// Stand-in writer so system-TUN code paths still type-check here.
    pub struct TunWriter;

    impl TunWriter {
        /// Returns immediately; there is nothing to write to.
        pub fn run(self) {}
    }

    /// Stand-in reader: discards all of its arguments and returns.
    #[allow(clippy::too_many_arguments)]
    pub fn run_tun_reader(
        device: TunDevice,
        mtu: u16,
        our_addr: FipsAddress,
        tun_tx: TunTx,
        outbound_tx: TunOutboundTx,
        transport_mtu: u16,
        path_mtu_lookup: PathMtuLookup,
    ) {
        drop((
            device,
            mtu,
            our_addr,
            tun_tx,
            outbound_tx,
            transport_mtu,
            path_mtu_lookup,
        ));
    }

    /// Stand-in shutdown: nothing was created, so this only reports success.
    pub async fn shutdown_tun_interface(name: &str) -> Result<(), TunError> {
        let _ = name;
        Ok(())
    }
}
1305
1306#[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
1307pub use unsupported_tun::{TunDevice, TunWriter, run_tun_reader, shutdown_tun_interface};
1308
1309#[cfg(target_os = "linux")]
1310mod platform {
1311    use super::TunError;
1312    use futures::TryStreamExt;
1313    use rtnetlink::{Handle, LinkUnspec, RouteMessageBuilder, new_connection};
1314    use std::net::Ipv6Addr;
1315    use tracing::debug;
1316
1317    /// Check if IPv6 is disabled system-wide.
1318    pub fn is_ipv6_disabled() -> bool {
1319        std::fs::read_to_string("/proc/sys/net/ipv6/conf/all/disable_ipv6")
1320            .map(|s| s.trim() == "1")
1321            .unwrap_or(false)
1322    }
1323
1324    /// Check if a network interface already exists.
1325    pub async fn interface_exists(name: &str) -> bool {
1326        let Ok((connection, handle, _)) = new_connection() else {
1327            return false;
1328        };
1329        tokio::spawn(connection);
1330
1331        get_interface_index(&handle, name).await.is_ok()
1332    }
1333
1334    /// Delete a network interface by name.
1335    pub async fn delete_interface(name: &str) -> Result<(), TunError> {
1336        let (connection, handle, _) = new_connection()
1337            .map_err(|e| TunError::Configure(format!("netlink connection failed: {}", e)))?;
1338        tokio::spawn(connection);
1339
1340        let index = get_interface_index(&handle, name).await?;
1341        handle.link().del(index).execute().await?;
1342        Ok(())
1343    }
1344
1345    /// Configure a network interface with an IPv6 address via netlink.
1346    pub async fn configure_interface(name: &str, addr: Ipv6Addr, mtu: u16) -> Result<(), TunError> {
1347        let (connection, handle, _) = new_connection()
1348            .map_err(|e| TunError::Configure(format!("netlink connection failed: {}", e)))?;
1349        tokio::spawn(connection);
1350
1351        // Get interface index
1352        let index = get_interface_index(&handle, name).await?;
1353
1354        // Add IPv6 address with /128 prefix (point-to-point)
1355        handle
1356            .address()
1357            .add(index, std::net::IpAddr::V6(addr), 128)
1358            .execute()
1359            .await?;
1360
1361        // Set MTU
1362        handle
1363            .link()
1364            .change(LinkUnspec::new_with_index(index).mtu(mtu as u32).build())
1365            .execute()
1366            .await?;
1367
1368        // Bring interface up
1369        handle
1370            .link()
1371            .change(LinkUnspec::new_with_index(index).up().build())
1372            .execute()
1373            .await?;
1374
1375        // Add route for fd00::/8 (FIPS address space) via this interface
1376        let fd_prefix: Ipv6Addr = "fd00::".parse().unwrap();
1377        let route = RouteMessageBuilder::<Ipv6Addr>::new()
1378            .destination_prefix(fd_prefix, 8)
1379            .output_interface(index)
1380            .build();
1381        handle
1382            .route()
1383            .add(route)
1384            .execute()
1385            .await
1386            .map_err(|e| TunError::Configure(format!("failed to add fd00::/8 route: {}", e)))?;
1387
1388        // Add ip6 rule to ensure fd00::/8 uses the main table, preventing other
1389        // routing software (e.g. Tailscale) from intercepting FIPS traffic via
1390        // catch-all rules in auxiliary routing tables.
1391        let mut rule_req = handle
1392            .rule()
1393            .add()
1394            .v6()
1395            .destination_prefix(fd_prefix, 8)
1396            .table_id(254)
1397            .priority(5265);
1398        rule_req.message_mut().header.action = 1.into(); // FR_ACT_TO_TBL
1399        if let Err(e) = rule_req.execute().await {
1400            debug!("ip6 rule for fd00::/8 not added (may already exist): {e}");
1401        }
1402
1403        Ok(())
1404    }
1405
1406    /// Get the interface index by name.
1407    async fn get_interface_index(handle: &Handle, name: &str) -> Result<u32, TunError> {
1408        let mut links = handle.link().get().match_name(name.to_string()).execute();
1409
1410        if let Some(link) = links.try_next().await? {
1411            Ok(link.header.index)
1412        } else {
1413            Err(TunError::InterfaceNotFound(name.to_string()))
1414        }
1415    }
1416}
1417
1418#[cfg(target_os = "macos")]
1419mod platform {
1420    use super::TunError;
1421    use std::net::Ipv6Addr;
1422    use tokio::process::Command;
1423
1424    /// Check if IPv6 is disabled system-wide.
1425    pub fn is_ipv6_disabled() -> bool {
1426        // macOS: check via sysctl; if the key doesn't exist, IPv6 is enabled
1427        std::process::Command::new("sysctl")
1428            .args(["-n", "net.inet6.ip6.disabled"])
1429            .output()
1430            .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "1")
1431            .unwrap_or(false)
1432    }
1433
1434    /// Check if a network interface already exists.
1435    pub async fn interface_exists(name: &str) -> bool {
1436        Command::new("ifconfig")
1437            .arg(name)
1438            .stdout(std::process::Stdio::null())
1439            .stderr(std::process::Stdio::null())
1440            .status()
1441            .await
1442            .map(|s| s.success())
1443            .unwrap_or(false)
1444    }
1445
1446    /// Shut down a network interface by name.
1447    ///
1448    /// On macOS, utun devices are automatically destroyed when the file
1449    /// descriptor is closed. Bringing the interface down causes any
1450    /// blocking reads to return an error, which unblocks the reader thread.
1451    pub async fn delete_interface(name: &str) -> Result<(), TunError> {
1452        run_cmd("ifconfig", &[name, "down"]).await
1453    }
1454
1455    /// Configure a network interface with an IPv6 address using ifconfig/route.
1456    pub async fn configure_interface(name: &str, addr: Ipv6Addr, mtu: u16) -> Result<(), TunError> {
1457        // Add IPv6 address with /128 prefix
1458        run_cmd(
1459            "ifconfig",
1460            &[name, "inet6", &addr.to_string(), "prefixlen", "128"],
1461        )
1462        .await?;
1463
1464        // Set MTU
1465        run_cmd("ifconfig", &[name, "mtu", &mtu.to_string()]).await?;
1466
1467        // Bring interface up
1468        run_cmd("ifconfig", &[name, "up"]).await?;
1469
1470        // Add route for fd00::/8 (FIPS address space) via this interface
1471        run_cmd(
1472            "route",
1473            &[
1474                "add",
1475                "-inet6",
1476                "-prefixlen",
1477                "8",
1478                "fd00::",
1479                "-interface",
1480                name,
1481            ],
1482        )
1483        .await?;
1484
1485        Ok(())
1486    }
1487
1488    /// Run a command and return an error if it fails.
1489    async fn run_cmd(program: &str, args: &[&str]) -> Result<(), TunError> {
1490        let output = Command::new(program)
1491            .args(args)
1492            .output()
1493            .await
1494            .map_err(|e| TunError::Configure(format!("{} failed: {}", program, e)))?;
1495
1496        if !output.status.success() {
1497            let stderr = String::from_utf8_lossy(&output.stderr);
1498            return Err(TunError::Configure(format!(
1499                "{} {} failed: {}",
1500                program,
1501                args.join(" "),
1502                stderr.trim()
1503            )));
1504        }
1505        Ok(())
1506    }
1507}
1508
1509#[cfg(test)]
1510mod tests {
1511    use super::*;
1512
1513    #[test]
1514    fn test_tun_state_display() {
1515        assert_eq!(format!("{}", TunState::Disabled), "disabled");
1516        assert_eq!(format!("{}", TunState::Active), "active");
1517    }
1518
1519    // Note: TUN device creation tests require elevated privileges
1520    // and are better suited for integration tests.
1521
1522    // ========================================================================
1523    // per_flow_max_mss — per-destination MSS clamp regression coverage
1524    // ========================================================================
1525
1526    fn fips_addr_with_node_byte(b: u8) -> FipsAddress {
1527        let mut bytes = [0u8; 16];
1528        bytes[0] = crate::identity::FIPS_ADDRESS_PREFIX;
1529        bytes[1] = b;
1530        FipsAddress::from_bytes(bytes).unwrap()
1531    }
1532
1533    fn empty_lookup() -> PathMtuLookup {
1534        Arc::new(RwLock::new(HashMap::new()))
1535    }
1536
1537    #[test]
1538    fn per_flow_empty_lookup_returns_conservative_ceiling() {
1539        // Cold-flow first-SYN race-window guard: when no per-destination
1540        // path_mtu has been learned yet, fall back to the IPv6-minimum-
1541        // derived ceiling (1280 - 77 - 60 = 1143) rather than the local
1542        // global ceiling. This ensures the first SYN to an unknown
1543        // destination clamps small enough to traverse any RFC-8200-
1544        // compliant IPv6 path.
1545        let lookup = empty_lookup();
1546        let addr = fips_addr_with_node_byte(0x42);
1547        assert_eq!(per_flow_max_mss(&lookup, addr.as_bytes(), 1360), 1143);
1548    }
1549
1550    #[test]
1551    fn per_flow_empty_lookup_returns_global_when_global_smaller() {
1552        // When the local global ceiling is already <= the conservative
1553        // 1143 ceiling (e.g. a daemon configured with UDP-1280 only),
1554        // the empty-lookup fallback stays at the global rather than
1555        // expanding upward.
1556        let lookup = empty_lookup();
1557        let addr = fips_addr_with_node_byte(0x42);
1558        assert_eq!(per_flow_max_mss(&lookup, addr.as_bytes(), 1100), 1100);
1559    }
1560
1561    #[test]
1562    fn per_flow_clamps_to_path_mtu_when_smaller() {
1563        // Discovery learned path_mtu=1280 for this destination; global
1564        // ceiling is 1360. Per-flow clamp should be min(1360, 1280-77-60)
1565        // = min(1360, 1143) = 1143.
1566        let lookup = empty_lookup();
1567        let addr = fips_addr_with_node_byte(0x42);
1568        lookup.write().unwrap().insert(addr, 1280);
1569        assert_eq!(per_flow_max_mss(&lookup, addr.as_bytes(), 1360), 1143);
1570    }
1571
1572    #[test]
1573    fn per_flow_keeps_global_when_path_mtu_larger() {
1574        // Discovery learned path_mtu=1452 (> global). Per-flow stays at
1575        // global 1143 (the smaller of the two).
1576        let lookup = empty_lookup();
1577        let addr = fips_addr_with_node_byte(0x42);
1578        lookup.write().unwrap().insert(addr, 1452);
1579        // global=1143 (UDP-1280-derived); path_max = 1452-77-60 = 1315.
1580        assert_eq!(per_flow_max_mss(&lookup, addr.as_bytes(), 1143), 1143);
1581    }
1582
1583    #[test]
1584    fn per_flow_learned_value_overrides_conservative_ceiling() {
1585        // When discovery has learned a per-destination value LARGER than
1586        // the conservative 1143 ceiling, the learned value (capped by
1587        // the global ceiling) wins. The conservative ceiling is only the
1588        // empty-lookup fallback; once an entry exists, the actual
1589        // learned value governs.
1590        let lookup = empty_lookup();
1591        let addr = fips_addr_with_node_byte(0x42);
1592        lookup.write().unwrap().insert(addr, 1452);
1593        // global=1360, path_max = 1452-77-60 = 1315; min(1360, 1315) = 1315.
1594        // 1315 > 1143, so the conservative ceiling did NOT clamp here.
1595        assert_eq!(per_flow_max_mss(&lookup, addr.as_bytes(), 1360), 1315);
1596    }
1597
1598    #[test]
1599    fn per_flow_returns_conservative_ceiling_for_non_fips_addr() {
1600        // Non-fips IPv6 (e.g. fe80::/10 link-local) takes the empty-
1601        // lookup path. With global=1360, fall back to 1143.
1602        let lookup = empty_lookup();
1603        let mut bytes = [0u8; 16];
1604        bytes[0] = 0xfe;
1605        bytes[1] = 0x80;
1606        assert_eq!(per_flow_max_mss(&lookup, &bytes, 1360), 1143);
1607    }
1608
1609    #[test]
1610    fn per_flow_returns_conservative_ceiling_on_short_addr_slice() {
1611        let lookup = empty_lookup();
1612        let bytes = [0u8; 8];
1613        assert_eq!(per_flow_max_mss(&lookup, &bytes, 1360), 1143);
1614    }
1615
1616    #[test]
1617    fn per_flow_independent_per_destination() {
1618        // Two different destinations with different path MTUs. Each
1619        // lookup honors its own value; cross-talk would be a regression.
1620        let lookup = empty_lookup();
1621        let a = fips_addr_with_node_byte(0x10);
1622        let b = fips_addr_with_node_byte(0x20);
1623        lookup.write().unwrap().insert(a, 1280);
1624        lookup.write().unwrap().insert(b, 1452);
1625        assert_eq!(per_flow_max_mss(&lookup, a.as_bytes(), 1360), 1143);
1626        assert_eq!(per_flow_max_mss(&lookup, b.as_bytes(), 1360), 1315);
1627    }
1628
1629    // ========================================================================
1630    // macOS utun packet-info header (AF_INET6 4-byte big-endian prefix)
1631    //
1632    // These tests are pure-data byte-buffer manipulation and require no
1633    // privilege, no actual TUN device, no system calls. They pin the wire
1634    // format that `TunWriter::run` emits ahead of every IPv6 frame on the
1635    // dup'd utun fd, and the inverse parse used for round-trip checking.
1636    // ========================================================================
1637
1638    #[cfg(target_os = "macos")]
1639    mod macos_utun_header {
1640        use super::super::{UTUN_AF_INET6, parse_utun_af_prefix, utun_af_inet6_header};
1641
1642        #[test]
1643        fn af_inet6_constant_matches_darwin() {
1644            // Darwin's <sys/socket.h> defines AF_INET6 = 30. If this ever
1645            // diverges, every utun write FIPS issues will be misclassified
1646            // by the kernel and dropped.
1647            assert_eq!(UTUN_AF_INET6, 30);
1648        }
1649
1650        #[test]
1651        fn encode_produces_big_endian_af_inet6() {
1652            // The kernel reads the 4-byte prefix as a big-endian u32.
1653            // 30 == 0x0000001e, so the wire bytes are [0, 0, 0, 0x1e].
1654            let header = utun_af_inet6_header();
1655            assert_eq!(header, [0x00, 0x00, 0x00, 0x1e]);
1656        }
1657
1658        #[test]
1659        fn encode_round_trips_through_parse() {
1660            let header = utun_af_inet6_header();
1661            let parsed = parse_utun_af_prefix(&header).expect("4 bytes is enough");
1662            assert_eq!(parsed, UTUN_AF_INET6);
1663        }
1664
1665        #[test]
1666        fn parse_rejects_short_buffer() {
1667            // Anything shorter than the 4-byte header is ill-formed.
1668            assert_eq!(parse_utun_af_prefix(&[]), None);
1669            assert_eq!(parse_utun_af_prefix(&[0x00]), None);
1670            assert_eq!(parse_utun_af_prefix(&[0x00, 0x00]), None);
1671            assert_eq!(parse_utun_af_prefix(&[0x00, 0x00, 0x00]), None);
1672        }
1673
1674        #[test]
1675        fn parse_accepts_minimum_header_with_trailing_payload() {
1676            // A real utun read returns header + IP packet concatenated.
1677            // The parser only consumes the first 4 bytes.
1678            let mut frame = utun_af_inet6_header().to_vec();
1679            frame.extend_from_slice(&[0x60; 40]); // dummy IPv6 header
1680            let parsed = parse_utun_af_prefix(&frame).expect("4 bytes is enough");
1681            assert_eq!(parsed, UTUN_AF_INET6);
1682        }
1683
1684        #[test]
1685        fn parse_garbage_bytes_returns_garbage_value_not_panic() {
1686            // A well-formed 4-byte buffer whose value is not AF_INET6
1687            // should parse successfully (returning the raw u32) without
1688            // panicking. Discriminating "expected" vs "unexpected" AF
1689            // values is the caller's responsibility.
1690            let buf = [0xde, 0xad, 0xbe, 0xef];
1691            let parsed = parse_utun_af_prefix(&buf).expect("4 bytes is enough");
1692            assert_eq!(parsed, 0xdeadbeef);
1693            assert_ne!(parsed, UTUN_AF_INET6);
1694        }
1695    }
1696}