Skip to main content

microsandbox_network/
stack.rs

1//! smoltcp interface setup, frame classification, and poll loop.
2//!
3//! This module contains the core networking event loop that runs on a
4//! dedicated OS thread. It bridges guest ethernet frames (via
5//! [`SmoltcpDevice`]) to smoltcp's TCP/IP stack and services connections
6//! through tokio proxy tasks.
7
8use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr};
9use std::sync::Arc;
10
11use smoltcp::iface::{Config, Interface, SocketSet};
12use smoltcp::time::Instant;
13use std::sync::atomic::Ordering;
14
15use smoltcp::wire::{
16    EthernetAddress, EthernetFrame, EthernetProtocol, HardwareAddress, IpAddress, IpCidr,
17    IpProtocol, Ipv4Packet, Ipv6Packet, TcpPacket, UdpPacket,
18};
19
20use crate::config::{DnsConfig, PublishedPort};
21use crate::conn::ConnectionTracker;
22use crate::device::SmoltcpDevice;
23use crate::dns::interceptor::DnsInterceptor;
24use crate::policy::{NetworkPolicy, Protocol};
25use crate::proxy;
26use crate::publisher::PortPublisher;
27use crate::shared::SharedState;
28use crate::tls::{proxy as tls_proxy, state::TlsState};
29use crate::udp_relay::UdpRelay;
30
31//--------------------------------------------------------------------------------------------------
32// Types
33//--------------------------------------------------------------------------------------------------
34
/// Outcome of pre-inspecting a guest ethernet frame before smoltcp sees it.
///
/// The poll loop uses the classification to:
/// - Create TCP sockets before smoltcp sees a SYN (preventing auto-RST).
/// - Handle non-DNS UDP outside smoltcp (smoltcp lacks wildcard port binding).
/// - Route DNS queries to the interception handler.
///
/// The type derives `Debug`, `Clone`, `PartialEq` and `Eq` so that values can
/// be logged, shown in assertion failures, and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameAction {
    /// TCP segment with SYN set and ACK clear — create a smoltcp socket
    /// before letting smoltcp process the frame.
    TcpSyn {
        /// Source address and port taken from the IP and TCP headers.
        src: SocketAddr,
        /// Destination address and port taken from the IP and TCP headers.
        dst: SocketAddr,
    },

    /// Non-DNS UDP datagram — handle entirely outside smoltcp via the UDP
    /// relay.
    UdpRelay {
        /// Source address and port taken from the IP and UDP headers.
        src: SocketAddr,
        /// Destination address and port taken from the IP and UDP headers.
        dst: SocketAddr,
    },

    /// DNS query (UDP to port 53) — let smoltcp's bound UDP socket handle it.
    Dns,

    /// Everything else (ARP, NDP, ICMP, TCP data/ACK/FIN, etc.) — let
    /// smoltcp process normally.
    Passthrough,
}
57
/// Resolved network parameters for the poll loop. Created by
/// `SmoltcpNetwork::new()` from `NetworkConfig` + sandbox slot.
///
/// Every field is a plain value type, so the struct derives `Debug`, `Clone`,
/// `PartialEq` and `Eq` for logging, duplication and comparison in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PollLoopConfig {
    /// Gateway MAC address (smoltcp's identity on the virtual LAN).
    pub gateway_mac: [u8; 6],
    /// Guest MAC address.
    pub guest_mac: [u8; 6],
    /// Gateway IPv4 address; also used as the default IPv4 route.
    pub gateway_ipv4: Ipv4Addr,
    /// Guest IPv4 address.
    pub guest_ipv4: Ipv4Addr,
    /// Gateway IPv6 address; also used as the default IPv6 route.
    pub gateway_ipv6: Ipv6Addr,
    /// IP-level MTU (e.g. 1500).
    pub mtu: usize,
}
74
75//--------------------------------------------------------------------------------------------------
76// Functions
77//--------------------------------------------------------------------------------------------------
78
79/// Classify a raw ethernet frame for pre-inspection.
80///
81/// Uses smoltcp's wire module for zero-copy parsing. Returns
82/// [`FrameAction::Passthrough`] for any frame that cannot be parsed or
83/// doesn't match a special case.
84pub fn classify_frame(frame: &[u8]) -> FrameAction {
85    let Ok(eth) = EthernetFrame::new_checked(frame) else {
86        return FrameAction::Passthrough;
87    };
88
89    match eth.ethertype() {
90        EthernetProtocol::Ipv4 => classify_ipv4(eth.payload()),
91        EthernetProtocol::Ipv6 => classify_ipv6(eth.payload()),
92        _ => FrameAction::Passthrough, // ARP, etc.
93    }
94}
95
96/// Create and configure the smoltcp [`Interface`].
97///
98/// The interface is configured as the **gateway**: it owns the gateway IP
99/// addresses and responds to ARP/NDP for them. `any_ip` mode is enabled so
100/// smoltcp accepts traffic destined for arbitrary remote IPs (not just the
101/// gateway), combined with default routes.
102pub fn create_interface(device: &mut SmoltcpDevice, config: &PollLoopConfig) -> Interface {
103    let hw_addr = HardwareAddress::Ethernet(EthernetAddress(config.gateway_mac));
104    let iface_config = Config::new(hw_addr);
105    let mut iface = Interface::new(iface_config, device, smoltcp_now());
106
107    // Configure gateway IP addresses.
108    iface.update_ip_addrs(|addrs| {
109        addrs
110            .push(IpCidr::new(
111                IpAddress::Ipv4(config.gateway_ipv4),
112                // /30 subnet: gateway + guest.
113                30,
114            ))
115            .expect("failed to add gateway IPv4 address");
116        addrs
117            .push(IpCidr::new(IpAddress::Ipv6(config.gateway_ipv6), 64))
118            .expect("failed to add gateway IPv6 address");
119    });
120
121    // Default routes so smoltcp accepts traffic for all destinations.
122    iface
123        .routes_mut()
124        .add_default_ipv4_route(config.gateway_ipv4)
125        .expect("failed to add default IPv4 route");
126    iface
127        .routes_mut()
128        .add_default_ipv6_route(config.gateway_ipv6)
129        .expect("failed to add default IPv6 route");
130
131    // Accept traffic destined for any IP, not just gateway addresses.
132    iface.set_any_ip(true);
133
134    iface
135}
136
/// Main smoltcp poll loop. Runs on a dedicated OS thread.
///
/// Processes guest frames with pre-inspection, drives smoltcp's TCP/IP
/// stack, and sleeps via `poll(2)` between events. The body is an
/// unconditional `loop` with no `break` or `return`, so this function does
/// not return.
///
/// # Phases per iteration
///
/// 1. **Drain guest frames** — stage each pending frame from the device,
///    classify it, and either pre-create a TCP socket, relay/drop UDP, or
///    hand the frame to smoltcp.
/// 2. **smoltcp egress + maintenance** — transmit queued packets, run timers.
/// 3. **Service connections** — relay data between smoltcp sockets and proxy
///    tasks, process DNS, accept published-port connections, spawn proxies
///    for newly established connections, and run rate-limited cleanup.
/// 4. **Flush + sleep** — one more egress pass, then `poll(2)` on the
///    `tx_wake` + `proxy_wake` descriptors with smoltcp's requested timeout.
///
/// # Parameters
///
/// - `shared`: state shared with the rest of the network stack; supplies the
///   `tx_wake`, `proxy_wake` and `rx_wake` handles used here and is cloned
///   into the device, DNS interceptor, UDP relay and proxy tasks.
/// - `config`: resolved MAC/IP/MTU parameters for the interface.
/// - `network_policy`: consulted for every TCP SYN and non-DNS UDP datagram.
/// - `dns_config`: forwarded to the [`DnsInterceptor`].
/// - `tls_state`: when `Some`, connections to its intercepted ports go to
///   the TLS proxy and (if `block_quic_on_intercept` is set) UDP to those
///   ports is dropped.
/// - `published_ports`: forwarded to the [`PortPublisher`].
/// - `max_connections`: forwarded to the [`ConnectionTracker`].
/// - `tokio_handle`: runtime handle passed to the components that spawn
///   proxy tasks.
#[allow(clippy::too_many_arguments)]
pub fn smoltcp_poll_loop(
    shared: Arc<SharedState>,
    config: PollLoopConfig,
    network_policy: NetworkPolicy,
    dns_config: DnsConfig,
    tls_state: Option<Arc<TlsState>>,
    published_ports: Vec<PublishedPort>,
    max_connections: Option<usize>,
    tokio_handle: tokio::runtime::Handle,
) {
    let mut device = SmoltcpDevice::new(shared.clone(), config.mtu);
    let mut iface = create_interface(&mut device, &config);
    let mut sockets = SocketSet::new(vec![]);
    let mut conn_tracker = ConnectionTracker::new(max_connections);

    let mut dns_interceptor =
        DnsInterceptor::new(&mut sockets, dns_config, shared.clone(), &tokio_handle);
    let mut port_publisher = PortPublisher::new(&published_ports, config.guest_ipv4, &tokio_handle);
    let mut udp_relay = UdpRelay::new(
        shared.clone(),
        config.gateway_mac,
        config.guest_mac,
        tokio_handle.clone(),
    );

    // Rate-limit cleanup operations: run at most once per second.
    let mut last_cleanup = std::time::Instant::now();

    // poll(2) file descriptors for sleeping. Index 0 is `tx_wake`, index 1
    // is `proxy_wake`; the drain logic at the bottom of the loop relies on
    // this order.
    let mut poll_fds = [
        libc::pollfd {
            fd: shared.tx_wake.as_raw_fd(),
            events: libc::POLLIN,
            revents: 0,
        },
        libc::pollfd {
            fd: shared.proxy_wake.as_raw_fd(),
            events: libc::POLLIN,
            revents: 0,
        },
    ];

    loop {
        // Timestamp sampled once and reused for every smoltcp call in this
        // iteration, including the Phase 4 egress pass and `poll_delay`.
        let now = smoltcp_now();

        // ── Phase 1: Drain all guest frames with pre-inspection ──────────
        while let Some(frame) = device.stage_next_frame() {
            match classify_frame(frame) {
                FrameAction::TcpSyn { src, dst } => {
                    // Policy check before socket creation. A socket is only
                    // created when egress is allowed and the tracker does
                    // not already hold one for this (src, dst) pair.
                    if network_policy
                        .evaluate_egress(dst, Protocol::Tcp)
                        .is_allow()
                        && !conn_tracker.has_socket_for(&src, &dst)
                    {
                        conn_tracker.create_tcp_socket(src, dst, &mut sockets);
                    }
                    // Let smoltcp process — matching socket completes
                    // handshake, no socket means auto-RST.
                    iface.poll_ingress_single(now, &mut device, &mut sockets);
                }

                FrameAction::UdpRelay { src, dst } => {
                    // QUIC blocking: drop UDP to intercepted ports when
                    // TLS interception is active.
                    if let Some(ref tls) = tls_state
                        && tls.config.intercepted_ports.contains(&dst.port())
                        && tls.config.block_quic_on_intercept
                    {
                        device.drop_staged_frame();
                        continue;
                    }

                    // Policy check.
                    if network_policy.evaluate_egress(dst, Protocol::Udp).is_deny() {
                        device.drop_staged_frame();
                        continue;
                    }

                    // The relay gets the frame first; the staged frame is
                    // then discarded so smoltcp never sees it.
                    udp_relay.relay_outbound(frame, src, dst);
                    device.drop_staged_frame();
                }

                FrameAction::Dns | FrameAction::Passthrough => {
                    // ARP, ICMP, DNS (port 53), TCP data — smoltcp handles.
                    iface.poll_ingress_single(now, &mut device, &mut sockets);
                }
            }
        }

        // ── Phase 2: smoltcp egress + maintenance ─────────────────────────
        // Flush frames generated by Phase 1 ingress (ACKs, SYN-ACKs, etc.)
        // before relaying data so smoltcp has up-to-date state. Repeat
        // until smoltcp reports nothing further to send.
        loop {
            let result = iface.poll_egress(now, &mut device, &mut sockets);
            if matches!(result, smoltcp::iface::PollResult::None) {
                break;
            }
        }
        iface.poll_maintenance(now);

        // Coalesced wake: if Phase 1/2 emitted any frames, wake the
        // NetWorker once instead of per-frame. `swap(false, ..)` reads and
        // clears the flag in one step.
        if device.frames_emitted.swap(false, Ordering::Relaxed) {
            shared.rx_wake.wake();
        }

        // ── Phase 3: Service connections + relay data ────────────────────
        // Relay proxy data INTO smoltcp sockets first, then a single egress
        // pass flushes everything. This eliminates the former "Phase 2b"
        // double-egress pattern.
        conn_tracker.relay_data(&mut sockets);
        dns_interceptor.process(&mut sockets);

        // Accept queued inbound connections from published port listeners.
        port_publisher.accept_inbound(&mut iface, &mut sockets, &shared, &tokio_handle);
        port_publisher.relay_data(&mut sockets);

        // Detect newly-established connections and spawn proxy tasks.
        let new_conns = conn_tracker.take_new_connections(&mut sockets);
        for conn in new_conns {
            if let Some(ref tls_state) = tls_state
                && tls_state
                    .config
                    .intercepted_ports
                    .contains(&conn.dst.port())
            {
                // TLS-intercepted port — spawn TLS MITM proxy.
                tls_proxy::spawn_tls_proxy(
                    &tokio_handle,
                    conn.dst,
                    conn.from_smoltcp,
                    conn.to_smoltcp,
                    shared.clone(),
                    tls_state.clone(),
                );
                continue;
            }
            // Plain TCP proxy.
            proxy::spawn_tcp_proxy(
                &tokio_handle,
                conn.dst,
                conn.from_smoltcp,
                conn.to_smoltcp,
                shared.clone(),
            );
        }

        // Rate-limited cleanup: TIME_WAIT is 60s, session timeout is 60s,
        // so checking once per second is more than sufficient.
        if last_cleanup.elapsed() >= std::time::Duration::from_secs(1) {
            conn_tracker.cleanup_closed(&mut sockets);
            port_publisher.cleanup_closed(&mut sockets);
            udp_relay.cleanup_expired();
            last_cleanup = std::time::Instant::now();
        }

        // ── Phase 4: Flush relay data + sleep ────────────────────────────
        // Single egress pass flushes all data written by Phase 3.
        loop {
            let result = iface.poll_egress(now, &mut device, &mut sockets);
            if matches!(result, smoltcp::iface::PollResult::None) {
                break;
            }
        }

        // Coalesced wake: if Phase 3/4 emitted any frames, wake once.
        if device.frames_emitted.swap(false, Ordering::Relaxed) {
            shared.rx_wake.wake();
        }

        // Sleep for as long as smoltcp allows, clamped to what poll(2)'s
        // `int` timeout can express.
        // NOTE(review): `total_millis()` truncates, so a pending delay
        // shorter than 1 ms becomes a zero timeout and the loop spins until
        // the timer is due — confirm this is acceptable.
        let timeout_ms = iface
            .poll_delay(now, &sockets)
            .map(|d| d.total_millis().min(i32::MAX as u64) as i32)
            .unwrap_or(100); // 100ms fallback when no timers pending.

        // SAFETY: poll_fds is a valid array of pollfd structs with valid fds.
        // NOTE(review): the return value is discarded, so a failed call
        // (e.g. EINTR) is indistinguishable from a timeout and `revents` is
        // read regardless — confirm `revents` is always refreshed on the
        // target platforms.
        unsafe {
            libc::poll(
                poll_fds.as_mut_ptr(),
                poll_fds.len() as libc::nfds_t,
                timeout_ms,
            );
        }

        // Conditional drain: only drain pipes that actually have data.
        if poll_fds[0].revents & libc::POLLIN != 0 {
            shared.tx_wake.drain();
        }
        if poll_fds[1].revents & libc::POLLIN != 0 {
            shared.proxy_wake.drain();
        }
    }
}
345
346//--------------------------------------------------------------------------------------------------
347// Functions: Helpers
348//--------------------------------------------------------------------------------------------------
349
350/// Get the current time as a smoltcp [`Instant`] using a monotonic clock.
351///
352/// Uses `std::time::Instant` (monotonic) instead of `SystemTime` (wall
353/// clock) to avoid issues with NTP clock step corrections that could
354/// cause smoltcp timers to misbehave.
355fn smoltcp_now() -> Instant {
356    static EPOCH: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
357    let epoch = EPOCH.get_or_init(std::time::Instant::now);
358    let elapsed = epoch.elapsed();
359    Instant::from_millis(elapsed.as_millis() as i64)
360}
361
362/// Classify an IPv4 packet payload (after stripping the Ethernet header).
363fn classify_ipv4(payload: &[u8]) -> FrameAction {
364    let Ok(ipv4) = Ipv4Packet::new_checked(payload) else {
365        return FrameAction::Passthrough;
366    };
367    classify_transport(
368        ipv4.next_header(),
369        ipv4.src_addr().into(),
370        ipv4.dst_addr().into(),
371        ipv4.payload(),
372    )
373}
374
375/// Classify an IPv6 packet payload (after stripping the Ethernet header).
376fn classify_ipv6(payload: &[u8]) -> FrameAction {
377    let Ok(ipv6) = Ipv6Packet::new_checked(payload) else {
378        return FrameAction::Passthrough;
379    };
380    classify_transport(
381        ipv6.next_header(),
382        ipv6.src_addr().into(),
383        ipv6.dst_addr().into(),
384        ipv6.payload(),
385    )
386}
387
388/// Classify the transport-layer protocol (shared by IPv4 and IPv6).
389fn classify_transport(
390    protocol: IpProtocol,
391    src_ip: std::net::IpAddr,
392    dst_ip: std::net::IpAddr,
393    transport_payload: &[u8],
394) -> FrameAction {
395    match protocol {
396        IpProtocol::Tcp => {
397            let Ok(tcp) = TcpPacket::new_checked(transport_payload) else {
398                return FrameAction::Passthrough;
399            };
400            if tcp.syn() && !tcp.ack() {
401                FrameAction::TcpSyn {
402                    src: SocketAddr::new(src_ip, tcp.src_port()),
403                    dst: SocketAddr::new(dst_ip, tcp.dst_port()),
404                }
405            } else {
406                FrameAction::Passthrough
407            }
408        }
409        IpProtocol::Udp => {
410            let Ok(udp) = UdpPacket::new_checked(transport_payload) else {
411                return FrameAction::Passthrough;
412            };
413            if udp.dst_port() == 53 {
414                FrameAction::Dns
415            } else {
416                FrameAction::UdpRelay {
417                    src: SocketAddr::new(src_ip, udp.src_port()),
418                    dst: SocketAddr::new(dst_ip, udp.dst_port()),
419                }
420            }
421        }
422        _ => FrameAction::Passthrough, // ICMP, etc.
423    }
424}
425
426//--------------------------------------------------------------------------------------------------
427// Tests
428//--------------------------------------------------------------------------------------------------
429
#[cfg(test)]
mod tests {
    use super::*;

    /// Length of the Ethernet header written by the frame builders.
    const ETH_LEN: usize = 14;
    /// Length of the option-less IPv4 header written by the frame builders.
    const IPV4_LEN: u16 = 20;
    /// Frame offset at which the TCP/UDP header begins.
    const L4_OFFSET: usize = ETH_LEN + IPV4_LEN as usize;

    /// Build a zero-filled Ethernet + IPv4 frame with room for `l4_len`
    /// bytes of transport header. Checksums are left at zero.
    fn build_ipv4_frame(
        protocol: u8,
        flags: u8,
        src_ip: [u8; 4],
        dst_ip: [u8; 4],
        l4_len: u16,
    ) -> Vec<u8> {
        let mut frame = vec![0u8; L4_OFFSET + usize::from(l4_len)];

        // EtherType: IPv4 (0x0800). MAC addresses stay zeroed.
        frame[12..ETH_LEN].copy_from_slice(&[0x08, 0x00]);

        let ip = &mut frame[ETH_LEN..L4_OFFSET];
        ip[0] = 0x45; // Version 4, IHL 5 words.
        ip[2..4].copy_from_slice(&(IPV4_LEN + l4_len).to_be_bytes()); // Total length.
        ip[6] = flags; // Flags / fragment offset high byte.
        ip[8] = 64; // TTL.
        ip[9] = protocol;
        ip[12..16].copy_from_slice(&src_ip);
        ip[16..20].copy_from_slice(&dst_ip);

        frame
    }

    /// Build a minimal Ethernet + IPv4 + TCP SYN frame.
    fn build_tcp_syn_frame(
        src_ip: [u8; 4],
        dst_ip: [u8; 4],
        src_port: u16,
        dst_port: u16,
    ) -> Vec<u8> {
        // Protocol 6 = TCP, flags 0x40 = Don't Fragment, 20-byte TCP header.
        let mut frame = build_ipv4_frame(6, 0x40, src_ip, dst_ip, 20);

        let tcp = &mut frame[L4_OFFSET..];
        tcp[0..2].copy_from_slice(&src_port.to_be_bytes());
        tcp[2..4].copy_from_slice(&dst_port.to_be_bytes());
        tcp[12] = 0x50; // Data offset: 5 words.
        tcp[13] = 0x02; // SYN flag.

        frame
    }

    /// Build a minimal Ethernet + IPv4 + UDP frame.
    fn build_udp_frame(src_ip: [u8; 4], dst_ip: [u8; 4], src_port: u16, dst_port: u16) -> Vec<u8> {
        // Protocol 17 = UDP, no IP flags, 8-byte UDP header and no payload.
        let mut frame = build_ipv4_frame(17, 0x00, src_ip, dst_ip, 8);

        let udp = &mut frame[L4_OFFSET..];
        udp[0..2].copy_from_slice(&src_port.to_be_bytes());
        udp[2..4].copy_from_slice(&dst_port.to_be_bytes());
        udp[4..6].copy_from_slice(&8u16.to_be_bytes()); // UDP length: header only.

        frame
    }

    #[test]
    fn classify_tcp_syn() {
        let frame = build_tcp_syn_frame([10, 0, 0, 2], [93, 184, 216, 34], 54321, 443);

        let FrameAction::TcpSyn { src, dst } = classify_frame(&frame) else {
            panic!("expected TcpSyn");
        };

        assert_eq!(src, SocketAddr::from((Ipv4Addr::new(10, 0, 0, 2), 54321)));
        assert_eq!(
            dst,
            SocketAddr::from((Ipv4Addr::new(93, 184, 216, 34), 443))
        );
    }

    #[test]
    fn classify_tcp_ack_is_passthrough() {
        let mut frame = build_tcp_syn_frame([10, 0, 0, 2], [93, 184, 216, 34], 54321, 443);
        // Replace the flag byte so only ACK is set (SYN cleared).
        frame[L4_OFFSET + 13] = 0x10;

        let action = classify_frame(&frame);
        assert!(matches!(action, FrameAction::Passthrough));
    }

    #[test]
    fn classify_udp_dns() {
        let frame = build_udp_frame([10, 0, 0, 2], [10, 0, 0, 1], 12345, 53);

        let action = classify_frame(&frame);
        assert!(matches!(action, FrameAction::Dns));
    }

    #[test]
    fn classify_udp_non_dns() {
        let frame = build_udp_frame([10, 0, 0, 2], [8, 8, 8, 8], 12345, 443);

        let FrameAction::UdpRelay { src, dst } = classify_frame(&frame) else {
            panic!("expected UdpRelay");
        };

        assert_eq!(src.port(), 12345);
        assert_eq!(dst.port(), 443);
    }

    #[test]
    fn classify_arp_is_passthrough() {
        // Zero-filled frame of ARP size with EtherType 0x0806 (ARP).
        let mut frame = vec![0u8; 42];
        frame[12..14].copy_from_slice(&[0x08, 0x06]);

        let action = classify_frame(&frame);
        assert!(matches!(action, FrameAction::Passthrough));
    }

    #[test]
    fn classify_garbage_is_passthrough() {
        // Both inputs are shorter than an ethernet header.
        assert!(matches!(classify_frame(&[]), FrameAction::Passthrough));
        assert!(matches!(classify_frame(&[0; 5]), FrameAction::Passthrough));
    }
}
553}