Skip to main content

microsandbox_network/
stack.rs

1//! smoltcp interface setup, frame classification, and poll loop.
2//!
3//! This module contains the core networking event loop that runs on a
4//! dedicated OS thread. It bridges guest ethernet frames (via
5//! [`SmoltcpDevice`]) to smoltcp's TCP/IP stack and services connections
6//! through tokio proxy tasks.
7
8use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr};
9use std::sync::Arc;
10
11use smoltcp::iface::{Config, Interface, SocketSet};
12use smoltcp::time::Instant;
13use std::sync::atomic::Ordering;
14
15use smoltcp::wire::{
16    EthernetAddress, EthernetFrame, EthernetProtocol, HardwareAddress, Icmpv4Packet, Icmpv4Repr,
17    Icmpv6Packet, Icmpv6Repr, IpAddress, IpCidr, IpProtocol, Ipv4Packet, Ipv4Repr, Ipv6Packet,
18    Ipv6Repr, TcpPacket, UdpPacket,
19};
20
21use crate::config::{DnsConfig, PublishedPort};
22use crate::conn::ConnectionTracker;
23use crate::device::SmoltcpDevice;
24use crate::dns::interceptor::DnsInterceptor;
25use crate::icmp_relay::IcmpRelay;
26use crate::policy::{NetworkPolicy, Protocol};
27use crate::proxy;
28use crate::publisher::PortPublisher;
29use crate::shared::SharedState;
30use crate::tls::{proxy as tls_proxy, state::TlsState};
31use crate::udp_relay::UdpRelay;
32
33//--------------------------------------------------------------------------------------------------
34// Types
35//--------------------------------------------------------------------------------------------------
36
/// Result of classifying a guest ethernet frame before smoltcp processes it.
///
/// Pre-inspection allows the poll loop to:
/// - Create TCP sockets before smoltcp sees a SYN (preventing auto-RST).
/// - Handle non-DNS UDP outside smoltcp (smoltcp lacks wildcard port binding).
/// - Route DNS queries to the interception handler.
///
/// The type derives `Debug`, `Clone`, `PartialEq` and `Eq` so classification
/// results can be logged and compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameAction {
    /// TCP SYN to a new destination — create a smoltcp socket before
    /// letting smoltcp process the frame.
    TcpSyn { src: SocketAddr, dst: SocketAddr },

    /// Non-DNS UDP datagram — handle entirely outside smoltcp via the UDP
    /// relay.
    UdpRelay { src: SocketAddr, dst: SocketAddr },

    /// DNS query (UDP to port 53) — let smoltcp's bound UDP socket handle it.
    Dns,

    /// Everything else (ARP, NDP, ICMP, TCP data/ACK/FIN, etc.) — let
    /// smoltcp process normally.
    Passthrough,
}
59
/// Resolved network parameters for the poll loop. Created by
/// `SmoltcpNetwork::new()` from `NetworkConfig` + sandbox slot.
///
/// Every field is a plain value, so the struct derives `Debug`, `Clone`,
/// `PartialEq` and `Eq` for logging, duplication and comparison in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PollLoopConfig {
    /// Gateway MAC address (smoltcp's identity on the virtual LAN).
    pub gateway_mac: [u8; 6],
    /// Guest MAC address.
    pub guest_mac: [u8; 6],
    /// Gateway IPv4 address.
    pub gateway_ipv4: Ipv4Addr,
    /// Guest IPv4 address.
    pub guest_ipv4: Ipv4Addr,
    /// Gateway IPv6 address.
    pub gateway_ipv6: Ipv6Addr,
    /// IP-level MTU (e.g. 1500).
    pub mtu: usize,
}
76
77//--------------------------------------------------------------------------------------------------
78// Functions
79//--------------------------------------------------------------------------------------------------
80
81/// Classify a raw ethernet frame for pre-inspection.
82///
83/// Uses smoltcp's wire module for zero-copy parsing. Returns
84/// [`FrameAction::Passthrough`] for any frame that cannot be parsed or
85/// doesn't match a special case.
86pub fn classify_frame(frame: &[u8]) -> FrameAction {
87    let Ok(eth) = EthernetFrame::new_checked(frame) else {
88        return FrameAction::Passthrough;
89    };
90
91    match eth.ethertype() {
92        EthernetProtocol::Ipv4 => classify_ipv4(eth.payload()),
93        EthernetProtocol::Ipv6 => classify_ipv6(eth.payload()),
94        _ => FrameAction::Passthrough, // ARP, etc.
95    }
96}
97
98/// Create and configure the smoltcp [`Interface`].
99///
100/// The interface is configured as the **gateway**: it owns the gateway IP
101/// addresses and responds to ARP/NDP for them. `any_ip` mode is enabled so
102/// smoltcp accepts traffic destined for arbitrary remote IPs (not just the
103/// gateway), combined with default routes.
104pub fn create_interface(device: &mut SmoltcpDevice, config: &PollLoopConfig) -> Interface {
105    let hw_addr = HardwareAddress::Ethernet(EthernetAddress(config.gateway_mac));
106    let iface_config = Config::new(hw_addr);
107    let mut iface = Interface::new(iface_config, device, smoltcp_now());
108
109    // Configure gateway IP addresses.
110    iface.update_ip_addrs(|addrs| {
111        addrs
112            .push(IpCidr::new(
113                IpAddress::Ipv4(config.gateway_ipv4),
114                // /30 subnet: gateway + guest.
115                30,
116            ))
117            .expect("failed to add gateway IPv4 address");
118        addrs
119            .push(IpCidr::new(IpAddress::Ipv6(config.gateway_ipv6), 64))
120            .expect("failed to add gateway IPv6 address");
121    });
122
123    // Default routes so smoltcp accepts traffic for all destinations.
124    iface
125        .routes_mut()
126        .add_default_ipv4_route(config.gateway_ipv4)
127        .expect("failed to add default IPv4 route");
128    iface
129        .routes_mut()
130        .add_default_ipv6_route(config.gateway_ipv6)
131        .expect("failed to add default IPv6 route");
132
133    // Accept traffic destined for any IP, not just gateway addresses.
134    iface.set_any_ip(true);
135
136    iface
137}
138
139/// Main smoltcp poll loop. Runs on a dedicated OS thread.
140///
141/// Processes guest frames with pre-inspection, drives smoltcp's TCP/IP
142/// stack, and sleeps via `poll(2)` between events.
143///
144/// # Phases per iteration
145///
146/// 1. **Drain guest frames** — pop from `tx_ring`, classify, pre-inspect.
147/// 2. **smoltcp egress + maintenance** — transmit queued packets, run timers.
148/// 3. **Service connections** — relay data between smoltcp sockets and proxy
149///    tasks (added by later tasks).
150/// 4. **Sleep** — `poll(2)` on `tx_wake` + `proxy_wake` pipes with smoltcp's
151///    requested timeout.
152#[allow(clippy::too_many_arguments)]
153pub fn smoltcp_poll_loop(
154    shared: Arc<SharedState>,
155    config: PollLoopConfig,
156    network_policy: NetworkPolicy,
157    dns_config: DnsConfig,
158    tls_state: Option<Arc<TlsState>>,
159    published_ports: Vec<PublishedPort>,
160    max_connections: Option<usize>,
161    tokio_handle: tokio::runtime::Handle,
162) {
163    let mut device = SmoltcpDevice::new(shared.clone(), config.mtu);
164    let mut iface = create_interface(&mut device, &config);
165    let mut sockets = SocketSet::new(vec![]);
166    let mut conn_tracker = ConnectionTracker::new(max_connections);
167
168    let mut dns_interceptor =
169        DnsInterceptor::new(&mut sockets, dns_config, shared.clone(), &tokio_handle);
170    let mut port_publisher = PortPublisher::new(&published_ports, config.guest_ipv4, &tokio_handle);
171    let mut udp_relay = UdpRelay::new(
172        shared.clone(),
173        config.gateway_mac,
174        config.guest_mac,
175        tokio_handle.clone(),
176    );
177    let icmp_relay = IcmpRelay::new(
178        shared.clone(),
179        config.gateway_mac,
180        config.guest_mac,
181        tokio_handle.clone(),
182    );
183
184    // Rate-limit cleanup operations: run at most once per second.
185    let mut last_cleanup = std::time::Instant::now();
186
187    // poll(2) file descriptors for sleeping.
188    let mut poll_fds = [
189        libc::pollfd {
190            fd: shared.tx_wake.as_raw_fd(),
191            events: libc::POLLIN,
192            revents: 0,
193        },
194        libc::pollfd {
195            fd: shared.proxy_wake.as_raw_fd(),
196            events: libc::POLLIN,
197            revents: 0,
198        },
199    ];
200
201    loop {
202        let now = smoltcp_now();
203
204        // ── Phase 1: Drain all guest frames with pre-inspection ──────────
205        while let Some(frame) = device.stage_next_frame() {
206            if handle_gateway_icmp_echo(frame, &config, &shared) {
207                device.drop_staged_frame();
208                continue;
209            }
210
211            if icmp_relay.relay_outbound_if_echo(frame, &config, &network_policy) {
212                device.drop_staged_frame();
213                continue;
214            }
215
216            match classify_frame(frame) {
217                FrameAction::TcpSyn { src, dst } => {
218                    // Policy check before socket creation.
219                    if network_policy
220                        .evaluate_egress(dst, Protocol::Tcp)
221                        .is_allow()
222                        && !conn_tracker.has_socket_for(&src, &dst)
223                    {
224                        conn_tracker.create_tcp_socket(src, dst, &mut sockets);
225                    }
226                    // Let smoltcp process — matching socket completes
227                    // handshake, no socket means auto-RST.
228                    iface.poll_ingress_single(now, &mut device, &mut sockets);
229                }
230
231                FrameAction::UdpRelay { src, dst } => {
232                    // QUIC blocking: drop UDP to intercepted ports when
233                    // TLS interception is active.
234                    if let Some(ref tls) = tls_state
235                        && tls.config.intercepted_ports.contains(&dst.port())
236                        && tls.config.block_quic_on_intercept
237                    {
238                        device.drop_staged_frame();
239                        continue;
240                    }
241
242                    // Policy check.
243                    if network_policy.evaluate_egress(dst, Protocol::Udp).is_deny() {
244                        device.drop_staged_frame();
245                        continue;
246                    }
247
248                    udp_relay.relay_outbound(frame, src, dst);
249                    device.drop_staged_frame();
250                }
251
252                FrameAction::Dns | FrameAction::Passthrough => {
253                    // ARP, ICMP, DNS (port 53), TCP data — smoltcp handles.
254                    iface.poll_ingress_single(now, &mut device, &mut sockets);
255                }
256            }
257        }
258
259        // ── Phase 2: Ingress egress + maintenance ─────────────────────────
260        // Flush frames generated by Phase 1 ingress (ACKs, SYN-ACKs, etc.)
261        // before relaying data so smoltcp has up-to-date state.
262        loop {
263            let result = iface.poll_egress(now, &mut device, &mut sockets);
264            if matches!(result, smoltcp::iface::PollResult::None) {
265                break;
266            }
267        }
268        iface.poll_maintenance(now);
269
270        // Coalesced wake: if Phase 1/2 emitted any frames, wake the
271        // NetWorker once instead of per-frame.
272        if device.frames_emitted.swap(false, Ordering::Relaxed) {
273            shared.rx_wake.wake();
274        }
275
276        // ── Phase 3: Service connections + relay data ────────────────────
277        // Relay proxy data INTO smoltcp sockets first, then a single egress
278        // pass flushes everything. This eliminates the former "Phase 2b"
279        // double-egress pattern.
280        conn_tracker.relay_data(&mut sockets);
281        dns_interceptor.process(&mut sockets);
282
283        // Accept queued inbound connections from published port listeners.
284        port_publisher.accept_inbound(&mut iface, &mut sockets, &shared, &tokio_handle);
285        port_publisher.relay_data(&mut sockets);
286
287        // Detect newly-established connections and spawn proxy tasks.
288        let new_conns = conn_tracker.take_new_connections(&mut sockets);
289        for conn in new_conns {
290            if let Some(ref tls_state) = tls_state
291                && tls_state
292                    .config
293                    .intercepted_ports
294                    .contains(&conn.dst.port())
295            {
296                // TLS-intercepted port — spawn TLS MITM proxy.
297                tls_proxy::spawn_tls_proxy(
298                    &tokio_handle,
299                    conn.dst,
300                    conn.from_smoltcp,
301                    conn.to_smoltcp,
302                    shared.clone(),
303                    tls_state.clone(),
304                );
305                continue;
306            }
307            // Plain TCP proxy.
308            proxy::spawn_tcp_proxy(
309                &tokio_handle,
310                conn.dst,
311                conn.from_smoltcp,
312                conn.to_smoltcp,
313                shared.clone(),
314            );
315        }
316
317        // Rate-limited cleanup: TIME_WAIT is 60s, session timeout is 60s,
318        // so checking once per second is more than sufficient.
319        if last_cleanup.elapsed() >= std::time::Duration::from_secs(1) {
320            conn_tracker.cleanup_closed(&mut sockets);
321            port_publisher.cleanup_closed(&mut sockets);
322            udp_relay.cleanup_expired();
323            last_cleanup = std::time::Instant::now();
324        }
325
326        // ── Phase 4: Flush relay data + sleep ────────────────────────────
327        // Single egress pass flushes all data written by Phase 3.
328        loop {
329            let result = iface.poll_egress(now, &mut device, &mut sockets);
330            if matches!(result, smoltcp::iface::PollResult::None) {
331                break;
332            }
333        }
334
335        // Coalesced wake: if Phase 3/4 emitted any frames, wake once.
336        if device.frames_emitted.swap(false, Ordering::Relaxed) {
337            shared.rx_wake.wake();
338        }
339
340        let timeout_ms = iface
341            .poll_delay(now, &sockets)
342            .map(|d| d.total_millis().min(i32::MAX as u64) as i32)
343            .unwrap_or(100); // 100ms fallback when no timers pending.
344
345        // SAFETY: poll_fds is a valid array of pollfd structs with valid fds.
346        unsafe {
347            libc::poll(
348                poll_fds.as_mut_ptr(),
349                poll_fds.len() as libc::nfds_t,
350                timeout_ms,
351            );
352        }
353
354        // Conditional drain: only drain pipes that actually have data.
355        if poll_fds[0].revents & libc::POLLIN != 0 {
356            shared.tx_wake.drain();
357        }
358        if poll_fds[1].revents & libc::POLLIN != 0 {
359            shared.proxy_wake.drain();
360        }
361    }
362}
363
364//--------------------------------------------------------------------------------------------------
365// Functions: Helpers
366//--------------------------------------------------------------------------------------------------
367
368/// Get the current time as a smoltcp [`Instant`] using a monotonic clock.
369///
370/// Uses `std::time::Instant` (monotonic) instead of `SystemTime` (wall
371/// clock) to avoid issues with NTP clock step corrections that could
372/// cause smoltcp timers to misbehave.
373fn smoltcp_now() -> Instant {
374    static EPOCH: std::sync::OnceLock<std::time::Instant> = std::sync::OnceLock::new();
375    let epoch = EPOCH.get_or_init(std::time::Instant::now);
376    let elapsed = epoch.elapsed();
377    Instant::from_millis(elapsed.as_millis() as i64)
378}
379
380/// Reply locally to ICMP echo requests aimed at the sandbox gateway.
381///
382/// `any_ip` is required so smoltcp accepts guest traffic for arbitrary remote
383/// destinations, but that would make smoltcp's automatic ICMP echo replies
384/// spoof remote hosts. Handle only the real gateway IPs here and leave all
385/// other ICMP traffic untouched.
386fn handle_gateway_icmp_echo(frame: &[u8], config: &PollLoopConfig, shared: &SharedState) -> bool {
387    let Ok(eth) = EthernetFrame::new_checked(frame) else {
388        return false;
389    };
390
391    let reply = match eth.ethertype() {
392        EthernetProtocol::Ipv4 => gateway_icmpv4_echo_reply(&eth, config),
393        EthernetProtocol::Ipv6 => gateway_icmpv6_echo_reply(&eth, config),
394        _ => None,
395    };
396    let Some(reply) = reply else {
397        return false;
398    };
399
400    let reply_len = reply.len();
401    if shared.rx_ring.push(reply).is_ok() {
402        shared.add_rx_bytes(reply_len);
403        shared.rx_wake.wake();
404    }
405
406    true
407}
408
409/// Build an IPv4 ICMP echo reply when the guest pings the gateway IPv4.
410fn gateway_icmpv4_echo_reply(
411    eth: &EthernetFrame<&[u8]>,
412    config: &PollLoopConfig,
413) -> Option<Vec<u8>> {
414    let ipv4 = Ipv4Packet::new_checked(eth.payload()).ok()?;
415    if ipv4.dst_addr() != config.gateway_ipv4 || ipv4.next_header() != IpProtocol::Icmp {
416        return None;
417    }
418
419    let icmp = Icmpv4Packet::new_checked(ipv4.payload()).ok()?;
420    let Icmpv4Repr::EchoRequest {
421        ident,
422        seq_no,
423        data,
424    } = Icmpv4Repr::parse(&icmp, &smoltcp::phy::ChecksumCapabilities::default()).ok()?
425    else {
426        return None;
427    };
428
429    let ipv4_repr = Ipv4Repr {
430        src_addr: config.gateway_ipv4,
431        dst_addr: ipv4.src_addr(),
432        next_header: IpProtocol::Icmp,
433        payload_len: 8 + data.len(),
434        hop_limit: 64,
435    };
436    let icmp_repr = Icmpv4Repr::EchoReply {
437        ident,
438        seq_no,
439        data,
440    };
441    let mut reply = vec![0u8; 14 + ipv4_repr.buffer_len() + icmp_repr.buffer_len()];
442
443    let mut reply_eth = EthernetFrame::new_unchecked(&mut reply);
444    reply_eth.set_src_addr(EthernetAddress(config.gateway_mac));
445    reply_eth.set_dst_addr(eth.src_addr());
446    reply_eth.set_ethertype(EthernetProtocol::Ipv4);
447
448    ipv4_repr.emit(
449        &mut Ipv4Packet::new_unchecked(&mut reply[14..34]),
450        &smoltcp::phy::ChecksumCapabilities::default(),
451    );
452    icmp_repr.emit(
453        &mut Icmpv4Packet::new_unchecked(&mut reply[34..]),
454        &smoltcp::phy::ChecksumCapabilities::default(),
455    );
456
457    Some(reply)
458}
459
460/// Build an IPv6 ICMP echo reply when the guest pings the gateway IPv6.
461fn gateway_icmpv6_echo_reply(
462    eth: &EthernetFrame<&[u8]>,
463    config: &PollLoopConfig,
464) -> Option<Vec<u8>> {
465    let ipv6 = Ipv6Packet::new_checked(eth.payload()).ok()?;
466    if ipv6.dst_addr() != config.gateway_ipv6 || ipv6.next_header() != IpProtocol::Icmpv6 {
467        return None;
468    }
469
470    let icmp = Icmpv6Packet::new_checked(ipv6.payload()).ok()?;
471    let Icmpv6Repr::EchoRequest {
472        ident,
473        seq_no,
474        data,
475    } = Icmpv6Repr::parse(
476        &ipv6.src_addr(),
477        &ipv6.dst_addr(),
478        &icmp,
479        &smoltcp::phy::ChecksumCapabilities::default(),
480    )
481    .ok()?
482    else {
483        return None;
484    };
485
486    let ipv6_repr = Ipv6Repr {
487        src_addr: config.gateway_ipv6,
488        dst_addr: ipv6.src_addr(),
489        next_header: IpProtocol::Icmpv6,
490        payload_len: icmp_repr_buffer_len_v6(data),
491        hop_limit: 64,
492    };
493    let icmp_repr = Icmpv6Repr::EchoReply {
494        ident,
495        seq_no,
496        data,
497    };
498    let ipv6_hdr_len = 40;
499    let mut reply = vec![0u8; 14 + ipv6_hdr_len + icmp_repr.buffer_len()];
500
501    let mut reply_eth = EthernetFrame::new_unchecked(&mut reply);
502    reply_eth.set_src_addr(EthernetAddress(config.gateway_mac));
503    reply_eth.set_dst_addr(eth.src_addr());
504    reply_eth.set_ethertype(EthernetProtocol::Ipv6);
505
506    ipv6_repr.emit(&mut Ipv6Packet::new_unchecked(&mut reply[14..54]));
507    icmp_repr.emit(
508        &config.gateway_ipv6,
509        &ipv6.src_addr(),
510        &mut Icmpv6Packet::new_unchecked(&mut reply[54..]),
511        &smoltcp::phy::ChecksumCapabilities::default(),
512    );
513
514    Some(reply)
515}
516
517fn icmp_repr_buffer_len_v6(data: &[u8]) -> usize {
518    Icmpv6Repr::EchoReply {
519        ident: 0,
520        seq_no: 0,
521        data,
522    }
523    .buffer_len()
524}
525
526/// Classify an IPv4 packet payload (after stripping the Ethernet header).
527fn classify_ipv4(payload: &[u8]) -> FrameAction {
528    let Ok(ipv4) = Ipv4Packet::new_checked(payload) else {
529        return FrameAction::Passthrough;
530    };
531    classify_transport(
532        ipv4.next_header(),
533        ipv4.src_addr().into(),
534        ipv4.dst_addr().into(),
535        ipv4.payload(),
536    )
537}
538
539/// Classify an IPv6 packet payload (after stripping the Ethernet header).
540fn classify_ipv6(payload: &[u8]) -> FrameAction {
541    let Ok(ipv6) = Ipv6Packet::new_checked(payload) else {
542        return FrameAction::Passthrough;
543    };
544    classify_transport(
545        ipv6.next_header(),
546        ipv6.src_addr().into(),
547        ipv6.dst_addr().into(),
548        ipv6.payload(),
549    )
550}
551
552/// Classify the transport-layer protocol (shared by IPv4 and IPv6).
553fn classify_transport(
554    protocol: IpProtocol,
555    src_ip: std::net::IpAddr,
556    dst_ip: std::net::IpAddr,
557    transport_payload: &[u8],
558) -> FrameAction {
559    match protocol {
560        IpProtocol::Tcp => {
561            let Ok(tcp) = TcpPacket::new_checked(transport_payload) else {
562                return FrameAction::Passthrough;
563            };
564            if tcp.syn() && !tcp.ack() {
565                FrameAction::TcpSyn {
566                    src: SocketAddr::new(src_ip, tcp.src_port()),
567                    dst: SocketAddr::new(dst_ip, tcp.dst_port()),
568                }
569            } else {
570                FrameAction::Passthrough
571            }
572        }
573        IpProtocol::Udp => {
574            let Ok(udp) = UdpPacket::new_checked(transport_payload) else {
575                return FrameAction::Passthrough;
576            };
577            if udp.dst_port() == 53 {
578                FrameAction::Dns
579            } else {
580                FrameAction::UdpRelay {
581                    src: SocketAddr::new(src_ip, udp.src_port()),
582                    dst: SocketAddr::new(dst_ip, udp.dst_port()),
583                }
584            }
585        }
586        _ => FrameAction::Passthrough, // ICMP, etc.
587    }
588}
589
590//--------------------------------------------------------------------------------------------------
591// Tests
592//--------------------------------------------------------------------------------------------------
593
594#[cfg(test)]
595mod tests {
596    use super::*;
597    use std::sync::Arc;
598
599    use smoltcp::phy::ChecksumCapabilities;
600    use smoltcp::wire::{
601        ArpOperation, ArpPacket, ArpRepr, EthernetRepr, Icmpv4Packet, Icmpv4Repr, Ipv4Repr,
602    };
603
604    use crate::device::SmoltcpDevice;
605    use crate::shared::SharedState;
606
607    /// Build a minimal Ethernet + IPv4 + TCP SYN frame.
608    fn build_tcp_syn_frame(
609        src_ip: [u8; 4],
610        dst_ip: [u8; 4],
611        src_port: u16,
612        dst_port: u16,
613    ) -> Vec<u8> {
614        let mut frame = vec![0u8; 14 + 20 + 20]; // eth + ipv4 + tcp
615
616        // Ethernet header.
617        frame[12] = 0x08; // EtherType: IPv4
618        frame[13] = 0x00;
619
620        // IPv4 header.
621        let ip = &mut frame[14..34];
622        ip[0] = 0x45; // Version + IHL
623        let total_len = 40u16; // 20 (IP) + 20 (TCP)
624        ip[2..4].copy_from_slice(&total_len.to_be_bytes());
625        ip[6] = 0x40; // Don't Fragment
626        ip[8] = 64; // TTL
627        ip[9] = 6; // Protocol: TCP
628        ip[12..16].copy_from_slice(&src_ip);
629        ip[16..20].copy_from_slice(&dst_ip);
630
631        // TCP header.
632        let tcp = &mut frame[34..54];
633        tcp[0..2].copy_from_slice(&src_port.to_be_bytes());
634        tcp[2..4].copy_from_slice(&dst_port.to_be_bytes());
635        tcp[12] = 0x50; // Data offset: 5 words
636        tcp[13] = 0x02; // SYN flag
637
638        frame
639    }
640
641    /// Build a minimal Ethernet + IPv4 + UDP frame.
642    fn build_udp_frame(src_ip: [u8; 4], dst_ip: [u8; 4], src_port: u16, dst_port: u16) -> Vec<u8> {
643        let mut frame = vec![0u8; 14 + 20 + 8]; // eth + ipv4 + udp
644
645        // Ethernet header.
646        frame[12] = 0x08;
647        frame[13] = 0x00;
648
649        // IPv4 header.
650        let ip = &mut frame[14..34];
651        ip[0] = 0x45;
652        let total_len = 28u16; // 20 (IP) + 8 (UDP)
653        ip[2..4].copy_from_slice(&total_len.to_be_bytes());
654        ip[8] = 64;
655        ip[9] = 17; // Protocol: UDP
656        ip[12..16].copy_from_slice(&src_ip);
657        ip[16..20].copy_from_slice(&dst_ip);
658
659        // UDP header.
660        let udp = &mut frame[34..42];
661        udp[0..2].copy_from_slice(&src_port.to_be_bytes());
662        udp[2..4].copy_from_slice(&dst_port.to_be_bytes());
663        let udp_len = 8u16;
664        udp[4..6].copy_from_slice(&udp_len.to_be_bytes());
665
666        frame
667    }
668
669    /// Build a minimal Ethernet + IPv4 + ICMP echo request frame.
670    fn build_icmpv4_echo_frame(
671        src_mac: [u8; 6],
672        dst_mac: [u8; 6],
673        src_ip: [u8; 4],
674        dst_ip: [u8; 4],
675        ident: u16,
676        seq_no: u16,
677        data: &[u8],
678    ) -> Vec<u8> {
679        let ipv4_repr = Ipv4Repr {
680            src_addr: Ipv4Addr::from(src_ip).into(),
681            dst_addr: Ipv4Addr::from(dst_ip).into(),
682            next_header: IpProtocol::Icmp,
683            payload_len: 8 + data.len(),
684            hop_limit: 64,
685        };
686        let icmp_repr = Icmpv4Repr::EchoRequest {
687            ident,
688            seq_no,
689            data,
690        };
691        let frame_len = 14 + ipv4_repr.buffer_len() + icmp_repr.buffer_len();
692        let mut frame = vec![0u8; frame_len];
693
694        let mut eth_frame = EthernetFrame::new_unchecked(&mut frame);
695        EthernetRepr {
696            src_addr: EthernetAddress(src_mac),
697            dst_addr: EthernetAddress(dst_mac),
698            ethertype: EthernetProtocol::Ipv4,
699        }
700        .emit(&mut eth_frame);
701
702        ipv4_repr.emit(
703            &mut Ipv4Packet::new_unchecked(&mut frame[14..34]),
704            &ChecksumCapabilities::default(),
705        );
706        icmp_repr.emit(
707            &mut Icmpv4Packet::new_unchecked(&mut frame[34..]),
708            &ChecksumCapabilities::default(),
709        );
710
711        frame
712    }
713
714    /// Build a minimal Ethernet + ARP request frame.
715    fn build_arp_request_frame(src_mac: [u8; 6], src_ip: [u8; 4], target_ip: [u8; 4]) -> Vec<u8> {
716        let mut frame = vec![0u8; 14 + 28];
717
718        let mut eth_frame = EthernetFrame::new_unchecked(&mut frame);
719        EthernetRepr {
720            src_addr: EthernetAddress(src_mac),
721            dst_addr: EthernetAddress([0xff; 6]),
722            ethertype: EthernetProtocol::Arp,
723        }
724        .emit(&mut eth_frame);
725
726        ArpRepr::EthernetIpv4 {
727            operation: ArpOperation::Request,
728            source_hardware_addr: EthernetAddress(src_mac),
729            source_protocol_addr: Ipv4Addr::from(src_ip).into(),
730            target_hardware_addr: EthernetAddress([0x00; 6]),
731            target_protocol_addr: Ipv4Addr::from(target_ip).into(),
732        }
733        .emit(&mut ArpPacket::new_unchecked(&mut frame[14..]));
734
735        frame
736    }
737
738    #[test]
739    fn classify_tcp_syn() {
740        let frame = build_tcp_syn_frame([10, 0, 0, 2], [93, 184, 216, 34], 54321, 443);
741        match classify_frame(&frame) {
742            FrameAction::TcpSyn { src, dst } => {
743                assert_eq!(
744                    src,
745                    SocketAddr::new(Ipv4Addr::new(10, 0, 0, 2).into(), 54321)
746                );
747                assert_eq!(
748                    dst,
749                    SocketAddr::new(Ipv4Addr::new(93, 184, 216, 34).into(), 443)
750                );
751            }
752            _ => panic!("expected TcpSyn"),
753        }
754    }
755
756    #[test]
757    fn classify_tcp_ack_is_passthrough() {
758        let mut frame = build_tcp_syn_frame([10, 0, 0, 2], [93, 184, 216, 34], 54321, 443);
759        // Change flags to ACK only (not SYN).
760        frame[34 + 13] = 0x10; // ACK flag
761        assert!(matches!(classify_frame(&frame), FrameAction::Passthrough));
762    }
763
764    #[test]
765    fn classify_udp_dns() {
766        let frame = build_udp_frame([10, 0, 0, 2], [10, 0, 0, 1], 12345, 53);
767        assert!(matches!(classify_frame(&frame), FrameAction::Dns));
768    }
769
770    #[test]
771    fn classify_udp_non_dns() {
772        let frame = build_udp_frame([10, 0, 0, 2], [8, 8, 8, 8], 12345, 443);
773        match classify_frame(&frame) {
774            FrameAction::UdpRelay { src, dst } => {
775                assert_eq!(src.port(), 12345);
776                assert_eq!(dst.port(), 443);
777            }
778            _ => panic!("expected UdpRelay"),
779        }
780    }
781
782    #[test]
783    fn classify_arp_is_passthrough() {
784        let mut frame = vec![0u8; 42]; // ARP frame
785        frame[12] = 0x08;
786        frame[13] = 0x06; // EtherType: ARP
787        assert!(matches!(classify_frame(&frame), FrameAction::Passthrough));
788    }
789
790    #[test]
791    fn classify_garbage_is_passthrough() {
792        assert!(matches!(classify_frame(&[]), FrameAction::Passthrough));
793        assert!(matches!(classify_frame(&[0; 5]), FrameAction::Passthrough));
794    }
795
796    #[test]
797    fn gateway_replies_to_icmp_echo_requests() {
798        fn drive_one_frame(
799            device: &mut SmoltcpDevice,
800            iface: &mut Interface,
801            sockets: &mut SocketSet<'_>,
802            shared: &Arc<SharedState>,
803            poll_config: &PollLoopConfig,
804            now: Instant,
805        ) {
806            let frame = device.stage_next_frame().expect("expected staged frame");
807            if handle_gateway_icmp_echo(frame, poll_config, shared) {
808                device.drop_staged_frame();
809                return;
810            }
811            let _ = iface.poll_ingress_single(now, device, sockets);
812            let _ = iface.poll_egress(now, device, sockets);
813        }
814
815        let shared = Arc::new(SharedState::new(4));
816        let poll_config = PollLoopConfig {
817            gateway_mac: [0x02, 0x00, 0x00, 0x00, 0x00, 0x01],
818            guest_mac: [0x02, 0x00, 0x00, 0x00, 0x00, 0x02],
819            gateway_ipv4: Ipv4Addr::new(100, 96, 0, 1),
820            guest_ipv4: Ipv4Addr::new(100, 96, 0, 2),
821            gateway_ipv6: Ipv6Addr::LOCALHOST,
822            mtu: 1500,
823        };
824        let mut device = SmoltcpDevice::new(shared.clone(), poll_config.mtu);
825        let mut iface = create_interface(&mut device, &poll_config);
826        let mut sockets = SocketSet::new(vec![]);
827        let now = smoltcp_now();
828
829        // Mirror the real guest flow: resolve the gateway MAC before sending
830        // the ICMP echo request.
831        shared
832            .tx_ring
833            .push(build_arp_request_frame(
834                poll_config.guest_mac,
835                poll_config.guest_ipv4.octets(),
836                poll_config.gateway_ipv4.octets(),
837            ))
838            .unwrap();
839        shared
840            .tx_ring
841            .push(build_icmpv4_echo_frame(
842                poll_config.guest_mac,
843                poll_config.gateway_mac,
844                poll_config.guest_ipv4.octets(),
845                poll_config.gateway_ipv4.octets(),
846                0x1234,
847                0xABCD,
848                b"ping",
849            ))
850            .unwrap();
851
852        drive_one_frame(
853            &mut device,
854            &mut iface,
855            &mut sockets,
856            &shared,
857            &poll_config,
858            now,
859        );
860        let _ = shared.rx_ring.pop().expect("expected ARP reply");
861
862        drive_one_frame(
863            &mut device,
864            &mut iface,
865            &mut sockets,
866            &shared,
867            &poll_config,
868            now,
869        );
870
871        let reply = shared.rx_ring.pop().expect("expected ICMP echo reply");
872        let eth = EthernetFrame::new_checked(&reply).expect("valid ethernet frame");
873        assert_eq!(eth.src_addr(), EthernetAddress(poll_config.gateway_mac));
874        assert_eq!(eth.dst_addr(), EthernetAddress(poll_config.guest_mac));
875        assert_eq!(eth.ethertype(), EthernetProtocol::Ipv4);
876
877        let ipv4 = Ipv4Packet::new_checked(eth.payload()).expect("valid IPv4 packet");
878        assert_eq!(Ipv4Addr::from(ipv4.src_addr()), poll_config.gateway_ipv4);
879        assert_eq!(Ipv4Addr::from(ipv4.dst_addr()), poll_config.guest_ipv4);
880        assert_eq!(ipv4.next_header(), IpProtocol::Icmp);
881
882        let icmp = Icmpv4Packet::new_checked(ipv4.payload()).expect("valid ICMP packet");
883        let icmp_repr = Icmpv4Repr::parse(&icmp, &ChecksumCapabilities::default())
884            .expect("valid ICMP echo reply");
885        assert_eq!(
886            icmp_repr,
887            Icmpv4Repr::EchoReply {
888                ident: 0x1234,
889                seq_no: 0xABCD,
890                data: b"ping",
891            }
892        );
893    }
894
895    #[test]
896    fn external_icmp_echo_requests_are_not_answered_locally() {
897        fn drive_one_frame(
898            device: &mut SmoltcpDevice,
899            iface: &mut Interface,
900            sockets: &mut SocketSet<'_>,
901            shared: &Arc<SharedState>,
902            poll_config: &PollLoopConfig,
903            now: Instant,
904        ) {
905            let frame = device.stage_next_frame().expect("expected staged frame");
906            if handle_gateway_icmp_echo(frame, poll_config, shared) {
907                device.drop_staged_frame();
908                return;
909            }
910            let _ = iface.poll_ingress_single(now, device, sockets);
911            let _ = iface.poll_egress(now, device, sockets);
912        }
913
914        let shared = Arc::new(SharedState::new(4));
915        let poll_config = PollLoopConfig {
916            gateway_mac: [0x02, 0x00, 0x00, 0x00, 0x00, 0x01],
917            guest_mac: [0x02, 0x00, 0x00, 0x00, 0x00, 0x02],
918            gateway_ipv4: Ipv4Addr::new(100, 96, 0, 1),
919            guest_ipv4: Ipv4Addr::new(100, 96, 0, 2),
920            gateway_ipv6: Ipv6Addr::LOCALHOST,
921            mtu: 1500,
922        };
923        let mut device = SmoltcpDevice::new(shared.clone(), poll_config.mtu);
924        let mut iface = create_interface(&mut device, &poll_config);
925        let mut sockets = SocketSet::new(vec![]);
926        let now = smoltcp_now();
927
928        shared
929            .tx_ring
930            .push(build_arp_request_frame(
931                poll_config.guest_mac,
932                poll_config.guest_ipv4.octets(),
933                poll_config.gateway_ipv4.octets(),
934            ))
935            .unwrap();
936        shared
937            .tx_ring
938            .push(build_icmpv4_echo_frame(
939                poll_config.guest_mac,
940                poll_config.gateway_mac,
941                poll_config.guest_ipv4.octets(),
942                [142, 251, 216, 46],
943                0x1234,
944                0xABCD,
945                b"ping",
946            ))
947            .unwrap();
948
949        drive_one_frame(
950            &mut device,
951            &mut iface,
952            &mut sockets,
953            &shared,
954            &poll_config,
955            now,
956        );
957        let _ = shared.rx_ring.pop().expect("expected ARP reply");
958
959        drive_one_frame(
960            &mut device,
961            &mut iface,
962            &mut sockets,
963            &shared,
964            &poll_config,
965            now,
966        );
967        assert!(
968            shared.rx_ring.pop().is_none(),
969            "external ICMP should not be answered locally"
970        );
971    }
972}