Skip to main content

fakecloud_ec2/runtime/
firewall.rs

1//! Security-group + network-ACL packet filtering (issue #1745 phase 3).
2//!
3//! Phase 2 isolates instances at L3 by giving each subnet its own daemon
4//! bridge. That stops cross-VPC traffic but does nothing *within* a subnet —
5//! security-group and NACL rules still block nothing. This module closes that
6//! gap by translating the SG/NACL model into an **nftables** ruleset and
7//! applying it on the host, scoped to fakecloud's per-subnet bridges.
8//!
9//! ## Why nftables, and why opt-in
10//!
11//! Real packet filtering needs `CAP_NET_ADMIN`, which instance containers
12//! deliberately don't have. nftables (over iptables) is chosen for its atomic
13//! ruleset swaps — a clean fit for the dynamic Authorize/Revoke churn of
14//! security groups. Because applying host firewall rules is privileged and can
15//! interfere with a user's own networking, enforcement is **opt-in** via
16//! `FAKECLOUD_EC2_SG_ENFORCEMENT` and **degrades gracefully**: when nft or
17//! `CAP_NET_ADMIN` is missing (CI, Docker Desktop, rootless podman) the driver
18//! logs one warning and falls back to metadata-only — phase-2 isolation still
19//! holds, exactly as before (no regression).
20//!
21//! ## What's tested where
22//!
23//! The translation from the SG/NACL model to the nft ruleset
24//! ([`render_ruleset`]) is pure and exhaustively unit-tested. The apply path
25//! shells out to `nft -f -`; it cannot be exercised in CI (no `CAP_NET_ADMIN`),
26//! so it is kept thin and the *generated ruleset* is the verified artifact.
27
28use std::collections::BTreeMap;
29
/// One flattened security-group allow entry: a single protocol and port range
/// paired with at most one CIDR. Referenced-group and prefix-list sources are
/// expected to have been resolved to CIDRs by the caller (or dropped when that
/// is not possible) before a rule is built.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FirewallRule {
    /// Protocol selector: `tcp`, `udp`, `icmp`, or `-1` for every protocol.
    pub protocol: String,
    /// Lower bound of the port range. `-1` (paired with `to_port == -1`)
    /// means "all ports", in which case no port match is rendered.
    pub from_port: i64,
    /// Upper bound of the port range; uses the same `-1` sentinel as
    /// `from_port`.
    pub to_port: i64,
    /// IPv4 CIDR of the peer: the source for ingress rules, the destination
    /// for egress rules. `None` means anywhere.
    pub cidr: Option<String>,
}
43
44/// One instance's firewall view: its address on the subnet bridge plus the
45/// ingress/egress rules flattened from every security group attached to it.
46#[derive(Debug, Clone, PartialEq, Eq)]
47pub struct InstanceFirewall {
48    pub private_ip: String,
49    pub ingress: Vec<FirewallRule>,
50    pub egress: Vec<FirewallRule>,
51}
52
53/// One running instance's flattened firewall view, keyed by both its id (for
54/// the k8s NetworkPolicy `podSelector`) and its IP (for nft). The shared
55/// intermediate the service layer produces from EC2 state; the nft model
56/// builder and the k8s NetworkPolicy builder both consume it.
57#[derive(Debug, Clone, PartialEq, Eq)]
58pub struct InstanceRules {
59    pub instance_id: String,
60    pub subnet_id: String,
61    pub private_ip: String,
62    pub ingress: Vec<FirewallRule>,
63    pub egress: Vec<FirewallRule>,
64}
65
/// One entry of a subnet's network ACL. NACLs are stateless and cover the
/// whole subnet. AWS walks the entries in ascending `rule_number` order and
/// stops at the first match, so a lower-numbered `allow` shadows a
/// higher-numbered `deny` for the same traffic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NaclRule {
    /// AWS rule number; smaller numbers are evaluated first.
    pub rule_number: i64,
    /// `true` for an egress entry, `false` for an ingress entry.
    pub egress: bool,
    /// `true` = allow, `false` = deny.
    pub allow: bool,
    /// Protocol selector, in the same vocabulary as `FirewallRule::protocol`.
    pub protocol: String,
    /// Lower bound of the port range (`-1` = no port match).
    pub from_port: i64,
    /// Upper bound of the port range (`-1` = no port match).
    pub to_port: i64,
    /// CIDR the entry applies to; `None` means anywhere.
    pub cidr: Option<String>,
}
82
83/// Everything needed to render the firewall for one subnet bridge.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub struct SubnetFirewall {
86    /// The daemon network name (`fakecloud-subnet-<id>`); doubles as the nft
87    /// chain comment so a human reading `nft list ruleset` can see which subnet
88    /// a rule belongs to.
89    pub network_name: String,
90    pub instances: Vec<InstanceFirewall>,
91    pub nacl: Vec<NaclRule>,
92}
93
/// Family and name of the nftables table fakecloud owns. Using a dedicated
/// table means a full `flush table` + re-add is an atomic swap with no side
/// effects on docker's own iptables/nftables rules.
const TABLE: &str = "inet fakecloud_ec2";
98
99/// Render the complete nft ruleset for a set of subnets. Deterministic
100/// (subnets and rules emitted in the order given; the caller sorts for
101/// stability) so the output can be diffed and unit-tested.
102///
103/// Model: a single `forward` chain, default-accept, that for every instance
104/// emits its allow rules followed by a default-deny to that instance's IP.
105/// Established/related traffic is accepted up front so security groups behave
106/// statefully, like AWS. NACL deny rules are emitted per subnet before the
107/// per-instance rules (stateless, subnet-wide).
108pub fn render_ruleset(subnets: &[SubnetFirewall]) -> String {
109    let mut out = String::new();
110    // `add table` first so the following `flush` doesn't error on the *first*
111    // apply (when the table doesn't exist yet) — which would fail the entire
112    // `nft -f -` load and leave enforcement silently off. `add` is idempotent;
113    // `add`+`flush`+re-add is the canonical atomic-replace idiom.
114    out.push_str(&format!("add table {TABLE}\n"));
115    out.push_str(&format!("flush table {TABLE}\n"));
116    out.push_str(&format!("table {TABLE} {{\n"));
117    out.push_str("  chain forward {\n");
118    out.push_str("    type filter hook forward priority -5; policy accept;\n");
119    // Stateful: let replies through so SG rules only need to describe the
120    // opening direction, matching AWS security-group semantics.
121    out.push_str("    ct state established,related accept\n");
122
123    for subnet in subnets {
124        out.push_str(&format!("    # subnet {}\n", subnet.network_name));
125
126        // Subnet-wide NACL denies, evaluated in ascending rule-number order so
127        // a lower-numbered `allow` shadows a higher-numbered `deny` for the
128        // same traffic (AWS first-match semantics). A deny is emitted as a drop
129        // only when no earlier-numbered allow covers the identical
130        // direction/protocol/ports/CIDR — otherwise the allow wins and the deny
131        // never fires (bug-hunt 2026-06-18 finding 1.4). NACL allows ride the
132        // default-accept policy (the SG layer below still applies; NACL and SG
133        // are independent gates, both must permit).
134        let mut ordered = subnet.nacl.clone();
135        ordered.sort_by_key(|r| r.rule_number);
136        for (i, rule) in ordered.iter().enumerate() {
137            if rule.allow {
138                continue;
139            }
140            let shadowed = ordered[..i]
141                .iter()
142                .any(|earlier| earlier.allow && nacl_same_traffic(earlier, rule));
143            if shadowed {
144                continue;
145            }
146            if let Some(line) = render_nacl_drop(rule) {
147                out.push_str(&format!("    {line}\n"));
148            }
149        }
150
151        for inst in &subnet.instances {
152            // Ingress: allow matching, then default-deny to this instance.
153            for rule in &inst.ingress {
154                out.push_str(&format!(
155                    "    {}\n",
156                    render_rule(rule, Direction::Ingress, &inst.private_ip)
157                ));
158            }
159            out.push_str(&format!(
160                "    ip daddr {} drop comment \"default-deny ingress\"\n",
161                inst.private_ip
162            ));
163
164            // Egress: allow matching, then default-deny from this instance.
165            for rule in &inst.egress {
166                out.push_str(&format!(
167                    "    {}\n",
168                    render_rule(rule, Direction::Egress, &inst.private_ip)
169                ));
170            }
171            out.push_str(&format!(
172                "    ip saddr {} drop comment \"default-deny egress\"\n",
173                inst.private_ip
174            ));
175        }
176    }
177
178    out.push_str("  }\n");
179    out.push_str("}\n");
180    out
181}
182
/// Family and name of the bridge-family table fakecloud owns for
/// **same-subnet L2 enforcement**. It is deliberately a separate table from
/// the `inet` one so it can be loaded by its own best-effort `nft -f -`: a
/// kernel without `nf_conntrack_bridge` rejects the `ct state` line, and
/// keeping the tables apart means that failure cannot take the `inet` table
/// (cross-subnet routed enforcement) down with it.
const BRIDGE_TABLE: &str = "bridge fakecloud_ec2_l2";
189
190/// Render the **bridge-family** mirror of [`render_ruleset`].
191///
192/// Traffic between two containers on the *same* fakecloud subnet bridge is
193/// L2-switched between bridge ports and only reaches the `inet` `forward` hook
194/// when `bridge-nf-call-iptables=1` — and on some kernels/runners not even
195/// then. The `bridge`-family `forward` hook sees those forwarded frames
196/// directly, so mirroring the SG/NACL drops here makes same-subnet enforcement
197/// hold regardless of the bridge-netfilter sysctl. IPv4 matches are guarded
198/// with `ether type ip` (required in the bridge family before an `ip` match);
199/// `ct state established,related` keeps replies flowing statefully via
200/// `nf_conntrack_bridge`.
201pub fn render_bridge_ruleset(subnets: &[SubnetFirewall]) -> String {
202    let mut out = String::new();
203    out.push_str(&format!("add table {BRIDGE_TABLE}\n"));
204    out.push_str(&format!("flush table {BRIDGE_TABLE}\n"));
205    out.push_str(&format!("table {BRIDGE_TABLE} {{\n"));
206    out.push_str("  chain forward {\n");
207    // Lower (earlier) priority than the default bridge filter so our decision
208    // lands before anything else in the bridge path.
209    out.push_str("    type filter hook forward priority -300; policy accept;\n");
210    out.push_str("    ct state established,related accept\n");
211
212    for subnet in subnets {
213        out.push_str(&format!("    # subnet {}\n", subnet.network_name));
214
215        let mut ordered = subnet.nacl.clone();
216        ordered.sort_by_key(|r| r.rule_number);
217        for (i, rule) in ordered.iter().enumerate() {
218            if rule.allow {
219                continue;
220            }
221            let shadowed = ordered[..i]
222                .iter()
223                .any(|earlier| earlier.allow && nacl_same_traffic(earlier, rule));
224            if shadowed {
225                continue;
226            }
227            if let Some(line) = render_nacl_drop(rule) {
228                out.push_str(&format!("    ether type ip {line}\n"));
229            }
230        }
231
232        for inst in &subnet.instances {
233            for rule in &inst.ingress {
234                out.push_str(&format!(
235                    "    ether type ip {}\n",
236                    render_rule(rule, Direction::Ingress, &inst.private_ip)
237                ));
238            }
239            out.push_str(&format!(
240                "    ether type ip ip daddr {} drop comment \"default-deny ingress\"\n",
241                inst.private_ip
242            ));
243
244            for rule in &inst.egress {
245                out.push_str(&format!(
246                    "    ether type ip {}\n",
247                    render_rule(rule, Direction::Egress, &inst.private_ip)
248                ));
249            }
250            out.push_str(&format!(
251                "    ether type ip ip saddr {} drop comment \"default-deny egress\"\n",
252                inst.private_ip
253            ));
254        }
255    }
256
257    out.push_str("  }\n");
258    out.push_str("}\n");
259    out
260}
261
/// Which side of an instance a rendered allow rule guards. It decides whether
/// the instance address is matched as the destination or as the source.
#[derive(Clone, Copy)]
enum Direction {
    /// Traffic addressed to the instance (`ip daddr <instance>`).
    Ingress,
    /// Traffic originating from the instance (`ip saddr <instance>`).
    Egress,
}
267
268/// Render one allow rule. Ingress matches on `ip daddr <instance>` (+ optional
269/// `ip saddr <cidr>`); egress mirrors it.
270fn render_rule(rule: &FirewallRule, dir: Direction, instance_ip: &str) -> String {
271    let mut parts = Vec::new();
272    match dir {
273        Direction::Ingress => {
274            parts.push(format!("ip daddr {instance_ip}"));
275            if let Some(cidr) = normalized_cidr(&rule.cidr) {
276                parts.push(format!("ip saddr {cidr}"));
277            }
278        }
279        Direction::Egress => {
280            parts.push(format!("ip saddr {instance_ip}"));
281            if let Some(cidr) = normalized_cidr(&rule.cidr) {
282                parts.push(format!("ip daddr {cidr}"));
283            }
284        }
285    }
286    push_proto_ports(&mut parts, &rule.protocol, rule.from_port, rule.to_port);
287    parts.push("accept".to_string());
288    parts.join(" ")
289}
290
291/// Whether two NACL entries match the *same* traffic (same direction,
292/// protocol, port range, CIDR) — used to decide when a lower-numbered allow
293/// shadows a higher-numbered deny. Conservative: only exact matches shadow, so
294/// partially-overlapping rules still emit their drop (safer to over-deny than
295/// to silently allow).
296fn nacl_same_traffic(a: &NaclRule, b: &NaclRule) -> bool {
297    a.egress == b.egress
298        && a.protocol == b.protocol
299        && a.from_port == b.from_port
300        && a.to_port == b.to_port
301        && a.cidr == b.cidr
302}
303
304/// Render a NACL deny as a drop line scoped to its direction + match. Returns
305/// `None` for an allow rule (allows are the default-accept policy; only denies
306/// need an explicit line).
307fn render_nacl_drop(rule: &NaclRule) -> Option<String> {
308    if rule.allow {
309        return None;
310    }
311    let mut parts = Vec::new();
312    if let Some(cidr) = normalized_cidr(&rule.cidr) {
313        // Deny traffic from (ingress) / to (egress) the CIDR.
314        if rule.egress {
315            parts.push(format!("ip daddr {cidr}"));
316        } else {
317            parts.push(format!("ip saddr {cidr}"));
318        }
319    }
320    push_proto_ports(&mut parts, &rule.protocol, rule.from_port, rule.to_port);
321    parts.push("drop".to_string());
322    parts.push("comment \"nacl-deny\"".to_string());
323    Some(parts.join(" "))
324}
325
/// Append the protocol match and, for tcp/udp, the destination-port match of
/// an nft rule to `parts`.
///
/// * `protocol` is compared case-insensitively. `-1` or an empty string
///   matches everything and appends nothing.
/// * `icmp` / `1` appends `ip protocol icmp`.
/// * `tcp` / `6` and `udp` / `17` append `<proto>` followed by `dport <n>` or
///   `dport <from>-<to>` when both ports are non-negative. When either port
///   is negative ("all ports") they append `ip protocol <proto>` instead: a
///   bare `tcp`/`udp` token is not a complete nft match and would make the
///   whole script fail to load.
/// * Any other value is interpolated into the script, so it is emitted (as
///   `ip protocol <token>`) only when it consists solely of `[a-z0-9-]` and
///   does not start with `-`. Anything else appends no protocol match, which
///   closes the ruleset-injection surface (finding 2.2).
///
/// NOTE(review): ports are not range-checked here (`from <= to`,
/// `<= 65535`); presumably the caller validates them — confirm.
fn push_proto_ports(parts: &mut Vec<String>, protocol: &str, from: i64, to: i64) {
    // Normalise case once so `TCP` keeps its port restriction instead of
    // falling through to the generic branch.
    let protocol = protocol.to_ascii_lowercase();
    match protocol.as_str() {
        "-1" | "" => {}
        "icmp" | "1" => parts.push("ip protocol icmp".to_string()),
        proto @ ("tcp" | "udp" | "6" | "17") => {
            let name = match proto {
                "6" => "tcp",
                "17" => "udp",
                other => other,
            };
            if from >= 0 && to >= 0 {
                parts.push(name.to_string());
                if from == to {
                    parts.push(format!("dport {from}"));
                } else {
                    parts.push(format!("dport {from}-{to}"));
                }
            } else {
                // No usable port range: match on the protocol alone.
                parts.push(format!("ip protocol {name}"));
            }
        }
        other
            if !other.starts_with('-')
                && other
                    .bytes()
                    .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-') =>
        {
            parts.push(format!("ip protocol {other}"))
        }
        _ => {}
    }
}
357
/// Validate and normalise a rule's CIDR before it is interpolated into the
/// nft script.
///
/// Returns `None` ("no address match") when:
///
/// * `cidr` is `None` or empty;
/// * the value is an IPv4 CIDR with prefix length `0` (e.g. `0.0.0.0/0`),
///   which matches every address and which nft rejects as a no-op match;
/// * the value is not a well-formed address or CIDR: the address part must
///   parse as an IP address and the optional prefix must be plain decimal
///   digits within range (`0..=32` for IPv4, `0..=128` for IPv6).
///
/// Otherwise returns the value, with a redundant IPv4 `/32` host suffix
/// removed so single-host rules read cleanly. An IPv6 `/32` is a real network
/// prefix and is kept.
///
/// The structural check is also the **sanitizer**: the CIDR comes from an
/// Authorize/RevokeSecurityGroup parameter and ends up in the `nft -f -`
/// script, so nothing containing nft metacharacters (whitespace, `;`, `{`,
/// newline, …) may pass. A value that parses as an IP address contains only
/// `[0-9a-fA-F.:]`, and the prefix is digits only. A rejected value drops
/// just the match clause, never the whole rule (bug-hunt 2026-06-18 finding
/// 2.2). Parsing also rejects values such as `abc`, `/` or `10.0.0.0/99`,
/// which pass a character-set filter but would make nft refuse the entire
/// script.
///
/// NOTE(review): callers render the result after `ip saddr` / `ip daddr`,
/// which are IPv4 selectors; an IPv6 value is passed through unchanged here
/// and would presumably be rejected by nft — confirm whether IPv6 sources
/// should be filtered out upstream.
fn normalized_cidr(cidr: &Option<String>) -> Option<String> {
    let raw = cidr.as_deref()?;

    // Split only at the first '/', so "1.2.3.4/32/32" fails prefix validation
    // instead of being trimmed repeatedly.
    let (addr_text, prefix_text) = match raw.split_once('/') {
        Some((addr, prefix)) => (addr, Some(prefix)),
        None => (raw, None),
    };

    let addr: std::net::IpAddr = addr_text.parse().ok()?;
    let max_prefix: u8 = if addr.is_ipv4() { 32 } else { 128 };

    let prefix_len = match prefix_text {
        None => None,
        Some(text) => {
            // Digits only: `str::parse` would otherwise accept a leading '+'.
            if text.is_empty() || !text.bytes().all(|b| b.is_ascii_digit()) {
                return None;
            }
            let len: u8 = text.parse().ok()?;
            if len > max_prefix {
                return None;
            }
            Some(len)
        }
    };

    match prefix_len {
        // A zero-length IPv4 prefix is "anywhere".
        Some(0) if addr.is_ipv4() => None,
        // Bare address, or an IPv4 host route: emit the address alone.
        None => Some(addr_text.to_string()),
        Some(32) if addr.is_ipv4() => Some(addr_text.to_string()),
        Some(len) => Some(format!("{addr_text}/{len}")),
    }
}
381
/// The backing used for security-group enforcement in this process.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EnforcementMode {
    /// Rules are applied with nftables on the host; this needs
    /// `CAP_NET_ADMIN` and a runnable `nft`.
    Nftables,
    /// Degraded mode: rules are still tracked but nothing is enforced
    /// (metadata-only).
    Disabled,
}
390
391/// Decide the enforcement mode from the environment. Enforcement is opt-in:
392/// `FAKECLOUD_EC2_SG_ENFORCEMENT` must be set to `1`/`true`/`nftables`, `nft`
393/// must be runnable, AND the daemon must run on this host's network namespace
394/// (`host_local`). `env`, `host_local`, and `nft_probe` are injected so the
395/// decision is unit-testable without touching the environment or running `nft`.
396///
397/// `host_local` guards the false-positive on Docker Desktop / podman-machine
398/// (macOS/Windows): there the per-subnet bridges live inside the daemon's Linux
399/// VM, so `nft` on the host installs rules against the wrong netfilter and
400/// silently filters nothing — yet the probe would pass on a Linux box. We treat
401/// only a native-Linux host as able to filter (bug-hunt 2026-06-18 finding 1.5),
402/// so `enforced` never claims active enforcement that can't take effect.
403pub fn resolve_enforcement_mode(
404    env: Option<&str>,
405    host_local: bool,
406    nft_probe: impl FnOnce() -> bool,
407) -> EnforcementMode {
408    let opted_in = matches!(
409        env.map(|v| v.to_ascii_lowercase()).as_deref(),
410        Some("1") | Some("true") | Some("nftables") | Some("on")
411    );
412    if !opted_in || !host_local {
413        return EnforcementMode::Disabled;
414    }
415    if nft_probe() {
416        EnforcementMode::Nftables
417    } else {
418        EnforcementMode::Disabled
419    }
420}
421
/// Report whether the container daemon is assumed to share this process's
/// network namespace, which is what lets host nftables rules see the
/// inter-container traffic.
///
/// The answer is fixed at compile time: `true` when built for Linux, `false`
/// otherwise (Docker Desktop / podman-machine on macOS/Windows run the daemon
/// in a separate Linux VM). This is an honest default; a caller that knows
/// fakecloud and the daemon share a netns can override it.
pub fn host_shares_daemon_netns() -> bool {
    const BUILT_FOR_LINUX: bool = cfg!(target_os = "linux");
    BUILT_FOR_LINUX
}
430
/// Probe for a usable `nft` by running `nft list ruleset` with its output
/// discarded.
///
/// Returns `true` only when the command could be spawned and exited
/// successfully — i.e. nft exists and this process holds enough capability to
/// read the ruleset, which is taken as a good proxy for being able to write
/// it. A spawn failure counts as unavailable.
pub fn nft_available() -> bool {
    let mut probe = std::process::Command::new("nft");
    probe
        .arg("list")
        .arg("ruleset")
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null());

    matches!(probe.status(), Ok(exit) if exit.success())
}
443
444/// Group instances by their subnet network name into the per-subnet model the
445/// renderer consumes. Pure helper so the service layer can build the model from
446/// its own state without depending on render internals.
447pub fn group_by_subnet(
448    instances: Vec<(String, InstanceFirewall)>,
449    nacls: BTreeMap<String, Vec<NaclRule>>,
450) -> Vec<SubnetFirewall> {
451    let mut by_net: BTreeMap<String, Vec<InstanceFirewall>> = BTreeMap::new();
452    for (network_name, inst) in instances {
453        by_net.entry(network_name).or_default().push(inst);
454    }
455    by_net
456        .into_iter()
457        .map(|(network_name, mut instances)| {
458            instances.sort_by(|a, b| a.private_ip.cmp(&b.private_ip));
459            let nacl = nacls.get(&network_name).cloned().unwrap_or_default();
460            SubnetFirewall {
461                network_name,
462                instances,
463                nacl,
464            }
465        })
466        .collect()
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Network name used by every single-subnet fixture.
    const NET: &str = "fakecloud-subnet-a";
    /// Address of the lone instance most fixtures place on `NET`.
    const IP: &str = "172.30.0.2";

    /// Build an SG rule from borrowed parts.
    fn rule(protocol: &str, from_port: i64, to_port: i64, cidr: Option<&str>) -> FirewallRule {
        FirewallRule {
            protocol: protocol.to_string(),
            from_port,
            to_port,
            cidr: cidr.map(String::from),
        }
    }

    /// Single-port TCP allow rule.
    fn tcp(port: i64, cidr: Option<&str>) -> FirewallRule {
        rule("tcp", port, port, cidr)
    }

    /// Instance with the given ingress rules and no egress rules.
    fn instance(ip: &str, ingress: Vec<FirewallRule>) -> InstanceFirewall {
        InstanceFirewall {
            private_ip: ip.to_string(),
            ingress,
            egress: Vec::new(),
        }
    }

    /// Subnet named `NET` holding the given instances and NACL entries.
    fn subnet(instances: Vec<InstanceFirewall>, nacl: Vec<NaclRule>) -> SubnetFirewall {
        SubnetFirewall {
            network_name: NET.to_string(),
            instances,
            nacl,
        }
    }

    /// Ingress NACL entry covering a single port (`-1` = no port match).
    fn ingress_nacl(
        rule_number: i64,
        allow: bool,
        protocol: &str,
        port: i64,
        cidr: &str,
    ) -> NaclRule {
        NaclRule {
            rule_number,
            egress: false,
            allow,
            protocol: protocol.to_string(),
            from_port: port,
            to_port: port,
            cidr: Some(cidr.to_string()),
        }
    }

    /// One subnet, one instance, one `tcp/22 from 10.0.0.0/8` ingress allow.
    fn ssh_model() -> Vec<SubnetFirewall> {
        vec![subnet(
            vec![instance(IP, vec![tcp(22, Some("10.0.0.0/8"))])],
            vec![],
        )]
    }

    #[test]
    fn renders_allow_then_default_deny_for_ingress() {
        let rs = render_ruleset(&ssh_model());
        // `add table` has to come before `flush table`: on the first apply
        // the table is absent and a failing flush aborts the whole load.
        let add = rs.find("add table inet fakecloud_ec2").expect("add table");
        let flush = rs
            .find("flush table inet fakecloud_ec2")
            .expect("flush table");
        assert!(add < flush, "add table must come before flush:\n{rs}");
        assert!(rs.contains("ct state established,related accept"));
        assert!(rs.contains("ip daddr 172.30.0.2 ip saddr 10.0.0.0/8 tcp dport 22 accept"));
        assert!(rs.contains("ip daddr 172.30.0.2 drop comment \"default-deny ingress\""));
        // No explicit egress allows, yet the default-deny line is present.
        assert!(rs.contains("ip saddr 172.30.0.2 drop comment \"default-deny egress\""));
    }

    #[test]
    fn bridge_ruleset_mirrors_inet_with_ether_type_guard() {
        let rs = render_bridge_ruleset(&ssh_model());
        // Own bridge-family table, add-before-flush.
        let add = rs
            .find("add table bridge fakecloud_ec2_l2")
            .expect("add table");
        let flush = rs
            .find("flush table bridge fakecloud_ec2_l2")
            .expect("flush table");
        assert!(add < flush, "add table must come before flush:\n{rs}");
        // Bridge-family forward hook plus conntrack for stateful replies.
        assert!(rs.contains("type filter hook forward priority -300; policy accept;"));
        assert!(rs.contains("ct state established,related accept"));
        // Every IPv4 match carries the `ether type ip` guard the bridge
        // family requires before an `ip` match.
        assert!(rs
            .contains("ether type ip ip daddr 172.30.0.2 ip saddr 10.0.0.0/8 tcp dport 22 accept"));
        assert!(
            rs.contains("ether type ip ip daddr 172.30.0.2 drop comment \"default-deny ingress\"")
        );
        assert!(
            rs.contains("ether type ip ip saddr 172.30.0.2 drop comment \"default-deny egress\"")
        );
        // No line may start with a bare `ip daddr` / `ip saddr`.
        let unguarded = rs
            .lines()
            .map(str::trim)
            .find(|line| line.starts_with("ip daddr") || line.starts_with("ip saddr"));
        if let Some(line) = unguarded {
            panic!("unguarded ip match in bridge family:\n{line}");
        }
    }

    #[test]
    fn all_protocols_and_anywhere_omit_match_clauses() {
        let everything = rule("-1", -1, -1, Some("0.0.0.0/0"));
        let line = render_rule(&everything, Direction::Ingress, "172.30.0.5");
        // Anywhere -> no saddr; all protocols -> no proto; no port either.
        assert_eq!(line, "ip daddr 172.30.0.5 accept");
    }

    #[test]
    fn port_range_and_single_port() {
        let range = rule("tcp", 8000, 8100, None);
        assert!(render_rule(&range, Direction::Egress, "172.30.0.9")
            .contains("tcp dport 8000-8100 accept"));
        assert!(
            render_rule(&tcp(443, None), Direction::Ingress, "172.30.0.9")
                .contains("tcp dport 443 accept")
        );
    }

    #[test]
    fn icmp_and_numeric_protocols() {
        let icmp = rule("icmp", -1, -1, None);
        assert!(render_rule(&icmp, Direction::Ingress, IP).contains("ip protocol icmp"));
        let udp = rule("17", 53, 53, None);
        assert!(render_rule(&udp, Direction::Ingress, IP).contains("udp dport 53"));
    }

    #[test]
    fn host_cidr_strips_slash_32() {
        let host = tcp(22, Some("203.0.113.7/32"));
        assert!(render_rule(&host, Direction::Ingress, IP)
            .contains("ip saddr 203.0.113.7 tcp dport 22"));
    }

    #[test]
    fn cidr_with_nft_metacharacters_is_dropped_not_injected() {
        // A CIDR carrying nft syntax (`;`, spaces, words) must never reach
        // the `nft -f -` script (finding 2.2). The match clause is omitted
        // and the rule still renders safely, ending in `accept`.
        let hostile = tcp(22, Some("10.0.0.0/8; drop comment \"x\""));
        let line = render_rule(&hostile, Direction::Ingress, IP);
        assert!(!line.contains(';'), "no injected semicolon: {line}");
        assert!(!line.contains("comment"), "no injected tokens: {line}");
        assert!(
            !line.contains("ip saddr"),
            "malformed cidr clause omitted: {line}"
        );
        assert!(line.ends_with("accept"), "rule still valid: {line}");
    }

    #[test]
    fn unknown_protocol_with_bad_chars_emits_no_proto_match() {
        let hostile = rule("tcp; drop", -1, -1, None);
        let line = render_rule(&hostile, Direction::Ingress, IP);
        assert!(
            !line.contains(';') && !line.contains("ip protocol"),
            "{line}"
        );
        assert_eq!(line, "ip daddr 172.30.0.2 accept");
    }

    #[test]
    fn nacl_deny_emitted_before_instance_rules() {
        let model = vec![subnet(
            vec![instance(IP, vec![])],
            vec![ingress_nacl(100, false, "tcp", 3389, "198.51.100.0/24")],
        )];
        let rs = render_ruleset(&model);
        let deny = rs
            .find("ip saddr 198.51.100.0/24 tcp dport 3389 drop")
            .unwrap();
        let inst = rs.find("ip daddr 172.30.0.2 drop").unwrap();
        assert!(
            deny < inst,
            "nacl deny must precede the instance default-deny"
        );
        // Allow entries never produce an explicit line.
        assert!(!rs.contains("nacl-allow"));
    }

    #[test]
    fn nacl_lower_numbered_allow_shadows_higher_numbered_deny() {
        // AWS first-match-by-rule-number: `100 allow tcp/22 10/8` wins over
        // `200 deny tcp/22 10/8`, so the deny is NOT emitted (finding 1.4).
        let entry = |rule_number, allow| ingress_nacl(rule_number, allow, "tcp", 22, "10.0.0.0/8");

        // Deliberately out of order to exercise the sort.
        let shadowed = vec![subnet(
            vec![instance(IP, vec![])],
            vec![entry(200, false), entry(100, true)],
        )];
        let rs = render_ruleset(&shadowed);
        assert!(
            !rs.contains("ip saddr 10.0.0.0/8 tcp dport 22 drop"),
            "a lower-numbered allow must shadow the deny:\n{rs}"
        );

        // Opposite precedence: deny 100 ahead of allow 200 -> deny fires.
        let firing = vec![subnet(vec![], vec![entry(100, false), entry(200, true)])];
        assert!(render_ruleset(&firing).contains("ip saddr 10.0.0.0/8 tcp dport 22 drop"));
    }

    #[test]
    fn enforcement_mode_is_opt_in_and_capability_gated() {
        // Not opted in -> disabled whatever nft / host say.
        assert_eq!(
            resolve_enforcement_mode(None, true, || true),
            EnforcementMode::Disabled
        );
        assert_eq!(
            resolve_enforcement_mode(Some("0"), true, || true),
            EnforcementMode::Disabled
        );
        // Opted in, nft missing -> degrade.
        assert_eq!(
            resolve_enforcement_mode(Some("1"), true, || false),
            EnforcementMode::Disabled
        );
        // Opted in and capable, but the daemon is not host-local (Docker
        // Desktop / VM) -> degrade instead of falsely claiming enforcement
        // (finding 1.5).
        assert_eq!(
            resolve_enforcement_mode(Some("1"), false, || true),
            EnforcementMode::Disabled
        );
        // Opted in + host-local + capable -> nftables.
        assert_eq!(
            resolve_enforcement_mode(Some("nftables"), true, || true),
            EnforcementMode::Nftables
        );
        assert_eq!(
            resolve_enforcement_mode(Some("TRUE"), true, || true),
            EnforcementMode::Nftables
        );
    }

    #[test]
    fn group_by_subnet_sorts_and_attaches_nacls() {
        let instances = vec![
            ("net-a".to_string(), instance("172.30.0.9", vec![])),
            ("net-a".to_string(), instance("172.30.0.2", vec![])),
        ];
        let mut nacls = BTreeMap::new();
        nacls.insert(
            "net-a".to_string(),
            vec![ingress_nacl(100, false, "-1", -1, "10.0.0.0/8")],
        );
        let grouped = group_by_subnet(instances, nacls);
        assert_eq!(grouped.len(), 1);
        assert_eq!(grouped[0].instances[0].private_ip, "172.30.0.2");
        assert_eq!(grouped[0].instances[1].private_ip, "172.30.0.9");
        assert_eq!(grouped[0].nacl.len(), 1);
    }
}