zlayer_overlay/firewall/mod.rs
1//! Inbound firewall-rule management for the overlay + API + Raft ports.
2//!
3//! On Windows this module installs three inbound-allow rules in Windows
4//! Defender Firewall via the `INetFwPolicy2` COM API:
5//!
6//! - `ZLayer Overlay (UDP)` — the Wintun/boringtun listen port
7//! - `ZLayer API (TCP)` — the daemon HTTP/gRPC port
8//! - `ZLayer Raft (TCP)` — the scheduler Raft port
9//!
10//! Rules are scoped to the **Private + Domain** profiles only. Public profile
11//! is intentionally excluded so laptops on untrusted networks (coffee-shop
12//! Wi-Fi, airport, etc.) do not start accepting inbound cluster traffic.
13//!
14//! [`ensure_overlay_rules`] is idempotent: if a rule with the same name
15//! already exists it is left in place rather than duplicated.
16//!
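//! Each public function forwards to the backend module for the compilation
//! target (`windows`, `linux`, or `macos`). On any other target the functions
//! are no-ops, and the fallible ones return `Ok(())`. Not every operation has a
//! backend on every platform — the overlay-subnet and masquerade helpers, for
//! example, only act on Linux and are no-ops elsewhere — so see each function's
//! documentation for its platform coverage.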
21
22use std::net::IpAddr;
23
24use thiserror::Error;
25
26/// Errors produced while installing or removing Windows firewall rules.
27#[derive(Error, Debug)]
28pub enum FirewallError {
29 /// A COM call failed. Includes the underlying `HRESULT` message.
30 #[error("Windows COM call failed: {0}")]
31 Com(String),
32
33 /// `CoInitializeEx` returned a failure status.
34 #[error("CoInitializeEx failed: {0}")]
35 ComInit(String),
36
37 /// The `INetFwPolicy2` interface could not be instantiated.
38 #[error("INetFwPolicy2 not available: {0}")]
39 PolicyUnavailable(String),
40
41 /// Adding a firewall rule failed. Includes the rule name.
42 #[error("Failed to add firewall rule '{name}': {reason}")]
43 AddRule {
44 /// Display name of the rule that could not be created.
45 name: String,
46 /// Underlying error message from the Windows API.
47 reason: String,
48 },
49
50 /// Removing a firewall rule failed. Includes the rule name.
51 #[error("Failed to remove firewall rule '{name}': {reason}")]
52 RemoveRule {
53 /// Display name of the rule that could not be removed.
54 name: String,
55 /// Underlying error message from the Windows API.
56 reason: String,
57 },
58
59 /// A string could not be converted to the `BSTR` / wide-string form
60 /// required by the Windows COM API.
61 #[error("String conversion failed: {0}")]
62 StringConversion(String),
63}
64
65#[cfg(windows)]
66mod windows;
67
68#[cfg(target_os = "linux")]
69mod linux;
70
71#[cfg(target_os = "macos")]
72mod macos;
73
/// Name under which the inbound overlay (`WireGuard` UDP) rule is registered.
pub const OVERLAY_RULE_NAME: &str = "ZLayer Overlay (UDP)";

/// Name under which the inbound API (HTTP/gRPC TCP) rule is registered.
pub const API_RULE_NAME: &str = "ZLayer API (TCP)";

/// Name under which the inbound Raft (TCP) rule is registered.
pub const RAFT_RULE_NAME: &str = "ZLayer Raft (TCP)";

/// The complete set of rule names managed by this module, listed in the order
/// in which the rules are installed / removed.
pub const MANAGED_RULE_NAMES: &[&str] = &[
    OVERLAY_RULE_NAME,
    API_RULE_NAME,
    RAFT_RULE_NAME,
];
86
87/// Ensure the three inbound allow-rules exist in Windows Defender Firewall
88/// for the overlay UDP, API TCP, and Raft TCP ports.
89///
90/// Idempotent: if a rule with the expected name already exists it is left
91/// untouched. Rules are scoped to the Private + Domain profiles only.
92///
93/// On non-Windows targets this is a no-op that returns `Ok(())`.
94///
95/// # Arguments
96///
97/// * `wg_port` — UDP inbound port for the overlay (boringtun)
98/// * `api_port` — TCP inbound port for the daemon API
99/// * `raft_port` — TCP inbound port for the Raft scheduler
100///
101/// # Errors
102///
103/// Returns a [`FirewallError`] if COM initialization fails, the
104/// `INetFwPolicy2` service is unavailable, or the Windows Firewall API
105/// rejects a rule creation (typically because the process lacks
106/// administrator privileges). On non-Windows targets this cannot fail.
107pub fn ensure_overlay_rules(
108 wg_port: u16,
109 api_port: u16,
110 raft_port: u16,
111) -> Result<(), FirewallError> {
112 #[cfg(target_os = "linux")]
113 {
114 self::linux::ensure_overlay_rules(wg_port, api_port, raft_port)
115 }
116 #[cfg(target_os = "macos")]
117 {
118 self::macos::ensure_overlay_rules(wg_port, api_port, raft_port)
119 }
120 #[cfg(windows)]
121 {
122 self::windows::ensure_overlay_rules(wg_port, api_port, raft_port)
123 }
124 #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
125 {
126 let _ = (wg_port, api_port, raft_port);
127 Ok(())
128 }
129}
130
131/// Ensure a single dynamically-published host port is allowed inbound.
132///
133/// Used by the L4 proxy when an `OverlayMode::Shared` service publishes a
134/// free host port (`host:FREEPORT -> container_ip:port`): on a default-deny
135/// host the published port must be opened so peers on other nodes can reach
136/// the proxied service. `udp` selects the transport (UDP when true, else TCP).
137///
138/// Idempotent and best-effort, with the same non-fatal contract as
139/// [`ensure_overlay_rules`]. No-op on targets without a firewall backend.
140///
141/// # Errors
142///
143/// Returns a [`FirewallError`] only when the platform backend reports an
144/// unexpected failure (see the per-OS modules); a missing-privilege case is
145/// downgraded to a warning by the backend.
146pub fn ensure_published_port(port: u16, udp: bool) -> Result<(), FirewallError> {
147 #[cfg(target_os = "linux")]
148 {
149 self::linux::ensure_published_port(port, udp)
150 }
151 #[cfg(target_os = "macos")]
152 {
153 self::macos::ensure_published_port(port, udp)
154 }
155 #[cfg(windows)]
156 {
157 self::windows::ensure_published_port(port, udp)
158 }
159 #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
160 {
161 let _ = (port, udp);
162 Ok(())
163 }
164}
165
166/// Remove the inbound allow-rule for a previously-published host port (the
167/// counterpart of [`ensure_published_port`]). Safe to call when nothing is
168/// installed; every backend tolerates a missing rule. No-op on targets without
169/// a firewall backend.
170pub fn remove_published_port(port: u16, udp: bool) {
171 #[cfg(target_os = "linux")]
172 {
173 self::linux::remove_published_port(port, udp);
174 }
175 #[cfg(target_os = "macos")]
176 {
177 self::macos::remove_published_port(port, udp);
178 }
179 #[cfg(windows)]
180 {
181 self::windows::remove_published_port(port, udp);
182 }
183 #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
184 {
185 let _ = (port, udp);
186 }
187}
188
189/// Remove any ZLayer-managed inbound firewall rules that this module would
190/// otherwise install.
191///
192/// Safe to call when the rules do not exist — missing rules are treated as
193/// a successful no-op. On non-Windows targets this is a no-op that returns
194/// `Ok(())`.
195///
196/// # Errors
197///
198/// Returns a [`FirewallError`] if COM initialization fails, the
199/// `INetFwPolicy2` service is unavailable, or the Windows Firewall API
200/// rejects the remove call. "Rule not found" is not treated as an error.
201/// On non-Windows targets this cannot fail.
202pub fn remove_overlay_rules() -> Result<(), FirewallError> {
203 #[cfg(target_os = "linux")]
204 {
205 self::linux::remove_overlay_rules();
206 Ok(())
207 }
208 #[cfg(target_os = "macos")]
209 {
210 self::macos::remove_overlay_rules()
211 }
212 #[cfg(windows)]
213 {
214 self::windows::remove_overlay_rules()
215 }
216 #[cfg(not(any(target_os = "linux", target_os = "macos", windows)))]
217 {
218 Ok(())
219 }
220}
221
222/// Ensure the host firewall permits overlay traffic to/from `overlay_cidr`
223/// (e.g. the cluster CIDR `10.200.0.0/16`).
224///
225/// On a default-deny Linux host (UFW / firewalld / `iptables -P FORWARD DROP`),
226/// a container's DNS query to the node overlay IP — and inter-service overlay
227/// traffic — is dropped before it reaches `ZLayer`'s resolver. This installs a
228/// dedicated `ZLAYER-OVERLAY` chain (jumped from the top of `INPUT`/`FORWARD`)
229/// that ACCEPTs the overlay CIDR, so `ZLayer`'s own DNS and service-to-service
230/// networking work without the operator hand-authorising it. Idempotent.
231///
232/// On non-Linux targets this is a no-op that returns `Ok(())` (Windows manages
233/// per-port inbound rules via [`ensure_overlay_rules`]; macOS is out of scope).
234///
235/// # Errors
236///
237/// Returns a [`FirewallError`] only when the `iptables`/`ip6tables` binary
238/// cannot be spawned or rejects a rule. Callers should treat a failure as
239/// non-fatal (log + continue) — a restricted environment without `iptables`
240/// must not abort overlay setup.
241pub fn ensure_overlay_subnet_rules(overlay_cidr: &str) -> Result<(), FirewallError> {
242 #[cfg(target_os = "linux")]
243 {
244 self::linux::ensure_overlay_subnet_rules(overlay_cidr)
245 }
246 #[cfg(not(target_os = "linux"))]
247 {
248 let _ = overlay_cidr;
249 Ok(())
250 }
251}
252
253/// Remove the `ZLayer`-managed overlay-subnet firewall chain (the counterpart
254/// of [`ensure_overlay_subnet_rules`]). Safe to call when nothing is installed;
255/// missing rules are tolerated. No-op on non-Linux targets.
256pub fn remove_overlay_subnet_rules() {
257 #[cfg(target_os = "linux")]
258 self::linux::remove_overlay_subnet_rules();
259}
260
261/// Ensure overlay-sourced traffic is SNAT'd (masqueraded) when it egresses a
262/// non-overlay interface toward the LAN/internet, for the cluster `overlay_cidr`
263/// (e.g. `10.200.0.0/16`).
264///
265/// The host filter-table ACCEPTs (see [`ensure_overlay_subnet_rules`]) plus
266/// `ip_forward=1` get an overlay packet *forwarded* out the WAN NIC — but with
267/// its private overlay source address, so replies can never route back and every
268/// outbound connection from an overlay container hangs. This installs a
269/// `nat`-table masquerade (a dedicated `ZLAYER-OVERLAY-NAT` chain jumped from
270/// `POSTROUTING`) that rewrites the source to the host address for traffic
271/// leaving the overlay, while leaving intra-overlay (`zl-*`) forwarding alone.
272/// Idempotent.
273///
274/// On non-Linux targets this is a no-op that returns `Ok(())` (macOS/Windows use
275/// their own NAT models, out of scope here).
276///
277/// # Errors
278///
279/// Returns a [`FirewallError`] only when the `iptables`/`ip6tables` binary
280/// cannot be spawned or rejects a rule. Callers should treat a failure as
281/// non-fatal (log + continue) — a restricted environment without `iptables` must
282/// not abort overlay setup.
283pub fn ensure_overlay_masquerade(overlay_cidr: &str) -> Result<(), FirewallError> {
284 #[cfg(target_os = "linux")]
285 {
286 self::linux::ensure_overlay_masquerade(overlay_cidr)
287 }
288 #[cfg(not(target_os = "linux"))]
289 {
290 let _ = overlay_cidr;
291 Ok(())
292 }
293}
294
295/// Remove the `ZLayer`-managed overlay egress masquerade (the counterpart of
296/// [`ensure_overlay_masquerade`]). Safe to call when nothing is installed;
297/// missing rules are tolerated. No-op on non-Linux targets.
298pub fn remove_overlay_masquerade() {
299 #[cfg(target_os = "linux")]
300 self::linux::remove_overlay_masquerade();
301}
302
303/// Remove the `ZLayer`-managed per-member L3-isolation chain
304/// (`ZLAYER-OVERLAY-ISO`) and its `FORWARD` jump — the global-teardown
305/// counterpart of [`ensure_member_isolation`].
306///
307/// [`remove_member_isolation`] intentionally leaves the chain and jump resident
308/// because other members may still rely on them; this removes the whole chain on
309/// a full overlay teardown so nothing leaks. Safe to call when nothing is
310/// installed; missing rules are tolerated. No-op on non-Linux targets (macOS uses
311/// a node-side pf sub-anchor and Windows a per-network HCN vSwitch, each torn
312/// down by their own member-removal path).
313pub fn remove_overlay_isolation() {
314 #[cfg(target_os = "linux")]
315 self::linux::remove_overlay_isolation();
316}
317
318/// Install Docker-style per-network L3 isolation for one overlay member.
319///
320/// `member_ip` may reach each address in `peers` (bidirectionally), egress
321/// (LAN/internet), and the daemon `node_ip` — but NOT other networks' members or
322/// arbitrary cluster IPs within `overlay_cidr` (e.g. `10.200.0.0/16`). On Linux
323/// this is enforced via a dedicated `ZLAYER-OVERLAY-ISO` filter chain jumped from
324/// the TOP of `FORWARD` (above the blanket overlay accept), holding top-inserted
325/// `RETURN` allows for the peers/node and an appended `-d <overlay_cidr> -j DROP`
326/// catch-all. On macOS the node enforces the same policy on hairpinned VZ-guest
327/// traffic via a per-`network` `pf` table + sub-anchor.
328///
329/// `network` is the overlay network name: it keys the macOS `pf` table/anchor
330/// (one per isolated network) and is unused by the Linux backend (the pairwise
331/// rules already isolate). Idempotent.
332///
333/// On targets other than Linux/macOS this is a no-op that returns `Ok(())`
334/// (Windows uses its own networking model, out of scope here).
335///
336/// # Errors
337///
338/// Returns a [`FirewallError`] only when the `iptables` / `pfctl` binary cannot
339/// be spawned or rejects a rule. Callers should treat a failure as non-fatal
340/// (log and continue) — a restricted environment without the firewall binary
341/// must not abort overlay setup.
342pub fn ensure_member_isolation(
343 network: &str,
344 member_ip: IpAddr,
345 peers: &[IpAddr],
346 node_ip: IpAddr,
347 overlay_cidr: &str,
348) -> Result<(), FirewallError> {
349 #[cfg(target_os = "linux")]
350 {
351 self::linux::ensure_member_isolation(network, member_ip, peers, node_ip, overlay_cidr)
352 }
353 #[cfg(target_os = "macos")]
354 {
355 self::macos::ensure_member_isolation(network, member_ip, peers, node_ip, overlay_cidr)
356 }
357 #[cfg(not(any(target_os = "linux", target_os = "macos")))]
358 {
359 let _ = (network, member_ip, peers, node_ip, overlay_cidr);
360 Ok(())
361 }
362}
363
364/// Remove the per-member L3-isolation rules installed by
365/// [`ensure_member_isolation`] for `member_ip` on `network`. Best-effort and safe
366/// to call when nothing is installed; missing rules are tolerated. No-op on
367/// targets other than Linux/macOS.
368pub fn remove_member_isolation(
369 network: &str,
370 member_ip: IpAddr,
371 peers: &[IpAddr],
372 node_ip: IpAddr,
373 overlay_cidr: &str,
374) {
375 #[cfg(target_os = "linux")]
376 {
377 self::linux::remove_member_isolation(network, member_ip, peers, node_ip, overlay_cidr);
378 }
379 #[cfg(target_os = "macos")]
380 {
381 self::macos::remove_member_isolation(network, member_ip, peers, node_ip, overlay_cidr);
382 }
383 #[cfg(not(any(target_os = "linux", target_os = "macos")))]
384 {
385 let _ = (network, member_ip, peers, node_ip, overlay_cidr);
386 }
387}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// A full overlay teardown invokes the cross-platform
    /// [`remove_overlay_isolation`] wrapper unconditionally (see
    /// `OverlaydServer::teardown_global_overlay`), so it has to be safe when no
    /// isolation state was ever installed: a no-op on non-Linux targets and a
    /// tolerant best-effort cleanup on Linux (each `iptables` step swallows its
    /// exit status). Always-run, non-root: on a box lacking `iptables` or
    /// privileges the Linux path simply fails each step silently and returns.
    /// Regression guarded: teardown must never panic or abort because the ISO
    /// chain is absent, and repeated calls must be idempotent.
    #[test]
    fn remove_overlay_isolation_wrapper_is_idempotent_and_panic_free() {
        // Two back-to-back calls exercise the idempotence requirement.
        for _ in 0..2 {
            remove_overlay_isolation();
        }
    }

    /// [`remove_member_isolation`] is the per-member counterpart and also runs on
    /// the detach/teardown path. Removing isolation for a member whose rules were
    /// never installed must be a tolerant no-op (missing-rule exit codes are
    /// swallowed), never a panic. Throwaway addresses on a throwaway network name
    /// are used so no production chain is touched.
    #[test]
    fn remove_member_isolation_no_state_is_panic_free() {
        let member_addr: IpAddr = "10.200.99.2".parse().expect("valid member ip");
        let node_addr: IpAddr = "10.200.0.1".parse().expect("valid node ip");
        let peer_addrs: [IpAddr; 1] = ["10.200.99.3".parse().expect("valid peer ip")];

        // `ensure_member_isolation` was never called for this (network, member),
        // so every probe/delete misses — it must stay a clean no-op both times.
        for _ in 0..2 {
            remove_member_isolation(
                "zl-test-never-installed",
                member_addr,
                &peer_addrs,
                node_addr,
                "10.200.0.0/16",
            );
        }
    }
}
435}