zlayer-overlay 0.13.0

Encrypted overlay networking for containers using boringtun userspace WireGuard
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
//! Windows Defender Firewall rule management via `INetFwPolicy2` COM.
//!
//! The `windows_firewall` crate on crates.io was evaluated and rejected:
//! its `FirewallRuleBuilder::profiles` takes an `Option<Profile>` (single
//! variant), so restricting a rule to Private + Domain simultaneously --
//! which is a hard requirement for this module -- would require creating
//! two separate rules per port and is not how the underlying COM API
//! models it (the profile mask is a bitflag on a single rule). We go
//! direct to `windows-rs` instead.

#![cfg(target_os = "windows")]
// This module is the Windows Firewall COM FFI boundary. Every `unsafe`
// block below has a `SAFETY:` comment explaining why the required COM
// invariants hold; the workspace-wide `-W unsafe-code` policy remains in
// force everywhere else.
#![allow(unsafe_code)]

use windows::core::BSTR;
use windows::Win32::Foundation::VARIANT_TRUE;
use windows::Win32::NetworkManagement::WindowsFirewall::{
    INetFwPolicy2, INetFwRules, NetFwPolicy2, NetFwRule, NET_FW_ACTION_ALLOW,
    NET_FW_IP_PROTOCOL_TCP, NET_FW_IP_PROTOCOL_UDP, NET_FW_PROFILE2_DOMAIN,
    NET_FW_PROFILE2_PRIVATE, NET_FW_RULE_DIR_IN,
};
use windows::Win32::System::Com::{
    CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
    COINIT_APARTMENTTHREADED, COINIT_DISABLE_OLE1DDE,
};

use super::{FirewallError, API_RULE_NAME, MANAGED_RULE_NAMES, OVERLAY_RULE_NAME, RAFT_RULE_NAME};

/// Display name of the inbound overlay-DNS responder rule (UDP/53).
///
/// Defined here rather than in `mod.rs` because the cross-platform
/// surface (and `MANAGED_RULE_NAMES`) is Windows-agnostic; the DNS
/// responder is a Windows-only firewall concern. `remove_overlay_rules`
/// tears these down explicitly in addition to `MANAGED_RULE_NAMES`.
///
/// The display name doubles as the lookup key: existence checks and
/// removal in this module both go through the rule collection by name.
const DNS_UDP_RULE_NAME: &str = "ZLayer DNS (UDP)";

/// Display name of the inbound overlay-DNS responder rule (TCP/53).
///
/// Installed by `ensure_overlay_rules` and removed by
/// `remove_overlay_rules` alongside [`DNS_UDP_RULE_NAME`].
const DNS_TCP_RULE_NAME: &str = "ZLayer DNS (TCP)";

/// Fixed port the overlay DNS responder listens on (both UDP and TCP).
///
/// Unlike the overlay / API / Raft ports this is not a parameter of
/// `ensure_overlay_rules`; both DNS rules always use this value.
const DNS_PORT: u16 = 53;

/// RAII guard that pairs `CoInitializeEx` with `CoUninitialize` on drop.
///
/// Windows requires COM to be initialized on the calling thread before
/// any `CoCreateInstance` calls. We initialize apartment-threaded (the
/// Windows Firewall COM server is STA) and tear down on scope exit so
/// repeated calls to `ensure_overlay_rules` don't leak COM state.
struct ComGuard;

impl ComGuard {
    fn new() -> Result<Self, FirewallError> {
        // SAFETY: CoInitializeEx is safe to call on any thread; the
        // returned HRESULT is checked below. COINIT_APARTMENTTHREADED
        // | COINIT_DISABLE_OLE1DDE is the standard non-OLE STA flag
        // combo recommended by Microsoft for modern COM clients.
        let hr = unsafe { CoInitializeEx(None, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE) };
        // S_OK, S_FALSE (already initialized on this thread), and
        // RPC_E_CHANGED_MODE (already initialized with a different
        // concurrency model -- someone else owns the apartment) are all
        // acceptable: we can still make COM calls. `HRESULT::is_err`
        // treats those three as success.
        if hr.is_err() {
            return Err(FirewallError::ComInit(format!("{hr:?}")));
        }
        Ok(Self)
    }
}

impl Drop for ComGuard {
    fn drop(&mut self) {
        // SAFETY: We only call CoUninitialize once per successful
        // CoInitializeEx, matching the documented reference-count
        // contract.
        unsafe { CoUninitialize() };
    }
}

/// Handle for talking to the live firewall policy.
///
/// Holds the `INetFwRules` collection plus the `ComGuard` so that COM
/// stays initialized for the full lifetime of the handle. The raw
/// `INetFwPolicy2` pointer isn't re-used after we pull `Rules()` off it,
/// so it is dropped at the end of [`FirewallPolicy::open`].
///
/// Do not reorder the fields: Rust drops struct fields in declaration
/// order, and the teardown sequence below relies on it.
struct FirewallPolicy {
    /// Rule collection obtained from `INetFwPolicy2::Rules`; every
    /// lookup, add, and remove in this module goes through it.
    rules: INetFwRules,
    // Drop order matters: `_com` is the last field, so interface
    // pointers in `rules` are released before CoUninitialize fires.
    _com: ComGuard,
}

impl FirewallPolicy {
    /// Initialize COM on this thread, activate the firewall policy
    /// object, and capture its rule collection.
    ///
    /// # Errors
    ///
    /// * Whatever `ComGuard::new` reports if COM cannot be initialized.
    /// * [`FirewallError::PolicyUnavailable`] if `NetFwPolicy2` cannot
    ///   be activated.
    /// * [`FirewallError::Com`] if the `Rules` accessor fails.
    fn open() -> Result<Self, FirewallError> {
        // Taken first so that every early return below still unwinds
        // COM after the interface pointers have been released.
        let guard = ComGuard::new()?;

        // SAFETY: CLSID_NetFwPolicy2 is a well-known COM class shipped
        // with every supported Windows SKU. CLSCTX_INPROC_SERVER is the
        // activation context Microsoft documents for this class.
        let activation = unsafe {
            CoCreateInstance::<Option<&windows::core::IUnknown>, INetFwPolicy2>(
                &NetFwPolicy2,
                None,
                CLSCTX_INPROC_SERVER,
            )
        };
        let policy = match activation {
            Ok(policy) => policy,
            Err(e) => return Err(FirewallError::PolicyUnavailable(e.to_string())),
        };

        // SAFETY: Rules is a [propget] accessor on INetFwPolicy2 that
        // returns the shared INetFwRules collection; safe to call on a
        // valid policy pointer.
        let rules = match unsafe { policy.Rules() } {
            Ok(rules) => rules,
            Err(e) => return Err(FirewallError::Com(format!("INetFwPolicy2::Rules: {e}"))),
        };

        Ok(Self { rules, _com: guard })
    }

    /// Report whether the collection can resolve a rule by this display
    /// name.
    ///
    /// Any failure of the lookup -- not only "no such rule" -- is
    /// reported as `false`.
    fn rule_exists(&self, name: &str) -> bool {
        let wanted = BSTR::from(name);
        // SAFETY: `Item` is the [propget] lookup on INetFwRules; it
        // returns a failing HRESULT when no rule matches, which we
        // translate to `false`.
        let lookup = unsafe { self.rules.Item(&wanted) };
        lookup.is_ok()
    }

    /// Build a new inbound allow-rule from `spec` and add it to the
    /// collection.
    ///
    /// This does not check for an existing rule of the same name; the
    /// caller is expected to have seen `rule_exists` return `false`
    /// first so the overall operation stays idempotent.
    ///
    /// # Errors
    ///
    /// [`FirewallError::AddRule`] naming `spec.name`, whether the rule
    /// object could not be created or one of the setters / the final
    /// `Add` failed.
    fn add_rule(&self, spec: &RuleSpec<'_>) -> Result<(), FirewallError> {
        use windows::Win32::NetworkManagement::WindowsFirewall::INetFwRule;

        // Both failure points report against the same rule name.
        let failure = |reason: String| FirewallError::AddRule {
            name: spec.name.to_string(),
            reason,
        };

        // SAFETY: CLSID_NetFwRule is also a shipped-with-Windows COM
        // class and supports CLSCTX_INPROC_SERVER.
        let activation = unsafe {
            CoCreateInstance::<Option<&windows::core::IUnknown>, INetFwRule>(
                &NetFwRule,
                None,
                CLSCTX_INPROC_SERVER,
            )
        };
        let rule = activation.map_err(|e| failure(format!("CoCreateInstance(NetFwRule): {e}")))?;

        // Marshal every string argument up front so the BSTRs outlive
        // the whole configure + Add sequence.
        let port_text = spec.port.to_string();
        let name = BSTR::from(spec.name);
        let description = BSTR::from(spec.description);
        let local_ports = BSTR::from(port_text.as_str());
        let grouping = BSTR::from("ZLayer");
        // Private + Domain only. Public is intentionally omitted.
        let profiles = NET_FW_PROFILE2_DOMAIN.0 | NET_FW_PROFILE2_PRIVATE.0;

        // Grouped in a closure so the first failing COM call
        // short-circuits with `?` and is wrapped exactly once below.
        let configure_then_install = || -> windows::core::Result<()> {
            // SAFETY: Every setter below is a [propput] on INetFwRule
            // invoked on the rule object created above, and `Add` is
            // invoked on the collection owned by `self`. The BSTRs are
            // only borrowed and stay alive until the closure returns.
            unsafe {
                rule.SetName(&name)?;
                rule.SetDescription(&description)?;
                rule.SetProtocol(spec.protocol)?;
                rule.SetLocalPorts(&local_ports)?;
                rule.SetDirection(NET_FW_RULE_DIR_IN)?;
                rule.SetAction(NET_FW_ACTION_ALLOW)?;
                rule.SetEnabled(VARIANT_TRUE)?;
                rule.SetProfiles(profiles)?;
                rule.SetGrouping(&grouping)?;
                self.rules.Add(&rule)?;
            }
            Ok(())
        };

        configure_then_install().map_err(|e| failure(e.to_string()))
    }

    /// Delete the rule with this display name, if there is one.
    ///
    /// A name that `rule_exists` cannot resolve counts as already
    /// removed and yields `Ok(())`.
    ///
    /// # Errors
    ///
    /// [`FirewallError::RemoveRule`] when the rule was found but the
    /// `Remove` call failed.
    fn remove_rule(&self, name: &str) -> Result<(), FirewallError> {
        if self.rule_exists(name) {
            let target = BSTR::from(name);
            // SAFETY: Remove is a mutating method on the shared
            // INetFwRules collection; the BSTR outlives the call.
            let removal = unsafe { self.rules.Remove(&target) };
            removal.map_err(|e| FirewallError::RemoveRule {
                name: name.to_string(),
                reason: e.to_string(),
            })
        } else {
            Ok(())
        }
    }
}

/// Parameters for a single rule we want to install. Kept as a plain
/// struct so the add-rule code path is a straight-line sequence of COM
/// setter calls rather than a matrix of conditionals.
///
/// Direction (inbound), action (allow), profiles (Private + Domain) and
/// grouping ("ZLayer") are not part of the spec; `add_rule` applies the
/// same values to every rule.
struct RuleSpec<'a> {
    /// Display name of the rule; also the key used for existence checks
    /// and removal.
    name: &'a str,
    /// Free-text description stored on the rule.
    description: &'a str,
    /// Single local port the rule opens; `add_rule` stringifies it for
    /// the rule's local-ports property.
    port: u16,
    /// `NET_FW_IP_PROTOCOL_TCP` (6) or `NET_FW_IP_PROTOCOL_UDP` (17).
    protocol: i32,
}

/// Idempotently install the five inbound allow-rules this module
/// manages: overlay (UDP), API (TCP), Raft (TCP), and the overlay DNS
/// responder on port 53 (UDP and TCP).
///
/// `wg_port`, `api_port` and `raft_port` are the local ports for the
/// first three rules; the DNS rules always use `DNS_PORT`.
///
/// A rule whose display name already exists is left untouched and
/// skipped.
///
/// NOTE(review): the existence check is by name only, so a rule that was
/// installed earlier with a different port is not updated when the port
/// argument changes -- confirm whether callers rely on
/// `remove_overlay_rules` before re-configuring ports.
///
/// # Errors
///
/// Returns the error from opening the firewall policy, or the first
/// `add_rule` failure. Rules installed before the failing one are not
/// rolled back.
pub(super) fn ensure_overlay_rules(
    wg_port: u16,
    api_port: u16,
    raft_port: u16,
) -> Result<(), FirewallError> {
    let policy = FirewallPolicy::open()?;

    let specs = [
        RuleSpec {
            name: OVERLAY_RULE_NAME,
            description: "ZLayer encrypted overlay (WireGuard/boringtun) inbound UDP",
            port: wg_port,
            protocol: NET_FW_IP_PROTOCOL_UDP.0,
        },
        RuleSpec {
            name: API_RULE_NAME,
            description: "ZLayer daemon HTTP/gRPC API inbound TCP",
            port: api_port,
            protocol: NET_FW_IP_PROTOCOL_TCP.0,
        },
        RuleSpec {
            name: RAFT_RULE_NAME,
            description: "ZLayer Raft scheduler inbound TCP",
            port: raft_port,
            protocol: NET_FW_IP_PROTOCOL_TCP.0,
        },
        RuleSpec {
            name: DNS_UDP_RULE_NAME,
            description: "ZLayer overlay DNS responder inbound UDP",
            port: DNS_PORT,
            protocol: NET_FW_IP_PROTOCOL_UDP.0,
        },
        RuleSpec {
            name: DNS_TCP_RULE_NAME,
            description: "ZLayer overlay DNS responder inbound TCP",
            port: DNS_PORT,
            protocol: NET_FW_IP_PROTOCOL_TCP.0,
        },
    ];

    // Iterate by reference: `add_rule` only borrows the spec, so its
    // fields remain available for the log line afterwards.
    for spec in &specs {
        if policy.rule_exists(spec.name) {
            tracing::debug!(rule = spec.name, "firewall rule already present; skipping");
            continue;
        }
        policy.add_rule(spec)?;
        tracing::info!(rule = spec.name, port = spec.port, "installed firewall rule");
    }

    Ok(())
}

/// Tear down every rule `ensure_overlay_rules` installs. A rule that is
/// already absent is not an error.
///
/// The names come from two places: the cross-platform
/// `MANAGED_RULE_NAMES` (overlay / API / Raft) and the Windows-only DNS
/// responder rules declared in this module, which `MANAGED_RULE_NAMES`
/// does not list. They are processed in that order.
///
/// # Errors
///
/// Returns the error from opening the firewall policy, or the first
/// removal failure; names after the failing one are not attempted.
pub(super) fn remove_overlay_rules() -> Result<(), FirewallError> {
    let policy = FirewallPolicy::open()?;
    MANAGED_RULE_NAMES
        .iter()
        .copied()
        .chain([DNS_UDP_RULE_NAME, DNS_TCP_RULE_NAME])
        .try_for_each(|rule_name| policy.remove_rule(rule_name))
}

/// Build the display name used for a dynamically-published host port
/// rule, e.g. `ZLayer Published 8080/TCP`.
///
/// The name depends only on `port` and the protocol selected by `udp`
/// (`true` -> `UDP`, `false` -> `TCP`). That makes publishing
/// idempotent (the same port + protocol always maps to the same rule)
/// and lets a rule be removed later without tracking extra state.
fn published_port_rule_name(port: u16, udp: bool) -> String {
    let proto = match udp {
        true => "UDP",
        false => "TCP",
    };
    format!("ZLayer Published {port}/{proto}")
}

/// Make sure an inbound allow-rule exists for one dynamically published
/// host port.
///
/// `udp` selects the protocol (`true` = UDP, `false` = TCP). The rule
/// name is derived from `port` + protocol, so calling this again for the
/// same pair finds the existing rule and does nothing. Like every other
/// rule in this module it is scoped to the Private + Domain profiles
/// only — Public is intentionally excluded.
///
/// # Errors
///
/// Returns the error from opening the firewall policy or from adding
/// the rule.
pub(super) fn ensure_published_port(port: u16, udp: bool) -> Result<(), FirewallError> {
    let policy = FirewallPolicy::open()?;
    let name = published_port_rule_name(port, udp);

    if !policy.rule_exists(&name) {
        // Protocol id and description always travel together.
        let (protocol, description) = match udp {
            true => (
                NET_FW_IP_PROTOCOL_UDP.0,
                "ZLayer dynamically-published host port inbound UDP",
            ),
            false => (
                NET_FW_IP_PROTOCOL_TCP.0,
                "ZLayer dynamically-published host port inbound TCP",
            ),
        };

        policy.add_rule(&RuleSpec {
            name: &name,
            description,
            port,
            protocol,
        })?;
        tracing::info!(rule = %name, port = port, "installed published-port firewall rule");
        return Ok(());
    }

    tracing::debug!(rule = %name, "published-port firewall rule already present; skipping");
    Ok(())
}

/// Best-effort removal of the rule [`ensure_published_port`] installs
/// for `port` + protocol (`udp == true` for UDP, otherwise TCP).
///
/// Nothing is returned: a rule that is already gone is fine, and a
/// failure to open the policy or to remove the rule is only logged at
/// `warn` level.
pub(super) fn remove_published_port(port: u16, udp: bool) {
    let name = published_port_rule_name(port, udp);

    let policy = match FirewallPolicy::open() {
        Ok(policy) => policy,
        Err(e) => {
            tracing::warn!(rule = %name, error = %e, "failed to open firewall policy to remove published-port rule");
            return;
        }
    };

    if let Err(e) = policy.remove_rule(&name) {
        tracing::warn!(rule = %name, error = %e, "failed to remove published-port firewall rule");
    }
}

// Integration tests against the live Windows Firewall. Every test here
// mutates real system state and is therefore `#[ignore]`d by default.
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip: ensure then remove. Requires admin -- marked `#[ignore]`
    /// so it doesn't fire in the default `cargo test` run.
    #[test]
    #[ignore = "requires administrator privileges + Windows Defender Firewall service"]
    fn ensure_then_remove_roundtrip() {
        // The port numbers are arbitrary test values. Rules are matched
        // by display name, not by port, so on a host that already has
        // ZLayer rules installed this call skips them and the removal
        // below deletes them -- run this only on a scratch machine.
        ensure_overlay_rules(51820, 13669, 13670).expect("ensure failed");

        let policy = FirewallPolicy::open().expect("open policy");
        assert!(policy.rule_exists(OVERLAY_RULE_NAME));
        assert!(policy.rule_exists(API_RULE_NAME));
        assert!(policy.rule_exists(RAFT_RULE_NAME));
        assert!(policy.rule_exists(DNS_UDP_RULE_NAME));
        assert!(policy.rule_exists(DNS_TCP_RULE_NAME));
        // Release the handle before the next call opens its own.
        drop(policy);

        remove_overlay_rules().expect("remove failed");

        let policy = FirewallPolicy::open().expect("reopen policy");
        assert!(!policy.rule_exists(OVERLAY_RULE_NAME));
        assert!(!policy.rule_exists(API_RULE_NAME));
        assert!(!policy.rule_exists(RAFT_RULE_NAME));
        assert!(!policy.rule_exists(DNS_UDP_RULE_NAME));
        assert!(!policy.rule_exists(DNS_TCP_RULE_NAME));
    }

    /// A published-port rule round-trips: ensure installs it, remove tears
    /// it down, and re-ensuring is idempotent.
    #[test]
    #[ignore = "requires administrator privileges + Windows Defender Firewall service"]
    fn published_port_roundtrip() {
        ensure_published_port(18080, false).expect("ensure tcp");
        ensure_published_port(18080, false).expect("ensure tcp idempotent");
        ensure_published_port(18081, true).expect("ensure udp");

        let policy = FirewallPolicy::open().expect("open policy");
        assert!(policy.rule_exists(&published_port_rule_name(18080, false)));
        assert!(policy.rule_exists(&published_port_rule_name(18081, true)));
        // Different protocol on the same port is a distinct rule.
        assert!(!policy.rule_exists(&published_port_rule_name(18080, true)));
        drop(policy);

        remove_published_port(18080, false);
        remove_published_port(18081, true);
        // Removing a non-existent rule must be tolerated.
        remove_published_port(18080, false);

        let policy = FirewallPolicy::open().expect("reopen policy");
        assert!(!policy.rule_exists(&published_port_rule_name(18080, false)));
        assert!(!policy.rule_exists(&published_port_rule_name(18081, true)));
    }

    /// Calling `ensure_overlay_rules` twice in a row must not create
    /// duplicate rules or return an error.
    ///
    /// Only the absence of an error on the second call is checked; the
    /// test does not count rules, and it asserts the three cross-platform
    /// rule names but not the two DNS ones.
    #[test]
    #[ignore = "requires administrator privileges + Windows Defender Firewall service"]
    fn ensure_is_idempotent() {
        // Clean slate before and after to avoid leaking rules.
        let _ = remove_overlay_rules();

        ensure_overlay_rules(51820, 13669, 13670).expect("first ensure");
        ensure_overlay_rules(51820, 13669, 13670).expect("second ensure (idempotent)");

        let policy = FirewallPolicy::open().expect("open policy");
        assert!(policy.rule_exists(OVERLAY_RULE_NAME));
        assert!(policy.rule_exists(API_RULE_NAME));
        assert!(policy.rule_exists(RAFT_RULE_NAME));
        drop(policy);

        remove_overlay_rules().expect("cleanup");
    }
}