zlayer_overlayd/netlink.rs
1//! Rust netlink helpers that replace shell-outs to `ip`/`nsenter`/`sysctl`
2//! for per-container overlay network setup.
3//!
4//! This module is populated incrementally through a phased migration.
5//! Stage 1: `move_link_into_netns_and_rename` replaces the shell pair
6//! `ip link set <name> netns <pid>` + `nsenter -t <pid> -n ip
7//! link set <name> name <new>` with a single atomic RTNETLINK
8//! `SetLink` carrying both `IFLA_NET_NS_FD` and `IFLA_IFNAME`.
9//! This bypasses the `/proc/<pid>/ns/net` access problem caused
10//! by libcontainer setting `PR_SET_DUMPABLE(false)` on the
11//! container init process under `SELinux` enforcing.
12//! Stage 2: `create_veth_pair`, `delete_link_by_name`, and
13//! `set_link_up_by_name` replace the host-side veth shell
14//! commands (`ip link add ... type veth peer name ...`,
15//! `ip link delete ...`, `ip link set ... up`) used by
16//! `overlay_manager::attach_to_interface` and the orphan
17//! sweeper. These helpers talk RTNETLINK directly via the
18//! `rtnetlink` crate (async, tokio-backed).
19//! Stage 3: `with_netns`, `add_address_to_link_by_name`, and
20//! `add_default_route_via_dev` replace the remaining
21//! container-netns shell-outs in
22//! `overlay_manager::attach_to_interface`. `with_netns`
23//! runs a closure on a dedicated OS thread that has joined
24//! the target container's network namespace via `setns(2)`,
25//! while the two new RTNETLINK helpers operate on the
26//! current netns (so they must be invoked from inside a
27//! `with_netns` closure). This removes the last three
28//! `nsenter -t <pid> -n ip ...` shell-outs used to assign
29//! the container IP, bring `eth0` / `lo` up, and add the
30//! default route.
31
32#![cfg_attr(
33 not(target_os = "linux"),
34 allow(clippy::missing_errors_doc, clippy::unused_async)
35)]
36
37use thiserror::Error;
38
39/// Errors returned by the netlink helpers in this module.
40#[derive(Debug, Error)]
41pub enum NetlinkError {
42 /// Failed to open or access a file (typically `/proc/<pid>/ns/net`).
43 #[error("io error: {0}")]
44 Io(#[from] std::io::Error),
45
46 /// The requested link was not found in the current network namespace.
47 #[error("link '{0}' not found in current netns")]
48 NotFound(String),
49
50 /// A netlink operation failed.
51 #[error("netlink operation failed: {0}")]
52 Netlink(String),
53}
54
55/// Move a link from the current network namespace into the network
56/// namespace referenced by `ns_fd`, renaming it in the same atomic
57/// operation.
58///
59/// This is the fd-based variant of [`move_link_into_netns_and_rename`].
60/// Callers that have already opened `/proc/<pid>/ns/net` (e.g. to pin
61/// the namespace across multiple operations and survive a racing
62/// container init exit) should use this form so we don't reopen the
63/// path and lose the race.
64///
65/// The single RTNETLINK `SetLink` request carries both `IFLA_NET_NS_FD`
66/// and `IFLA_IFNAME`, so the kernel performs the move and the rename
67/// atomically.
68///
69/// # Errors
70///
71/// Returns [`NetlinkError::NotFound`] if `link_name` does not exist in
72/// the current netns. Returns [`NetlinkError::Netlink`] for any other
73/// netlink-level failure (permission denied, name collision in the
74/// target netns, etc.).
75///
76/// Implemented directly against the `rtnetlink` crate (overlayd has no
77/// libcontainer dependency): a single `LinkSetRequest` carrying
78/// `setns_by_fd` + `name` performs the move and rename atomically.
79#[cfg(target_os = "linux")]
80pub fn move_link_into_netns_fd_and_rename(
81 link_name: &str,
82 ns_fd: std::os::fd::BorrowedFd<'_>,
83 new_name: &str,
84) -> Result<(), NetlinkError> {
85 use std::os::fd::AsRawFd;
86
87 // `setns` of the moved link must reference the fd while the request
88 // executes, so we drive the whole sequence on a local current-thread
89 // runtime rather than requiring an ambient tokio context. The raw fd
90 // is borrowed (the caller retains ownership of `ns_fd`).
91 let raw_fd = ns_fd.as_raw_fd();
92 let link_name = link_name.to_string();
93 let new_name = new_name.to_string();
94
95 let rt = tokio::runtime::Builder::new_current_thread()
96 .enable_all()
97 .build()
98 .map_err(|e| NetlinkError::Netlink(format!("local runtime build failed: {e}")))?;
99
100 rt.block_on(async move {
101 use futures_util::stream::TryStreamExt;
102
103 let (connection, handle, _) = rtnetlink::new_connection()
104 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
105 tokio::spawn(connection);
106
107 // Resolve the host-side interface index. Treat "No such device"
108 // as our dedicated NotFound variant so callers can distinguish
109 // "nothing to move" from real failures.
110 let link = handle
111 .link()
112 .get()
113 .match_name(link_name.clone())
114 .execute()
115 .try_next()
116 .await
117 .map_err(|e| {
118 let msg = e.to_string();
119 if msg.contains("No such device") {
120 NetlinkError::NotFound(link_name.clone())
121 } else {
122 NetlinkError::Netlink(format!("link lookup failed for {link_name}: {msg}"))
123 }
124 })?
125 .ok_or_else(|| NetlinkError::NotFound(link_name.clone()))?;
126
127 let index = link.header.index;
128
129 // Atomically move the link into the target netns and rename it.
130 handle
131 .link()
132 .set(index)
133 .setns_by_fd(raw_fd)
134 .name(new_name.clone())
135 .execute()
136 .await
137 .map_err(|e| {
138 NetlinkError::Netlink(format!(
139 "setns_by_fd(index={index}, new_name={new_name}) failed: {e}"
140 ))
141 })
142 })
143}
144
/// Placeholder for non-Linux Unix targets.
///
/// Only compiled when the target is Unix but not Linux (see the `cfg`
/// gate below). All arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(all(not(target_os = "linux"), unix))]
pub fn move_link_into_netns_fd_and_rename(
    _link_name: &str,
    _ns_fd: std::os::fd::BorrowedFd<'_>,
    _new_name: &str,
) -> Result<(), NetlinkError> {
    let reason = String::from("move_link_into_netns_fd_and_rename is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
165
166/// Move a link from the current network namespace into the target PID's
167/// network namespace, renaming it in the same atomic operation.
168///
169/// Thin wrapper around [`move_link_into_netns_fd_and_rename`] that
170/// opens `/proc/<target_pid>/ns/net` then delegates. Kept for
171/// backward compatibility and for callers that only need a single
172/// operation on the target netns. Callers that need to perform
173/// multiple operations on the same netns (and want to survive a
174/// racing exit of the container init process) should open the fd
175/// themselves and call [`move_link_into_netns_fd_and_rename`]
176/// directly.
177///
178/// # Errors
179///
180/// Returns [`NetlinkError::Io`] if `/proc/<target_pid>/ns/net` cannot be
181/// opened (e.g. the container process is gone or is not dumpable and we
182/// lack `CAP_SYS_PTRACE`). Returns [`NetlinkError::NotFound`] if
183/// `link_name` does not exist in the current netns. Returns
184/// [`NetlinkError::Netlink`] for any other netlink-level failure
185/// (permission denied, name collision in the target netns, etc.).
186#[cfg(target_os = "linux")]
187pub fn move_link_into_netns_and_rename(
188 link_name: &str,
189 target_pid: u32,
190 new_name: &str,
191) -> Result<(), NetlinkError> {
192 use std::os::fd::{AsFd, OwnedFd};
193
194 let ns_file = std::fs::File::open(format!("/proc/{target_pid}/ns/net"))?;
195 let ns_fd: OwnedFd = OwnedFd::from(ns_file);
196 move_link_into_netns_fd_and_rename(link_name, ns_fd.as_fd(), new_name)
197}
198
/// Placeholder for non-Linux targets; keeps the signature available so
/// callers compile on every platform. All arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub fn move_link_into_netns_and_rename(
    _link_name: &str,
    _target_pid: u32,
    _new_name: &str,
) -> Result<(), NetlinkError> {
    let reason = String::from("move_link_into_netns_and_rename is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
212
213/// Create a veth pair with the two ends named `host_name` and `peer_name`.
214///
215/// Both ends start in the current network namespace. The caller is
216/// responsible for moving the peer end into the container netns (see
217/// [`move_link_into_netns_and_rename`]) and bringing the host end up
218/// (see [`set_link_up_by_name`]).
219///
220/// Replaces the shell-out:
221/// ip link add `<host_name>` type veth peer name `<peer_name>`
222///
223/// # Errors
224///
225/// Returns [`NetlinkError::Netlink`] if RTNETLINK fails for any
226/// reason. `EEXIST` / "File exists" is surfaced verbatim so the caller
227/// can distinguish a leaked endpoint (typically a sign the orphan
228/// sweeper missed something) from a permission or interface-name
229/// problem.
230#[cfg(target_os = "linux")]
231pub async fn create_veth_pair(host_name: &str, peer_name: &str) -> Result<(), NetlinkError> {
232 let (connection, handle, _) = rtnetlink::new_connection()
233 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
234 tokio::spawn(connection);
235
236 handle
237 .link()
238 .add()
239 .veth(host_name.to_string(), peer_name.to_string())
240 .execute()
241 .await
242 .map_err(|e| {
243 let msg = e.to_string();
244 if msg.contains("File exists") || msg.contains("EEXIST") {
245 NetlinkError::Netlink(format!(
246 "veth pair already exists: host={host_name} peer={peer_name}: {msg}"
247 ))
248 } else {
249 NetlinkError::Netlink(format!(
250 "veth create failed (host={host_name}, peer={peer_name}): {msg}"
251 ))
252 }
253 })
254}
255
/// Placeholder for non-Linux targets. Both arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn create_veth_pair(_host_name: &str, _peer_name: &str) -> Result<(), NetlinkError> {
    let reason = String::from("create_veth_pair is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
263
264/// Delete the link by name. Idempotent: returns `Ok(())` if the link
265/// does not exist. Any other error surfaces as
266/// [`NetlinkError::Netlink`].
267///
268/// Replaces the shell-out:
269/// ip link delete `<name>`
270///
271/// Used in `overlay_manager::attach_to_interface` pre-cleanup,
272/// cleanup-on-error, and the orphan-veth sweeper.
273///
274/// # Errors
275///
276/// Returns [`NetlinkError::Netlink`] if RTNETLINK reports a failure
277/// other than `ENODEV` / "No such device" (which are treated as
278/// success so this is safe to call unconditionally).
279#[cfg(target_os = "linux")]
280pub async fn delete_link_by_name(name: &str) -> Result<(), NetlinkError> {
281 use futures_util::stream::TryStreamExt;
282
283 let (connection, handle, _) = rtnetlink::new_connection()
284 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
285 tokio::spawn(connection);
286
287 // Look up the link by name. Treat "not found" as success so the
288 // helper is safe to call unconditionally in cleanup paths.
289 let lookup = handle
290 .link()
291 .get()
292 .match_name(name.to_string())
293 .execute()
294 .try_next()
295 .await;
296
297 let link = match lookup {
298 Ok(Some(link)) => link,
299 Ok(None) => return Ok(()),
300 Err(rtnetlink::Error::NetlinkError(err)) => {
301 // libc::ENODEV == 19. netlink-packet-core reports the raw
302 // errno as a negative i32 in `code`, but the exact type has
303 // moved between versions, so match by both numeric code and
304 // the human-readable message for belt-and-suspenders safety.
305 let msg = err.to_string();
306 let is_enodev = err
307 .code
308 .is_some_and(|c| c.get().unsigned_abs() == libc::ENODEV as u32);
309 if is_enodev || msg.contains("No such device") {
310 return Ok(());
311 }
312 return Err(NetlinkError::Netlink(format!(
313 "link lookup failed for {name}: {msg}"
314 )));
315 }
316 Err(e) => {
317 let msg = e.to_string();
318 if msg.contains("No such device") {
319 return Ok(());
320 }
321 return Err(NetlinkError::Netlink(format!(
322 "link lookup failed for {name}: {msg}"
323 )));
324 }
325 };
326
327 let index = link.header.index;
328
329 handle
330 .link()
331 .del(index)
332 .execute()
333 .await
334 .map_err(|e| NetlinkError::Netlink(format!("link delete failed for {name}: {e}")))
335}
336
/// Placeholder for non-Linux targets. The argument is ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn delete_link_by_name(_name: &str) -> Result<(), NetlinkError> {
    let reason = String::from("delete_link_by_name is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
344
345/// List all network interfaces in the current netns.
346///
347/// Returns a `Vec` of `(index, name)` tuples for every link the kernel
348/// reports. Used by the orphan veth sweeper to find `veth-<pid>` and
349/// `vc-<pid>` links whose owning PID is dead, so it can clean them up
350/// via [`delete_link_by_name`].
351///
352/// Replaces the shell-out:
353/// ip -br link
354///
355/// Issues a single RTNETLINK `RTM_GETLINK` dump request and iterates
356/// the resulting stream of `LinkMessage`s. Each message contributes
357/// one `(index, name)` tuple; messages without an `IFLA_IFNAME`
358/// attribute (extremely rare in practice — the kernel always emits
359/// one for configured devices) are silently skipped.
360///
361/// # Errors
362///
363/// Returns [`NetlinkError::Netlink`] if the rtnetlink socket cannot
364/// be created or if the dump stream itself reports a failure.
365#[cfg(target_os = "linux")]
366pub async fn list_all_links() -> Result<Vec<(u32, String)>, NetlinkError> {
367 use futures_util::stream::TryStreamExt;
368 use netlink_packet_route::link::LinkAttribute;
369
370 let (connection, handle, _) = rtnetlink::new_connection()
371 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
372 tokio::spawn(connection);
373
374 let mut stream = handle.link().get().execute();
375 let mut links = Vec::new();
376
377 while let Some(msg) = stream
378 .try_next()
379 .await
380 .map_err(|e| NetlinkError::Netlink(format!("link dump failed: {e}")))?
381 {
382 // LinkHeader.index is already u32 in netlink-packet-route
383 // 0.19 — no cast needed.
384 let index = msg.header.index;
385 let Some(name) = msg.attributes.iter().find_map(|a| match a {
386 LinkAttribute::IfName(n) => Some(n.clone()),
387 _ => None,
388 }) else {
389 continue;
390 };
391 links.push((index, name));
392 }
393
394 Ok(links)
395}
396
/// Placeholder for non-Linux targets.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn list_all_links() -> Result<Vec<(u32, String)>, NetlinkError> {
    let reason = String::from("list_all_links is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
404
405/// Set the link identified by `name` to the "up" administrative state.
406///
407/// Replaces the shell-out:
408/// ip link set `<name>` up
409///
410/// Unlike [`delete_link_by_name`] this is *not* idempotent for missing
411/// links: if the link does not exist the caller almost certainly has a
412/// bug upstream (we only call this on a veth end we just created), so
413/// we return [`NetlinkError::NotFound`] rather than silently succeeding.
414///
415/// # Errors
416///
417/// Returns [`NetlinkError::NotFound`] if no link with the given name
418/// exists in the current netns. Returns [`NetlinkError::Netlink`] for
419/// any other RTNETLINK failure (permission denied, etc.).
420#[cfg(target_os = "linux")]
421pub async fn set_link_up_by_name(name: &str) -> Result<(), NetlinkError> {
422 use futures_util::stream::TryStreamExt;
423
424 let (connection, handle, _) = rtnetlink::new_connection()
425 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
426 tokio::spawn(connection);
427
428 let link = handle
429 .link()
430 .get()
431 .match_name(name.to_string())
432 .execute()
433 .try_next()
434 .await
435 .map_err(|e| {
436 let msg = e.to_string();
437 if msg.contains("No such device") {
438 NetlinkError::NotFound(name.to_string())
439 } else {
440 NetlinkError::Netlink(format!("link lookup failed for {name}: {msg}"))
441 }
442 })?
443 .ok_or_else(|| NetlinkError::NotFound(name.to_string()))?;
444
445 let index = link.header.index;
446
447 handle
448 .link()
449 .set(index)
450 .up()
451 .execute()
452 .await
453 .map_err(|e| NetlinkError::Netlink(format!("link set up failed for {name}: {e}")))
454}
455
/// Placeholder for non-Linux targets. The argument is ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn set_link_up_by_name(_name: &str) -> Result<(), NetlinkError> {
    let reason = String::from("set_link_up_by_name is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
463
464/// Add an IP address to the link identified by `name` in the current
465/// network namespace.
466///
467/// Replaces (in combination with [`with_netns`]):
468/// nsenter -t `<pid>` -n ip \[-6\] addr add `<addr>/<prefix_len>` dev `<name>`
469///
470/// `addr` may be v4 or v6. `prefix_len` is the CIDR prefix length
471/// (24 for a `/24`, 64 for a `/64`, etc.).
472///
473/// This helper operates on the CURRENT network namespace — it looks
474/// up the interface index via a local rtnetlink socket. To target a
475/// container's netns, wrap the call inside [`with_netns`].
476///
477/// # Errors
478///
479/// Returns [`NetlinkError::NotFound`] if the link is missing. Returns
480/// [`NetlinkError::Netlink`] for any other rtnetlink failure
481/// (permission denied, EEXIST on a duplicate address, etc.).
482#[cfg(target_os = "linux")]
483pub async fn add_address_to_link_by_name(
484 name: &str,
485 addr: std::net::IpAddr,
486 prefix_len: u8,
487) -> Result<(), NetlinkError> {
488 use futures_util::stream::TryStreamExt;
489
490 let (connection, handle, _) = rtnetlink::new_connection()
491 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
492 tokio::spawn(connection);
493
494 let link = handle
495 .link()
496 .get()
497 .match_name(name.to_string())
498 .execute()
499 .try_next()
500 .await
501 .map_err(|e| {
502 let msg = e.to_string();
503 if msg.contains("No such device") {
504 NetlinkError::NotFound(name.to_string())
505 } else {
506 NetlinkError::Netlink(format!("link lookup failed for {name}: {msg}"))
507 }
508 })?
509 .ok_or_else(|| NetlinkError::NotFound(name.to_string()))?;
510
511 let index = link.header.index;
512
513 handle
514 .address()
515 .add(index, addr, prefix_len)
516 .execute()
517 .await
518 .map_err(|e| {
519 NetlinkError::Netlink(format!(
520 "address add failed for {name} ({addr}/{prefix_len}): {e}"
521 ))
522 })
523}
524
/// Placeholder for non-Linux targets. All arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn add_address_to_link_by_name(
    _name: &str,
    _addr: std::net::IpAddr,
    _prefix_len: u8,
) -> Result<(), NetlinkError> {
    let reason = String::from("add_address_to_link_by_name is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
536
537/// Add a default route via the given device name in the current
538/// network namespace.
539///
540/// Replaces (in combination with [`with_netns`]):
541/// nsenter -t `<pid>` -n ip \[-6\] route add default dev `<dev_name>`
542///
543/// The route is a direct, link-scope route: no gateway, the kernel
544/// ARPs / uses NDISC on the device for destination resolution. This
545/// is the correct form for a point-to-point veth link where the peer
546/// is reachable directly.
547///
548/// For IPv4 the destination prefix is `0.0.0.0/0`. For IPv6 it is
549/// `::/0`. Controlled by `is_v6`.
550///
551/// # Errors
552///
553/// Returns [`NetlinkError::NotFound`] if the device is missing.
554/// Returns [`NetlinkError::Netlink`] for any other rtnetlink failure.
555#[cfg(target_os = "linux")]
556pub async fn add_default_route_via_dev(dev_name: &str, is_v6: bool) -> Result<(), NetlinkError> {
557 use futures_util::stream::TryStreamExt;
558 use netlink_packet_route::route::RouteScope;
559
560 let (connection, handle, _) = rtnetlink::new_connection()
561 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
562 tokio::spawn(connection);
563
564 let link = handle
565 .link()
566 .get()
567 .match_name(dev_name.to_string())
568 .execute()
569 .try_next()
570 .await
571 .map_err(|e| {
572 let msg = e.to_string();
573 if msg.contains("No such device") {
574 NetlinkError::NotFound(dev_name.to_string())
575 } else {
576 NetlinkError::Netlink(format!("link lookup failed for {dev_name}: {msg}"))
577 }
578 })?
579 .ok_or_else(|| NetlinkError::NotFound(dev_name.to_string()))?;
580
581 let oif_idx = link.header.index;
582
583 if is_v6 {
584 handle
585 .route()
586 .add()
587 .v6()
588 .destination_prefix(std::net::Ipv6Addr::UNSPECIFIED, 0)
589 .output_interface(oif_idx)
590 .scope(RouteScope::Link)
591 .execute()
592 .await
593 .map_err(|e| {
594 NetlinkError::Netlink(format!("default route add v6 via {dev_name} failed: {e}"))
595 })
596 } else {
597 handle
598 .route()
599 .add()
600 .v4()
601 .destination_prefix(std::net::Ipv4Addr::UNSPECIFIED, 0)
602 .output_interface(oif_idx)
603 .scope(RouteScope::Link)
604 .execute()
605 .await
606 .map_err(|e| {
607 NetlinkError::Netlink(format!("default route add v4 via {dev_name} failed: {e}"))
608 })
609 }
610}
611
/// Placeholder for non-Linux targets. Both arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn add_default_route_via_dev(_dev_name: &str, _is_v6: bool) -> Result<(), NetlinkError> {
    let reason = String::from("add_default_route_via_dev is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
619
620/// Add a default route pointing at the given gateway IP in the current
621/// network namespace.
622///
623/// Replaces (in combination with [`with_netns`]):
624/// nsenter -t `<pid>` -n ip \[-6\] route add default via `<gateway>`
625///
626/// Used by the per-service bridge attach path: containers join the
627/// service bridge via a veth pair and reach the rest of the overlay
628/// through the bridge's L3 gateway IP. The address family of the route
629/// is inferred from `gateway`.
630///
631/// # Errors
632///
633/// Returns [`NetlinkError::Netlink`] for any rtnetlink failure.
634#[cfg(target_os = "linux")]
635pub async fn add_default_route_via_gateway(gateway: std::net::IpAddr) -> Result<(), NetlinkError> {
636 let (connection, handle, _) = rtnetlink::new_connection()
637 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
638 tokio::spawn(connection);
639
640 match gateway {
641 std::net::IpAddr::V4(gw) => handle
642 .route()
643 .add()
644 .v4()
645 .destination_prefix(std::net::Ipv4Addr::UNSPECIFIED, 0)
646 .gateway(gw)
647 .execute()
648 .await
649 .map_err(|e| {
650 NetlinkError::Netlink(format!("default route add v4 via gateway {gw} failed: {e}"))
651 }),
652 std::net::IpAddr::V6(gw) => handle
653 .route()
654 .add()
655 .v6()
656 .destination_prefix(std::net::Ipv6Addr::UNSPECIFIED, 0)
657 .gateway(gw)
658 .execute()
659 .await
660 .map_err(|e| {
661 NetlinkError::Netlink(format!("default route add v6 via gateway {gw} failed: {e}"))
662 }),
663 }
664}
665
/// Placeholder for non-Linux targets. The argument is ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn add_default_route_via_gateway(_gateway: std::net::IpAddr) -> Result<(), NetlinkError> {
    let reason = String::from("add_default_route_via_gateway is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
673
674/// Add or replace a route to `dest/prefix_len` that forwards via the
675/// interface named `dev_name`. Optional `src` sets the preferred source
676/// address.
677///
678/// Replaces the shell-outs:
679/// ip route replace `<dest>/<prefix_len>` dev `<dev_name>` \[src `<src>`\]
680/// ip -6 route replace `<dest>/<prefix_len>` dev `<dev_name>` \[src `<src>`\]
681///
682/// Uses `NLM_F_REPLACE | NLM_F_CREATE` semantics (via rtnetlink's
683/// `.replace()` on the route add builder) so stale routes left behind
684/// by a previous daemon run don't cause `EEXIST`.
685///
686/// The route is installed with link scope (direct-via-dev, no
687/// gateway) which is the correct form for a per-container `/32` or
688/// `/128` pointing at a host-side veth endpoint.
689///
690/// `dest` and `src` (if provided) must have matching address families
691/// — passing a v4 `dest` with a v6 `src` returns
692/// [`NetlinkError::Netlink`] without touching the kernel.
693///
694/// # Errors
695///
696/// Returns [`NetlinkError::NotFound`] if `dev_name` does not exist in
697/// the current netns. Returns [`NetlinkError::Netlink`] on address
698/// family mismatch or any RTNETLINK failure.
699#[cfg(target_os = "linux")]
700pub async fn replace_route_via_dev(
701 dest: std::net::IpAddr,
702 prefix_len: u8,
703 dev_name: &str,
704 src: Option<std::net::IpAddr>,
705) -> Result<(), NetlinkError> {
706 use std::net::IpAddr;
707
708 use futures_util::stream::TryStreamExt;
709 use netlink_packet_route::route::RouteScope;
710
711 let (connection, handle, _) = rtnetlink::new_connection()
712 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
713 tokio::spawn(connection);
714
715 let link = handle
716 .link()
717 .get()
718 .match_name(dev_name.to_string())
719 .execute()
720 .try_next()
721 .await
722 .map_err(|e| {
723 let msg = e.to_string();
724 if msg.contains("No such device") {
725 NetlinkError::NotFound(dev_name.to_string())
726 } else {
727 NetlinkError::Netlink(format!("link lookup failed for {dev_name}: {msg}"))
728 }
729 })?
730 .ok_or_else(|| NetlinkError::NotFound(dev_name.to_string()))?;
731
732 let oif_idx = link.header.index;
733
734 match (dest, src) {
735 (IpAddr::V4(d), Some(IpAddr::V4(s))) => handle
736 .route()
737 .add()
738 .v4()
739 .destination_prefix(d, prefix_len)
740 .output_interface(oif_idx)
741 .scope(RouteScope::Link)
742 .pref_source(s)
743 .replace()
744 .execute()
745 .await
746 .map_err(|e| {
747 NetlinkError::Netlink(format!(
748 "route replace v4 {d}/{prefix_len} dev {dev_name} src {s} failed: {e}"
749 ))
750 }),
751 (IpAddr::V4(d), None) => handle
752 .route()
753 .add()
754 .v4()
755 .destination_prefix(d, prefix_len)
756 .output_interface(oif_idx)
757 .scope(RouteScope::Link)
758 .replace()
759 .execute()
760 .await
761 .map_err(|e| {
762 NetlinkError::Netlink(format!(
763 "route replace v4 {d}/{prefix_len} dev {dev_name} failed: {e}"
764 ))
765 }),
766 (IpAddr::V6(d), Some(IpAddr::V6(s))) => handle
767 .route()
768 .add()
769 .v6()
770 .destination_prefix(d, prefix_len)
771 .output_interface(oif_idx)
772 .scope(RouteScope::Link)
773 .pref_source(s)
774 .replace()
775 .execute()
776 .await
777 .map_err(|e| {
778 NetlinkError::Netlink(format!(
779 "route replace v6 {d}/{prefix_len} dev {dev_name} src {s} failed: {e}"
780 ))
781 }),
782 (IpAddr::V6(d), None) => handle
783 .route()
784 .add()
785 .v6()
786 .destination_prefix(d, prefix_len)
787 .output_interface(oif_idx)
788 .scope(RouteScope::Link)
789 .replace()
790 .execute()
791 .await
792 .map_err(|e| {
793 NetlinkError::Netlink(format!(
794 "route replace v6 {d}/{prefix_len} dev {dev_name} failed: {e}"
795 ))
796 }),
797 (IpAddr::V4(_), Some(IpAddr::V6(_))) | (IpAddr::V6(_), Some(IpAddr::V4(_))) => Err(
798 NetlinkError::Netlink(format!("address family mismatch: dest={dest} src={src:?}")),
799 ),
800 }
801}
802
/// Placeholder for non-Linux targets. All arguments are ignored.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(not(target_os = "linux"))]
pub async fn replace_route_via_dev(
    _dest: std::net::IpAddr,
    _prefix_len: u8,
    _dev_name: &str,
    _src: Option<std::net::IpAddr>,
) -> Result<(), NetlinkError> {
    let reason = String::from("replace_route_via_dev is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
815
816/// Set a sysctl via the `/proc/sys/...` filesystem.
817///
818/// `key` uses dotted form like `net.ipv4.ip_forward`; dots are
819/// translated to path separators so the effective path is
820/// `/proc/sys/net/ipv4/ip_forward`. Writes the string form of
821/// `value` to the file.
822///
823/// Replaces the shell-outs:
824/// sysctl -w `<key>`=`<value>`
825///
826/// Writing to `/proc/sys/...` is the kernel-standard way of setting
827/// sysctls and works under any confinement that still allows write
828/// access to `/proc/sys` (which the overlay manager needs anyway for
829/// its other operations).
830///
831/// # Errors
832///
833/// Returns [`NetlinkError::Io`] if the write fails (e.g. permission
834/// denied, file missing because the sysctl doesn't exist on this
835/// kernel, etc.).
836pub fn set_sysctl(key: &str, value: &str) -> Result<(), NetlinkError> {
837 let path = format!("/proc/sys/{}", key.replace('.', "/"));
838 std::fs::write(&path, value)?;
839 Ok(())
840}
841
842/// Run a synchronous closure inside the network namespace referenced
843/// by the given `OwnedFd`.
844///
845/// This is the fd-based variant of [`with_netns`]. Callers that have
846/// already opened `/proc/<pid>/ns/net` (e.g. to pin the namespace
847/// across multiple operations) should use this form to reuse the
848/// same fd and avoid re-opening the procfs path — the reopen would
849/// fail with `ENOENT` if the container init process has exited in
850/// the meantime, even though the namespace itself is still alive
851/// because our pinned fd holds a reference.
852///
853/// The `OwnedFd` is moved into the dedicated worker thread and
854/// closed when the thread exits. Spawns a fresh OS thread (not a
855/// tokio blocking worker) because `setns` affects the whole thread
856/// and we don't want to contaminate a shared worker.
857///
858/// # Errors
859///
860/// Returns [`NetlinkError::Netlink`] if `setns` fails or the
861/// dedicated thread panics. Any error returned by the closure itself
862/// is propagated verbatim.
863#[cfg(target_os = "linux")]
864pub fn with_netns_fd<F, T>(ns_fd: std::os::fd::OwnedFd, f: F) -> Result<T, NetlinkError>
865where
866 F: FnOnce() -> Result<T, NetlinkError> + Send + 'static,
867 T: Send + 'static,
868{
869 let join_handle = std::thread::spawn(move || -> Result<T, NetlinkError> {
870 nix::sched::setns(&ns_fd, nix::sched::CloneFlags::CLONE_NEWNET)
871 .map_err(|e| NetlinkError::Netlink(format!("setns(ns_fd) failed: {e}")))?;
872 // Keep the fd alive for the duration of the closure even
873 // though setns only needs it for the syscall itself. Dropping
874 // it explicitly after the closure makes the lifetime obvious.
875 let result = f();
876 drop(ns_fd);
877 result
878 });
879
880 join_handle
881 .join()
882 .map_err(|_| NetlinkError::Netlink("with_netns_fd thread panicked".to_string()))?
883}
884
/// Placeholder for non-Linux Unix targets.
///
/// Only compiled when the target is Unix but not Linux (see the `cfg`
/// gate below). The descriptor is dropped and the closure is never
/// run.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`]: the operation is not
/// supported on this target.
#[cfg(all(not(target_os = "linux"), unix))]
pub fn with_netns_fd<F, T>(_ns_fd: std::os::fd::OwnedFd, _f: F) -> Result<T, NetlinkError>
where
    F: FnOnce() -> Result<T, NetlinkError> + Send + 'static,
    T: Send + 'static,
{
    let reason = String::from("with_netns_fd is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
898
899/// Run a synchronous closure inside the network namespace of the
900/// given PID.
901///
902/// Thin wrapper around [`with_netns_fd`] that opens
903/// `/proc/<target_pid>/ns/net` then delegates. Kept for backward
904/// compatibility and for callers that only need a single operation
905/// on the target netns. Callers that need to pin the namespace
906/// across multiple operations (and survive a racing exit of the
907/// container init) should open the fd themselves and call
908/// [`with_netns_fd`] directly.
909///
910/// Because `setns` is synchronous and `rtnetlink` is async, the
911/// typical usage pattern inside the closure is to build a local
912/// current-thread tokio runtime and `block_on` the netlink calls.
913/// See [`with_netns_async`] for a convenience wrapper that does
914/// exactly this.
915///
916/// # Errors
917///
918/// Returns [`NetlinkError::Io`] if `/proc/<target_pid>/ns/net` cannot
919/// be opened. Returns [`NetlinkError::Netlink`] if `setns` fails or
920/// the dedicated thread panics. Any error returned by the closure
921/// itself is propagated verbatim.
922#[cfg(target_os = "linux")]
923pub fn with_netns<F, T>(target_pid: u32, f: F) -> Result<T, NetlinkError>
924where
925 F: FnOnce() -> Result<T, NetlinkError> + Send + 'static,
926 T: Send + 'static,
927{
928 use std::os::fd::OwnedFd;
929
930 let ns_file = std::fs::File::open(format!("/proc/{target_pid}/ns/net"))?;
931 let ns_fd: OwnedFd = OwnedFd::from(ns_file);
932 with_netns_fd(ns_fd, f)
933}
934
/// Placeholder for targets other than Linux.
///
/// Ignores its arguments and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub fn with_netns<F, T>(_target_pid: u32, _f: F) -> Result<T, NetlinkError>
where
    F: FnOnce() -> Result<T, NetlinkError> + Send + 'static,
    T: Send + 'static,
{
    let reason = "with_netns is only supported on Linux";
    Err(NetlinkError::Netlink(reason.to_owned()))
}
946
947/// Convenience wrapper around [`with_netns_fd`] that builds a local
948/// current-thread tokio runtime inside the dedicated thread and
949/// drives the provided async future to completion.
950///
951/// The future is produced by calling `f()` from inside the thread
952/// that has already joined the target netns, so any rtnetlink
953/// operations awaited inside the future will talk to the target
954/// netns's kernel.
955///
956/// The local runtime is lightweight (single-thread, built per call)
957/// and only drives a handful of netlink messages before being
958/// dropped with the thread.
959///
960/// The `OwnedFd` is moved into the worker thread and closed when
961/// the thread exits.
962///
963/// # Errors
964///
965/// Returns [`NetlinkError::Netlink`] per [`with_netns_fd`], plus
966/// [`NetlinkError::Netlink`] if the local runtime fails to build.
967/// Any error returned by the future is propagated verbatim.
968#[cfg(target_os = "linux")]
969pub fn with_netns_fd_async<F, Fut, T>(ns_fd: std::os::fd::OwnedFd, f: F) -> Result<T, NetlinkError>
970where
971 F: FnOnce() -> Fut + Send + 'static,
972 Fut: std::future::Future<Output = Result<T, NetlinkError>>,
973 T: Send + 'static,
974{
975 with_netns_fd(ns_fd, move || {
976 let rt = tokio::runtime::Builder::new_current_thread()
977 .enable_all()
978 .build()
979 .map_err(|e| NetlinkError::Netlink(format!("local runtime build failed: {e}")))?;
980 rt.block_on(f())
981 })
982}
983
/// Fallback for Unix targets other than Linux (macOS, the BSDs).
///
/// Both arguments are ignored and the call always fails. This variant is
/// intentionally not compiled on Windows: `OwnedFd` is Unix-only, and the
/// sole caller chain (`attach_to_interface` in `overlay_manager.rs`) is
/// itself gated on `#[cfg(target_os = "linux")]`.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(all(not(target_os = "linux"), unix))]
pub fn with_netns_fd_async<F, Fut, T>(
    _ns_fd: std::os::fd::OwnedFd,
    _f: F,
) -> Result<T, NetlinkError>
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: std::future::Future<Output = Result<T, NetlinkError>>,
    T: Send + 'static,
{
    let reason = String::from("with_netns_fd_async is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
1001
1002/// Convenience wrapper around [`with_netns`] that builds a local
1003/// current-thread tokio runtime inside the dedicated thread and
1004/// drives the provided async future to completion.
1005///
1006/// Thin wrapper around [`with_netns_fd_async`] that opens
1007/// `/proc/<target_pid>/ns/net` then delegates.
1008///
1009/// # Errors
1010///
1011/// Returns [`NetlinkError::Io`] / [`NetlinkError::Netlink`] per
1012/// [`with_netns`], plus [`NetlinkError::Netlink`] if the local
1013/// runtime fails to build. Any error returned by the future is
1014/// propagated verbatim.
1015#[cfg(target_os = "linux")]
1016pub fn with_netns_async<F, Fut, T>(target_pid: u32, f: F) -> Result<T, NetlinkError>
1017where
1018 F: FnOnce() -> Fut + Send + 'static,
1019 Fut: std::future::Future<Output = Result<T, NetlinkError>>,
1020 T: Send + 'static,
1021{
1022 use std::os::fd::OwnedFd;
1023
1024 let ns_file = std::fs::File::open(format!("/proc/{target_pid}/ns/net"))?;
1025 let ns_fd: OwnedFd = OwnedFd::from(ns_file);
1026 with_netns_fd_async(ns_fd, f)
1027}
1028
/// Placeholder for targets other than Linux.
///
/// Ignores its arguments and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub fn with_netns_async<F, Fut, T>(_target_pid: u32, _f: F) -> Result<T, NetlinkError>
where
    F: FnOnce() -> Fut + Send + 'static,
    Fut: std::future::Future<Output = Result<T, NetlinkError>>,
    T: Send + 'static,
{
    let reason = "with_netns_async is only supported on Linux";
    Err(NetlinkError::Netlink(reason.to_owned()))
}
1041
1042/// Create a Linux bridge interface with the given name.
1043///
1044/// Replaces the shell-out:
1045/// ip link add name `<name>` type bridge
1046///
1047/// Idempotent: if a link with that name already exists this returns
1048/// `Ok(())`. This matches how the overlay manager's per-service bridge
1049/// creation path needs to behave — multiple containers landing on the
1050/// same service-on-node bridge must all see "bridge ready" after a
1051/// successful call without racing against existence checks.
1052///
1053/// The bridge is created in the current network namespace. Callers
1054/// that need a different netns should wrap with [`with_netns_async`].
1055/// The bridge is created in the administratively-down state — call
1056/// [`set_link_up_by_name`] separately once any other attributes
1057/// ([`set_bridge_stp`] etc.) have been applied.
1058///
1059/// # Errors
1060///
1061/// Returns [`NetlinkError::Netlink`] for any RTNETLINK failure other
1062/// than `EEXIST` (which is treated as success).
1063#[cfg(target_os = "linux")]
1064pub async fn create_bridge(name: &str) -> Result<(), NetlinkError> {
1065 let (connection, handle, _) = rtnetlink::new_connection()
1066 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
1067 tokio::spawn(connection);
1068
1069 match handle.link().add().bridge(name.to_string()).execute().await {
1070 Ok(()) => Ok(()),
1071 Err(rtnetlink::Error::NetlinkError(err)) => {
1072 // EEXIST means a link with this name already exists. We
1073 // intentionally do NOT verify that the existing link is
1074 // actually a bridge — callers using stable per-service
1075 // names own that invariant, and re-checking here would
1076 // require another rtnetlink round-trip on the hot path.
1077 let is_eexist = err
1078 .code
1079 .is_some_and(|c| c.get().unsigned_abs() == libc::EEXIST as u32);
1080 let msg = err.to_string();
1081 if is_eexist || msg.contains("File exists") {
1082 Ok(())
1083 } else {
1084 Err(NetlinkError::Netlink(format!(
1085 "bridge create failed for {name}: {msg}"
1086 )))
1087 }
1088 }
1089 Err(e) => {
1090 let msg = e.to_string();
1091 if msg.contains("File exists") {
1092 Ok(())
1093 } else {
1094 Err(NetlinkError::Netlink(format!(
1095 "bridge create failed for {name}: {msg}"
1096 )))
1097 }
1098 }
1099 }
1100}
1101
/// Placeholder for targets other than Linux.
///
/// Ignores its argument and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub async fn create_bridge(_name: &str) -> Result<(), NetlinkError> {
    let reason = String::from("create_bridge is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
1109
1110/// Delete the bridge interface with the given name.
1111///
1112/// Replaces the shell-out:
1113/// ip link delete `<name>` type bridge
1114///
1115/// Idempotent: returns `Ok(())` if the bridge does not exist.
1116/// Delegates to [`delete_link_by_name`] — from RTNETLINK's perspective
1117/// deleting a bridge is the same `RTM_DELLINK` as deleting any other
1118/// link, and `delete_link_by_name` already has the ENODEV-as-success
1119/// handling we want.
1120///
1121/// # Errors
1122///
1123/// Returns [`NetlinkError::Netlink`] for any RTNETLINK failure other
1124/// than `ENODEV` (which is treated as success).
1125#[cfg(target_os = "linux")]
1126pub async fn delete_bridge(name: &str) -> Result<(), NetlinkError> {
1127 delete_link_by_name(name).await
1128}
1129
/// Placeholder for targets other than Linux.
///
/// Ignores its argument and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub async fn delete_bridge(_name: &str) -> Result<(), NetlinkError> {
    let reason = "delete_bridge is only supported on Linux";
    Err(NetlinkError::Netlink(reason.to_owned()))
}
1137
1138/// Attach `link` to `bridge` by setting the link's `IFLA_MASTER` to
1139/// the bridge's ifindex.
1140///
1141/// Replaces the shell-out:
1142/// ip link set `<link>` master `<bridge>`
1143///
1144/// Both interfaces must already exist in the current network
1145/// namespace. This is what the overlay manager will call to splice a
1146/// container's host-side veth end into the per-service bridge instead
1147/// of /32-routing it directly.
1148///
1149/// # Errors
1150///
1151/// Returns [`NetlinkError::NotFound`] if either `link` or `bridge`
1152/// does not exist in the current netns. Returns
1153/// [`NetlinkError::Netlink`] for any other RTNETLINK failure.
1154#[cfg(target_os = "linux")]
1155pub async fn add_link_to_bridge(link: &str, bridge: &str) -> Result<(), NetlinkError> {
1156 use futures_util::stream::TryStreamExt;
1157
1158 let (connection, handle, _) = rtnetlink::new_connection()
1159 .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
1160 tokio::spawn(connection);
1161
1162 let bridge_link = handle
1163 .link()
1164 .get()
1165 .match_name(bridge.to_string())
1166 .execute()
1167 .try_next()
1168 .await
1169 .map_err(|e| {
1170 let msg = e.to_string();
1171 if msg.contains("No such device") {
1172 NetlinkError::NotFound(bridge.to_string())
1173 } else {
1174 NetlinkError::Netlink(format!("link lookup failed for {bridge}: {msg}"))
1175 }
1176 })?
1177 .ok_or_else(|| NetlinkError::NotFound(bridge.to_string()))?;
1178 let bridge_idx = bridge_link.header.index;
1179
1180 let member_link = handle
1181 .link()
1182 .get()
1183 .match_name(link.to_string())
1184 .execute()
1185 .try_next()
1186 .await
1187 .map_err(|e| {
1188 let msg = e.to_string();
1189 if msg.contains("No such device") {
1190 NetlinkError::NotFound(link.to_string())
1191 } else {
1192 NetlinkError::Netlink(format!("link lookup failed for {link}: {msg}"))
1193 }
1194 })?
1195 .ok_or_else(|| NetlinkError::NotFound(link.to_string()))?;
1196 let member_idx = member_link.header.index;
1197
1198 handle
1199 .link()
1200 .set(member_idx)
1201 .controller(bridge_idx)
1202 .execute()
1203 .await
1204 .map_err(|e| {
1205 NetlinkError::Netlink(format!(
1206 "set master failed: link={link} bridge={bridge}: {e}"
1207 ))
1208 })
1209}
1210
/// Placeholder for targets other than Linux.
///
/// Ignores its arguments and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub async fn add_link_to_bridge(_link: &str, _bridge: &str) -> Result<(), NetlinkError> {
    let reason = String::from("add_link_to_bridge is only supported on Linux");
    Err(NetlinkError::Netlink(reason))
}
1218
1219/// Enable or disable Spanning Tree Protocol (STP) on the named bridge.
1220///
1221/// STP is disabled by default on bridges created via [`create_bridge`]
1222/// (the kernel default for a freshly-created bridge is STP off), and
1223/// for `ZLayer`'s per-service bridges we want to keep it off: each
1224/// bridge is single-host, has no possibility of a loop, and STP's
1225/// initial 30s forwarding-delay would stall container traffic on
1226/// attach.
1227///
1228/// rtnetlink 0.14 does not expose a typed builder for `IFLA_BR_STP_STATE`
1229/// (it lives inside the nested `IFLA_LINKINFO` -> `IFLA_INFO_DATA` ->
1230/// `IFLA_BR_STP_STATE` attribute and the crate's bridge builder only
1231/// covers it at create-time, not as a post-create modification). The
1232/// portable kernel-supported alternative is the sysfs knob at
1233/// `/sys/class/net/<name>/bridge/stp_state`, which is what
1234/// `brctl stp <name> on|off` writes under the hood. We use the sysfs
1235/// path so the helper works on every kernel that has bridge support
1236/// without depending on an rtnetlink API surface that may move
1237/// between crate versions.
1238///
1239/// # Errors
1240///
1241/// Returns [`NetlinkError::NotFound`] if the bridge does not exist (no
1242/// `/sys/class/net/<name>/bridge` directory). Returns
1243/// [`NetlinkError::Io`] for any other write failure (permission
1244/// denied, the link exists but is not a bridge, etc.).
1245#[cfg(target_os = "linux")]
1246pub fn set_bridge_stp(name: &str, stp_on: bool) -> Result<(), NetlinkError> {
1247 let bridge_dir = format!("/sys/class/net/{name}/bridge");
1248 if !std::path::Path::new(&bridge_dir).exists() {
1249 return Err(NetlinkError::NotFound(name.to_string()));
1250 }
1251 let path = format!("{bridge_dir}/stp_state");
1252 let value = if stp_on { "1" } else { "0" };
1253 std::fs::write(&path, value)?;
1254 Ok(())
1255}
1256
/// Placeholder for targets other than Linux.
///
/// Ignores its arguments and always fails.
///
/// # Errors
///
/// Always returns [`NetlinkError::Netlink`].
#[cfg(not(target_os = "linux"))]
pub fn set_bridge_stp(_name: &str, _stp_on: bool) -> Result<(), NetlinkError> {
    let reason = "set_bridge_stp is only supported on Linux";
    Err(NetlinkError::Netlink(reason.to_owned()))
}
1264
#[cfg(test)]
mod tests {
    // Everything exercised here is Linux-only (it needs netlink and
    // CAP_NET_ADMIN). Imports and fixtures are gated item by item so the
    // lib tests still compile during Windows/macOS cross-checks.
    #[cfg(target_os = "linux")]
    use super::*;

    /// Build a 6-character suffix over `[0-9a-z]` for test interface
    /// names, derived from the sub-second nanoseconds of the wall clock,
    /// so parallel `cargo test` invocations are unlikely to collide.
    /// Six characters keep the full name (`zlb-` prefix + suffix) under
    /// the 15-char `IFNAMSIZ` limit.
    #[cfg(target_os = "linux")]
    fn rand_suffix() -> String {
        use std::time::{SystemTime, UNIX_EPOCH};
        const ALPHABET: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";
        const SUFFIX_LEN: usize = 6;

        let radix = ALPHABET.len() as u64;
        // A clock set before the epoch falls back to a seed of zero.
        let seed = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| u64::from(elapsed.subsec_nanos()));

        // Emit base-36 digits, least significant first.
        std::iter::successors(Some(seed), |&rest| Some(rest / radix))
            .take(SUFFIX_LEN)
            .map(|rest| {
                let digit = usize::try_from(rest % radix).unwrap_or(0);
                char::from(ALPHABET[digit])
            })
            .collect()
    }

    /// Create a dummy interface called `name`; it stands in for a
    /// host-side veth end in `bridge_add_link_membership`.
    #[cfg(target_os = "linux")]
    async fn create_dummy(name: &str) -> Result<(), NetlinkError> {
        let (connection, handle, _) = rtnetlink::new_connection()
            .map_err(|e| NetlinkError::Netlink(format!("new_connection failed: {e}")))?;
        tokio::spawn(connection);

        let outcome = handle
            .link()
            .add()
            .dummy(name.to_string())
            .execute()
            .await;
        outcome.map_err(|e| NetlinkError::Netlink(format!("dummy create failed for {name}: {e}")))
    }

    /// Creating and deleting a bridge twice in a row must succeed each
    /// time, and sysfs must reflect the bridge's presence and absence.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    #[ignore = "requires CAP_NET_ADMIN; run manually or in privileged CI"]
    async fn bridge_create_idempotent() {
        let name = format!("zlb-{}", rand_suffix());
        assert!(name.len() <= 15, "interface name exceeds IFNAMSIZ: {name}");
        let sysfs_entry = std::path::PathBuf::from(format!("/sys/class/net/{name}"));

        // Initial creation.
        create_bridge(&name).await.expect("first create_bridge");
        assert!(
            sysfs_entry.exists(),
            "bridge {name} should exist after create"
        );

        // Creating the same name again must still be Ok.
        create_bridge(&name)
            .await
            .expect("second create_bridge should be idempotent");

        // Remove it and check that it is gone.
        delete_bridge(&name).await.expect("delete_bridge");
        assert!(
            !sysfs_entry.exists(),
            "bridge {name} should be gone after delete"
        );

        // Deleting a name that no longer exists must still be Ok.
        delete_bridge(&name)
            .await
            .expect("second delete_bridge should be idempotent");
    }

    /// After `add_link_to_bridge`, sysfs must report the bridge as the
    /// dummy link's master.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    #[ignore = "requires CAP_NET_ADMIN; run manually or in privileged CI"]
    async fn bridge_add_link_membership() {
        let suffix = rand_suffix();
        let bridge = format!("zlb-{suffix}");
        let dummy = format!("zld-{suffix}");
        assert!(bridge.len() <= 15);
        assert!(dummy.len() <= 15);

        create_bridge(&bridge).await.expect("create_bridge");
        create_dummy(&dummy).await.expect("create_dummy");

        add_link_to_bridge(&dummy, &bridge)
            .await
            .expect("add_link_to_bridge");

        // `<dummy>/master` should lead to the bridge, so the ifindex read
        // through it must equal the bridge's own ifindex.
        let master_raw = std::fs::read_to_string(format!("/sys/class/net/{dummy}/master/ifindex"))
            .expect("read dummy master ifindex");
        let dummy_master_ifindex: u32 = master_raw
            .trim()
            .parse()
            .expect("parse dummy master ifindex");

        let bridge_raw = std::fs::read_to_string(format!("/sys/class/net/{bridge}/ifindex"))
            .expect("read bridge ifindex");
        let bridge_ifindex: u32 = bridge_raw.trim().parse().expect("parse bridge ifindex");

        assert_eq!(
            dummy_master_ifindex, bridge_ifindex,
            "dummy's master ifindex should equal bridge's ifindex"
        );

        // Tear down both interfaces.
        delete_link_by_name(&dummy).await.expect("delete dummy");
        delete_bridge(&bridge).await.expect("delete bridge");
    }

    /// `set_bridge_stp(false)` must leave the sysfs `stp_state` knob at 0.
    #[cfg(target_os = "linux")]
    #[tokio::test]
    #[ignore = "requires CAP_NET_ADMIN; run manually or in privileged CI"]
    async fn bridge_stp_off() {
        let name = format!("zlb-{}", rand_suffix());
        assert!(name.len() <= 15);

        create_bridge(&name).await.expect("create_bridge");

        set_bridge_stp(&name, false).expect("set_bridge_stp off");
        let stp_raw = std::fs::read_to_string(format!("/sys/class/net/{name}/bridge/stp_state"))
            .expect("read stp_state");
        let stp_state = stp_raw.trim().to_string();
        assert_eq!(
            stp_state, "0",
            "stp_state should be 0 after set_bridge_stp(false)"
        );

        // Tear down the bridge.
        delete_bridge(&name).await.expect("delete_bridge");
    }
}