Skip to main content

synwire_sandbox/platform/linux/
namespace.rs

1#![allow(
2    clippy::similar_names,
3    clippy::match_same_arms,
4    clippy::option_if_let_else,
5    clippy::map_unwrap_or,
6    clippy::manual_unwrap_or_default
7)]
8//! Linux namespace container via an OCI runtime (runc or crun).
9//!
10//! [`NamespaceContainer`] locates an OCI-compliant container runtime on
11//! `$PATH`, generates an [OCI runtime spec][oci] from a [`ContainerConfig`],
12//! and spawns the container with `<runtime> run`.
13//!
14//! [oci]: https://github.com/opencontainers/runtime-spec
15//!
16//! # Non-interactive mode
17//!
18//! [`NamespaceContainer::spawn`] runs the container in the foreground.
19//! stdout/stderr of the runtime process are the container's output.
20//!
21//! # Captured output mode
22//!
23//! [`NamespaceContainer::spawn_captured`] redirects stdout/stderr to files in
24//! a temporary directory. Output persists even if the process is killed.
25//!
26//! # Interactive / PTY mode
27//!
28//! [`NamespaceContainer::spawn_interactive`] uses the runtime's
29//! `--console-socket` mechanism to receive a PTY controller fd from the
30//! runtime. Stage 2 of the runtime sets up the controlling terminal inside
31//! the container.
32
33use std::os::fd::OwnedFd;
34use std::path::{Path, PathBuf};
35use std::sync::Arc;
36
37use oci_spec::runtime::{
38    Arch, Capability, LinuxBuilder, LinuxCapabilitiesBuilder, LinuxIdMappingBuilder,
39    LinuxNamespaceBuilder, LinuxNamespaceType, LinuxSeccompAction, LinuxSeccompBuilder,
40    LinuxSyscallBuilder, Mount, MountBuilder, ProcessBuilder, RootBuilder, Spec, SpecBuilder,
41    UserBuilder,
42};
43use serde::{Deserialize, Serialize};
44use tokio::process::Command;
45use tracing::{debug, warn};
46
47use crate::SandboxError;
48use crate::output::{CapturedOutput, OutputMode, ProcessCapture};
49
50// ── Config types ──────────────────────────────────────────────────────────────
51
52/// Clone flags requested for the namespace container.
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[non_exhaustive]
55pub enum CloneFlag {
56    /// New PID namespace (`CLONE_NEWPID`).
57    NewPid,
58    /// New UTS namespace — isolates hostname/domainname (`CLONE_NEWUTS`).
59    NewUts,
60    /// New IPC namespace (`CLONE_NEWIPC`).
61    NewIpc,
62    /// New mount namespace (`CLONE_NEWNS`).
63    NewNs,
64    /// New cgroup namespace (`CLONE_NEWCGROUP`).
65    NewCgroup,
66    /// New network namespace (`CLONE_NEWNET`).
67    NewNet,
68    /// New user namespace (`CLONE_NEWUSER`). Attempted; silently skipped on
69    /// kernels or system configs that prohibit unprivileged user namespaces.
70    NewUser,
71}
72
73/// A single bind mount to set up inside the container.
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct BindMount {
76    /// Host source path.
77    pub source: String,
78    /// Container target path.
79    pub target: String,
80    /// Mount read-only.
81    pub read_only: bool,
82}
83
84/// Security parameters for the container.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86pub struct ContainerSecurity {
87    /// Seccomp profile to apply.
88    pub seccomp: ContainerSeccomp,
89    /// Capabilities to drop (e.g. `["ALL"]`).
90    pub capabilities_drop: Vec<String>,
91    /// Capabilities to add after dropping.
92    pub capabilities_add: Vec<String>,
93    /// Set `PR_SET_NO_NEW_PRIVS` before exec.
94    pub no_new_privileges: bool,
95    /// Run as this UID (None = inherit).
96    pub run_as_user: Option<u32>,
97    /// Run as this GID (None = inherit).
98    pub run_as_group: Option<u32>,
99}
100
101/// Seccomp profile selection.
102#[derive(Debug, Clone, Serialize, Deserialize)]
103#[non_exhaustive]
104pub enum ContainerSeccomp {
105    /// No seccomp filter.
106    Unconfined,
107    /// Built-in `RuntimeDefault` profile (deny-list of ~18 dangerous syscalls).
108    RuntimeDefault,
109    /// Load profile from a JSON file path.
110    Localhost {
111        /// Path to the OCI-format seccomp profile.
112        path: String,
113    },
114}
115
116/// Container configuration — translated to an OCI runtime spec before launch.
117///
118/// Use [`NamespaceContainer::build_config`] to derive this from a high-level
119/// [`SandboxConfig`](synwire_core::agents::sandbox::SandboxConfig).
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub struct ContainerConfig {
122    /// Namespace flags to apply.
123    pub clone_flags: Vec<CloneFlag>,
124    /// Isolate the network namespace.
125    pub network_isolation: bool,
126    /// Request a user namespace mapping (rootless containers).
127    pub user_namespace: bool,
128    /// Request a cgroup namespace.
129    pub cgroup_namespace: bool,
130    /// Bind mounts to create inside the container.
131    pub bind_mounts: Vec<BindMount>,
132    /// Path to the agent's cgroup (used for resource limits inside the ns).
133    pub cgroup_path: Option<String>,
134    /// Security parameters.
135    pub security: ContainerSecurity,
136    /// Command to exec inside the container.
137    pub command: String,
138    /// Arguments for the command.
139    pub args: Vec<String>,
140    /// Environment variables (complete set — parent env is not inherited
141    /// inside the namespace unless explicitly passed).
142    pub env: std::collections::HashMap<String, String>,
143}
144
145// ── PtySession ──────────────────────────────────────────────────────────────
146
147/// Handle to an interactive PTY session running inside a namespace container.
148///
149/// `controller` is the host-side controller end of the PTY. Read from it to
150/// receive output; write to it to send input to the contained process. Wrap
151/// it in [`tokio::io::unix::AsyncFd`] for non-blocking async I/O.
152///
153/// `child` is the OCI runtime process. Killing the runtime kills the
154/// container.
155#[derive(Debug)]
156pub struct PtySession {
157    /// Controller end of the PTY (host side).
158    pub controller: OwnedFd,
159    /// The OCI runtime child process.
160    pub child: tokio::process::Child,
161    /// Bundle directory — kept alive while the container runs.
162    _bundle: tempfile::TempDir,
163}
164
165// ── ContainerProcess ────────────────────────────────────────────────────────
166
167/// A running non-interactive container process.
168///
169/// Holds the OCI runtime child process and the bundle directory. The bundle
170/// is automatically cleaned up when this handle is dropped.
171#[derive(Debug)]
172pub struct ContainerProcess {
173    /// The OCI runtime child process.
174    pub child: tokio::process::Child,
175    /// Bundle directory — kept alive while the container runs.
176    _bundle: tempfile::TempDir,
177}
178
179// ── OCI runtime selection ─────────────────────────────────────────────────
180
/// OCI runtime backend used to launch containers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum OciRuntime {
    /// Standard `runc`: Linux namespaces + seccomp + capabilities.
    ///
    /// The contained processes share the host kernel, so isolation rests on
    /// kernel namespace boundaries.
    Runc,
    /// gVisor (`runsc`): a user-space kernel sandbox.
    ///
    /// The contained processes run on a Go-based kernel that intercepts
    /// syscalls, giving a much stronger isolation boundary than namespaces
    /// alone. Requires `runsc` on `$PATH`.
    Gvisor,
}
197
/// Syscall-interception platform used by gVisor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GvisorPlatform {
    /// Systrap: patches syscall instruction sites. Fastest, but needs
    /// `CAP_SYS_PTRACE` (broken in rootless + host-network mode due to a
    /// gVisor bug in `ConfigureCmdForRootless`).
    Systrap,
    /// Ptrace: uses `PTRACE_SYSEMU` / `CLONE_PTRACE`. Slower but universally
    /// compatible, with the same isolation guarantees as systrap.
    Ptrace,
}
210
/// Process-wide record of the gVisor platform probe: once systrap is found
/// not to work, later containers skip the probe and use ptrace directly.
///
/// Holds one of the state constants below:
/// `0` = not probed, `1` = systrap works, `2` = ptrace fallback.
static GVISOR_PLATFORM_CACHE: std::sync::atomic::AtomicU8 =
    std::sync::atomic::AtomicU8::new(_PLATFORM_NOT_PROBED);

/// Cache state: the probe has not run yet.
const _PLATFORM_NOT_PROBED: u8 = 0;
/// Cache state: systrap works.
const PLATFORM_SYSTRAP: u8 = 1;
/// Cache state: fall back to ptrace.
const PLATFORM_PTRACE: u8 = 2;
220
221// ── NamespaceContainer ─────────────────────────────────────────────────────
222
223/// Spawns processes inside Linux containers via an OCI runtime.
224///
225/// Supports [`OciRuntime::Runc`] (standard namespace isolation) and
226/// [`OciRuntime::Gvisor`] (user-space kernel via `runsc`).
227///
228/// For gVisor, the constructor probes whether the `systrap` platform works
229/// (it requires `CAP_SYS_PTRACE` which is missing in rootless + host-network
230/// mode due to a gVisor bug). If systrap fails, it falls back to `ptrace`
231/// and caches the result for the lifetime of the process — all subsequent
232/// gVisor containers skip the probe.
233#[derive(Debug)]
234pub struct NamespaceContainer {
235    /// Path to the OCI runtime binary.
236    runtime_path: PathBuf,
237    /// Which runtime backend is in use.
238    runtime_kind: OciRuntime,
239    /// For gVisor: which platform to use (systrap or ptrace).
240    gvisor_platform: GvisorPlatform,
241}
242
243impl NamespaceContainer {
244    /// Create a container using `runc` from `$PATH`.
245    ///
246    /// # Errors
247    ///
248    /// Returns [`SandboxError::RuntimeNotFound`] if `runc` is not on `$PATH`.
249    pub fn new() -> Result<Self, SandboxError> {
250        Self::with_runtime(OciRuntime::Runc)
251    }
252
253    /// Create a container using gVisor (`runsc`) from `$PATH`.
254    ///
255    /// On first call, probes the `systrap` platform by running a trivial
256    /// container. If systrap works, uses it for all future containers
257    /// (fastest). If it fails (e.g., missing `CAP_SYS_PTRACE` in rootless
258    /// mode), falls back to `ptrace` and logs a warning. The result is
259    /// cached process-wide — subsequent calls skip the probe.
260    ///
261    /// # Errors
262    ///
263    /// Returns [`SandboxError::RuntimeNotFound`] if `runsc` is not on `$PATH`.
264    pub fn with_gvisor() -> Result<Self, SandboxError> {
265        Self::with_runtime(OciRuntime::Gvisor)
266    }
267
268    /// Create a container using the specified OCI runtime.
269    ///
270    /// # Errors
271    ///
272    /// Returns [`SandboxError::RuntimeNotFound`] if the runtime binary is
273    /// not on `$PATH`.
274    pub fn with_runtime(kind: OciRuntime) -> Result<Self, SandboxError> {
275        let name = match kind {
276            OciRuntime::Runc => "runc",
277            OciRuntime::Gvisor => "runsc",
278        };
279        let path =
280            which_binary(name).map_err(|()| SandboxError::RuntimeNotFound { name: name.into() })?;
281        debug!(runtime = name, path = %path.display(), "found OCI runtime");
282
283        let gvisor_platform = if kind == OciRuntime::Gvisor {
284            resolve_gvisor_platform(&path)
285        } else {
286            GvisorPlatform::Systrap // unused for runc
287        };
288
289        Ok(Self {
290            runtime_path: path,
291            runtime_kind: kind,
292            gvisor_platform,
293        })
294    }
295
296    /// Spawn a command inside a namespace container.
297    ///
298    /// Creates a temporary OCI bundle, generates a runtime spec from `config`,
299    /// and runs the container in the foreground. Returns a
300    /// [`ContainerProcess`] that holds the runtime child and the bundle dir.
301    ///
302    /// # Errors
303    ///
304    /// Returns a [`SandboxError`] if bundle creation or process spawn fails.
305    pub fn spawn(&self, config: &ContainerConfig) -> Result<ContainerProcess, SandboxError> {
306        let (bundle, container_id) = self.prepare_bundle(config, false)?;
307
308        debug!(runtime = %self.runtime_path.display(), %container_id, "spawning namespace container");
309
310        let child = self
311            .build_run_command(&bundle, &container_id, None)
312            .stdin(std::process::Stdio::null())
313            .stdout(std::process::Stdio::piped())
314            .stderr(std::process::Stdio::piped())
315            .kill_on_drop(true)
316            .spawn()
317            .map_err(|e| SandboxError::RuntimeFailed {
318                reason: format!("spawn failed: {e}"),
319            })?;
320
321        Ok(ContainerProcess {
322            child,
323            _bundle: bundle,
324        })
325    }
326
327    /// Spawn a command inside a namespace container with output captured to
328    /// files in a temporary directory.
329    ///
330    /// stdout and stderr are redirected to files rather than pipes. The files
331    /// persist even if the process is killed, and the temporary directory is
332    /// deleted when the last `Arc<CapturedOutput>` reference is dropped.
333    ///
334    /// # Errors
335    ///
336    /// Returns a [`SandboxError`] if directory creation, file opening, or
337    /// process spawn fails.
338    pub fn spawn_captured(
339        &self,
340        config: &ContainerConfig,
341        mode: OutputMode,
342    ) -> Result<ProcessCapture, SandboxError> {
343        let output = CapturedOutput::new(mode).map_err(|e| SandboxError::RuntimeFailed {
344            reason: format!("create capture directory: {e}"),
345        })?;
346
347        let stdout_file = std::fs::OpenOptions::new()
348            .create(true)
349            .truncate(true)
350            .write(true)
351            .open(output.stdout_path())
352            .map_err(|e| SandboxError::RuntimeFailed {
353                reason: format!("open stdout capture file: {e}"),
354            })?;
355
356        let stderr_file = match mode {
357            OutputMode::Combined => {
358                stdout_file
359                    .try_clone()
360                    .map_err(|e| SandboxError::RuntimeFailed {
361                        reason: format!("clone stdout handle for combined stderr: {e}"),
362                    })?
363            }
364            OutputMode::Separate => {
365                let stderr_path =
366                    output
367                        .stderr_path()
368                        .ok_or_else(|| SandboxError::RuntimeFailed {
369                            reason: "separate mode missing stderr path".into(),
370                        })?;
371                std::fs::OpenOptions::new()
372                    .create(true)
373                    .truncate(true)
374                    .write(true)
375                    .open(stderr_path)
376                    .map_err(|e| SandboxError::RuntimeFailed {
377                        reason: format!("open stderr capture file: {e}"),
378                    })?
379            }
380        };
381
382        let (bundle, container_id) = self.prepare_bundle(config, false)?;
383
384        debug!(runtime = %self.runtime_path.display(), %container_id, "spawning captured namespace container");
385
386        let child = self
387            .build_run_command(&bundle, &container_id, None)
388            .stdin(std::process::Stdio::null())
389            .stdout(std::process::Stdio::from(stdout_file))
390            .stderr(std::process::Stdio::from(stderr_file))
391            .kill_on_drop(true)
392            .spawn()
393            .map_err(|e| SandboxError::RuntimeFailed {
394                reason: format!("spawn failed: {e}"),
395            })?;
396
397        Ok(ProcessCapture {
398            output: Arc::new(output),
399            child,
400            _bundle: Some(bundle),
401        })
402    }
403
404    /// Spawn a command inside a namespace container with full PTY support
405    /// for human-in-the-loop interaction.
406    ///
407    /// Uses the OCI runtime's `--console-socket` mechanism: the runtime
408    /// creates a PTY inside the container and sends the controller fd back
409    /// over a Unix socket via `SCM_RIGHTS`. The returned [`PtySession`]
410    /// contains the controller fd for host-side I/O.
411    ///
412    /// # Errors
413    ///
414    /// Returns a [`SandboxError`] if socket setup, process spawn, or PTY
415    /// fd handshake fails.
416    pub fn spawn_interactive(&self, config: &ContainerConfig) -> Result<PtySession, SandboxError> {
417        let (bundle, container_id) = self.prepare_bundle(config, true)?;
418        let socket_path = bundle.path().join("console.sock");
419
420        let listener = std::os::unix::net::UnixListener::bind(&socket_path).map_err(|e| {
421            SandboxError::RuntimeFailed {
422                reason: format!("bind console socket: {e}"),
423            }
424        })?;
425
426        debug!(runtime = %self.runtime_path.display(), %container_id, "spawning interactive namespace container");
427
428        let child = self
429            .build_run_command(&bundle, &container_id, Some(&socket_path))
430            .stdin(std::process::Stdio::null())
431            .stdout(std::process::Stdio::null())
432            .stderr(std::process::Stdio::piped())
433            .kill_on_drop(true)
434            .spawn()
435            .map_err(|e| SandboxError::RuntimeFailed {
436                reason: format!("spawn failed: {e}"),
437            })?;
438
439        // The runtime connects to our socket and sends the PTY controller
440        // fd via SCM_RIGHTS.
441        let (stream, _) = listener.accept().map_err(|e| SandboxError::RuntimeFailed {
442            reason: format!("accept console socket: {e}"),
443        })?;
444
445        let controller = recv_pty_controller(&stream)?;
446
447        Ok(PtySession {
448            controller,
449            child,
450            _bundle: bundle,
451        })
452    }
453
454    /// Build a [`ContainerConfig`] from synwire-core's `SandboxConfig`.
455    ///
456    /// Derives namespace flags, bind mounts, and security parameters from the
457    /// high-level configuration.
458    #[must_use]
459    pub fn build_config(
460        sandbox: &synwire_core::agents::sandbox::SandboxConfig,
461        command: impl Into<String>,
462        args: Vec<String>,
463    ) -> ContainerConfig {
464        use synwire_core::agents::sandbox::SeccompProfile;
465
466        let network_enabled = sandbox.network.as_ref().is_some_and(|n| n.enabled);
467
468        let mut clone_flags = vec![
469            CloneFlag::NewUts,
470            CloneFlag::NewIpc,
471            CloneFlag::NewNs,
472            CloneFlag::NewCgroup,
473            CloneFlag::NewPid,
474        ];
475        if !network_enabled {
476            clone_flags.push(CloneFlag::NewNet);
477        }
478
479        // Build bind mounts from filesystem config.
480        let bind_mounts = sandbox
481            .filesystem
482            .as_ref()
483            .map(|fs| {
484                let mut mounts: Vec<BindMount> = fs
485                    .allow_write
486                    .iter()
487                    .filter_map(|p| {
488                        let abs = to_absolute(p)?;
489                        Some(BindMount {
490                            source: abs.clone(),
491                            target: abs,
492                            read_only: false,
493                        })
494                    })
495                    .collect();
496                if fs.inherit_readable {
497                    mounts.push(BindMount {
498                        source: "/".into(),
499                        target: "/".into(),
500                        read_only: true,
501                    });
502                }
503                mounts
504            })
505            .unwrap_or_default();
506
507        // Build environment.
508        let mut env: std::collections::HashMap<String, String> = if sandbox.env.inherit_parent {
509            std::env::vars().collect()
510        } else {
511            std::collections::HashMap::new()
512        };
513        for k in &sandbox.env.unset {
514            let _ = env.remove(k);
515        }
516        env.extend(sandbox.env.set.clone());
517
518        // Security.
519        let seccomp = match &sandbox.security.seccomp {
520            SeccompProfile::Unconfined => ContainerSeccomp::Unconfined,
521            SeccompProfile::Localhost { path } => {
522                ContainerSeccomp::Localhost { path: path.clone() }
523            }
524            SeccompProfile::RuntimeDefault | _ => ContainerSeccomp::RuntimeDefault,
525        };
526
527        let security = ContainerSecurity {
528            seccomp,
529            capabilities_drop: sandbox.security.capabilities.drop.clone(),
530            capabilities_add: sandbox.security.capabilities.add.clone(),
531            no_new_privileges: sandbox.security.no_new_privileges,
532            run_as_user: sandbox.security.run_as_user,
533            run_as_group: sandbox.security.run_as_group,
534        };
535
536        ContainerConfig {
537            clone_flags,
538            network_isolation: !network_enabled,
539            user_namespace: true,
540            cgroup_namespace: true,
541            bind_mounts,
542            cgroup_path: None,
543            security,
544            command: command.into(),
545            args,
546            env,
547        }
548    }
549
550    // ── internal ──────────────────────────────────────────────────────────
551
552    /// Build a `Command` for `<runtime> run` with runtime-specific flags.
553    ///
554    /// gVisor (`runsc`) needs `--rootless` and `--network=none` (when network
555    /// isolation is requested) since it manages namespaces internally.
556    fn build_run_command(
557        &self,
558        bundle: &tempfile::TempDir,
559        container_id: &str,
560        console_socket: Option<&Path>,
561    ) -> Command {
562        let mut cmd = Command::new(&self.runtime_path);
563
564        // gVisor-specific global flags (before the subcommand).
565        if self.runtime_kind == OciRuntime::Gvisor {
566            let platform_flag = match self.gvisor_platform {
567                GvisorPlatform::Systrap => "--platform=systrap",
568                GvisorPlatform::Ptrace => "--platform=ptrace",
569            };
570            let _cmd = cmd
571                .arg("--rootless")
572                .arg("--network=host")
573                .arg(platform_flag);
574        }
575
576        let _cmd = cmd.arg("run");
577
578        if let Some(sock) = console_socket {
579            let _cmd = cmd.arg("--console-socket").arg(sock);
580        }
581
582        let _cmd = cmd.arg("--bundle").arg(bundle.path()).arg(container_id);
583
584        cmd
585    }
586
587    /// Create a temporary OCI bundle directory with `config.json` and `rootfs/`.
588    fn prepare_bundle(
589        &self,
590        config: &ContainerConfig,
591        terminal: bool,
592    ) -> Result<(tempfile::TempDir, String), SandboxError> {
593        let bundle = tempfile::TempDir::with_prefix("synwire-").map_err(|e| {
594            SandboxError::RuntimeFailed {
595                reason: format!("create bundle dir: {e}"),
596            }
597        })?;
598        let rootfs = bundle.path().join("rootfs");
599        let container_id = uuid::Uuid::new_v4().to_string();
600
601        // Generate /etc/passwd and /etc/group so the current user is
602        // resolvable inside the container (whoami, id, ls -la all work).
603        let passwd_path = bundle.path().join("passwd");
604        let group_path = bundle.path().join("group");
605        generate_user_files(&passwd_path, &group_path).map_err(|e| {
606            SandboxError::RuntimeFailed {
607                reason: format!("generate user files: {e}"),
608            }
609        })?;
610
611        let spec = build_oci_spec(
612            config,
613            terminal,
614            &passwd_path,
615            &group_path,
616            self.runtime_kind,
617        )
618        .map_err(|e| SandboxError::RuntimeFailed {
619            reason: format!("build OCI spec: {e}"),
620        })?;
621
622        // Create mount-point directories inside rootfs.
623        prepare_rootfs(&rootfs, &spec).map_err(|e| SandboxError::RuntimeFailed {
624            reason: format!("prepare rootfs: {e}"),
625        })?;
626
627        let spec_json = serde_json::to_string_pretty(&spec).map_err(SandboxError::SerdeError)?;
628        std::fs::write(bundle.path().join("config.json"), spec_json).map_err(|e| {
629            SandboxError::RuntimeFailed {
630                reason: format!("write config.json: {e}"),
631            }
632        })?;
633
634        Ok((bundle, container_id))
635    }
636}
637
638// ── OCI spec generation ───────────────────────────────────────────────────────
639
640/// Convert a capability name string (e.g. `"KILL"`, `"CAP_KILL"`) to a
641/// [`Capability`] enum variant. Returns `None` for unrecognised names.
642fn parse_capability(name: &str) -> Option<Capability> {
643    let canon = format!("CAP_{}", name.trim_start_matches("CAP_"));
644    // Capability implements Deserialize which handles the "CAP_*" format.
645    serde_json::from_value(serde_json::Value::String(canon)).ok()
646}
647
648/// Build an OCI runtime spec from a [`ContainerConfig`].
649#[allow(clippy::too_many_lines)]
650fn build_oci_spec(
651    config: &ContainerConfig,
652    terminal: bool,
653    passwd_path: &Path,
654    group_path: &Path,
655    runtime: OciRuntime,
656) -> Result<Spec, oci_spec::OciSpecError> {
657    let uid = nix::unistd::getuid().as_raw();
658    let gid = nix::unistd::getgid().as_raw();
659
660    // Build process.args: [command, ...args]
661    let mut args = vec![config.command.clone()];
662    args.extend(config.args.clone());
663
664    // Build process.env: ["KEY=val", ...]
665    let env: Vec<String> = config.env.iter().map(|(k, v)| format!("{k}={v}")).collect();
666
667    // Build linux.namespaces
668    let mut namespaces = Vec::new();
669    for flag in &config.clone_flags {
670        let ns_type = match flag {
671            CloneFlag::NewPid => LinuxNamespaceType::Pid,
672            CloneFlag::NewUts => LinuxNamespaceType::Uts,
673            CloneFlag::NewIpc => LinuxNamespaceType::Ipc,
674            CloneFlag::NewNs => LinuxNamespaceType::Mount,
675            CloneFlag::NewCgroup => LinuxNamespaceType::Cgroup,
676            CloneFlag::NewNet => LinuxNamespaceType::Network,
677            CloneFlag::NewUser => continue, // handled separately below
678        };
679        namespaces.push(LinuxNamespaceBuilder::default().typ(ns_type).build()?);
680    }
681    // gVisor manages its own user namespace internally — don't request one
682    // in the OCI spec or it will conflict with runsc's sandbox model.
683    if config.user_namespace && runtime != OciRuntime::Gvisor {
684        namespaces.push(
685            LinuxNamespaceBuilder::default()
686                .typ(LinuxNamespaceType::User)
687                .build()?,
688        );
689    }
690
691    // Build mounts
692    let mut mounts = essential_mounts()?;
693    for bm in &config.bind_mounts {
694        let mut opts = vec!["rbind".to_string()];
695        if bm.read_only {
696            opts.push("ro".to_string());
697        }
698        mounts.push(
699            MountBuilder::default()
700                .destination(&bm.target)
701                .typ("bind")
702                .source(&bm.source)
703                .options(opts)
704                .build()?,
705        );
706    }
707    // If no explicit mounts but user wants host fs, add key dirs.
708    if config.bind_mounts.is_empty() {
709        for dir in &[
710            "/usr", "/bin", "/sbin", "/lib", "/lib64", "/etc", "/home", "/tmp",
711        ] {
712            if Path::new(dir).exists() {
713                mounts.push(
714                    MountBuilder::default()
715                        .destination(*dir)
716                        .typ("bind")
717                        .source(*dir)
718                        .options(vec!["rbind".into(), "ro".into()])
719                        .build()?,
720                );
721            }
722        }
723    }
724
725    // Overlay /etc/passwd and /etc/group with generated files so the
726    // current user is resolvable inside the container. These are added
727    // AFTER the /etc bind mount so they take precedence.
728    mounts.push(
729        MountBuilder::default()
730            .destination("/etc/passwd")
731            .typ("bind")
732            .source(passwd_path)
733            .options(vec!["bind".into(), "ro".into()])
734            .build()?,
735    );
736    mounts.push(
737        MountBuilder::default()
738            .destination("/etc/group")
739            .typ("bind")
740            .source(group_path)
741            .options(vec!["bind".into(), "ro".into()])
742            .build()?,
743    );
744
745    // Build capabilities
746    let caps = build_capabilities(&config.security)?;
747
748    // Build seccomp (optional)
749    let seccomp = build_seccomp(&config.security.seccomp)?;
750
751    let masked_paths = vec![
752        "/proc/acpi".into(),
753        "/proc/asound".into(),
754        "/proc/kcore".into(),
755        "/proc/keys".into(),
756        "/proc/latency_stats".into(),
757        "/proc/timer_list".into(),
758        "/proc/timer_stats".into(),
759        "/proc/sched_debug".into(),
760        "/proc/scsi".into(),
761        "/sys/firmware".into(),
762        "/sys/devices/virtual/powercap".into(),
763    ];
764    let readonly_paths = vec![
765        "/proc/bus".into(),
766        "/proc/fs".into(),
767        "/proc/irq".into(),
768        "/proc/sys".into(),
769        "/proc/sysrq-trigger".into(),
770    ];
771
772    let mut linux_builder = LinuxBuilder::default();
773    linux_builder = linux_builder
774        .namespaces(namespaces)
775        .masked_paths(masked_paths)
776        .readonly_paths(readonly_paths);
777
778    // UID/GID mappings only apply when we explicitly create a user namespace
779    // (runc). gVisor handles UID mapping internally via its --rootless flag.
780    if config.user_namespace && runtime != OciRuntime::Gvisor {
781        // Rootless user namespaces only allow a single UID/GID mapping
782        // entry (without the setuid `newuidmap` helper). runc's init
783        // process requires UID 0, so we map containerID 0 → the host
784        // user's real UID. The process runs as UID 0 inside the
785        // namespace, which the kernel translates to the real UID for all
786        // host-side operations (file ownership in bind mounts, etc.).
787        //
788        // The generated /etc/passwd maps UID 0 to the real username so
789        // `whoami`, `id`, and `ls -la` show the expected user identity.
790        linux_builder = linux_builder
791            .uid_mappings(vec![
792                LinuxIdMappingBuilder::default()
793                    .container_id(0u32)
794                    .host_id(uid)
795                    .size(1u32)
796                    .build()?,
797            ])
798            .gid_mappings(vec![
799                LinuxIdMappingBuilder::default()
800                    .container_id(0u32)
801                    .host_id(gid)
802                    .size(1u32)
803                    .build()?,
804            ]);
805    }
806
807    // gVisor provides its own syscall filtering via its sentry kernel —
808    // applying an OCI seccomp profile on top is redundant and can cause
809    // compatibility issues with runsc's internal syscall handling.
810    if runtime != OciRuntime::Gvisor
811        && let Some(sec) = seccomp
812    {
813        linux_builder = linux_builder.seccomp(sec);
814    }
815
816    let linux = linux_builder.build()?;
817
818    // In a user namespace the process runs as UID 0 (mapped to the host UID).
819    // Without a user namespace, run as the real UID directly.
820    #[allow(clippy::similar_names)]
821    let container_uid = if config.user_namespace { 0 } else { uid };
822    #[allow(clippy::similar_names)]
823    let container_gid = if config.user_namespace { 0 } else { gid };
824
825    let user = UserBuilder::default()
826        .uid(config.security.run_as_user.unwrap_or(container_uid))
827        .gid(config.security.run_as_group.unwrap_or(container_gid))
828        .build()?;
829
830    let process = ProcessBuilder::default()
831        .terminal(terminal)
832        .user(user)
833        .args(args)
834        .env(env)
835        .cwd("/")
836        .capabilities(caps)
837        .no_new_privileges(config.security.no_new_privileges)
838        .build()?;
839
840    let root = RootBuilder::default()
841        .path("rootfs")
842        .readonly(true)
843        .build()?;
844
845    SpecBuilder::default()
846        .version("1.0.2")
847        .process(process)
848        .root(root)
849        .hostname("synwire")
850        .mounts(mounts)
851        .linux(linux)
852        .build()
853}
854
855/// Essential OCI mounts (proc, dev, devpts, sysfs).
856fn essential_mounts() -> Result<Vec<Mount>, oci_spec::OciSpecError> {
857    Ok(vec![
858        MountBuilder::default()
859            .destination("/proc")
860            .typ("proc")
861            .source("proc")
862            .options(vec!["nosuid".into(), "noexec".into(), "nodev".into()])
863            .build()?,
864        MountBuilder::default()
865            .destination("/dev")
866            .typ("tmpfs")
867            .source("tmpfs")
868            .options(vec![
869                "nosuid".into(),
870                "strictatime".into(),
871                "mode=755".into(),
872                "size=65536k".into(),
873            ])
874            .build()?,
875        MountBuilder::default()
876            .destination("/dev/pts")
877            .typ("devpts")
878            .source("devpts")
879            .options(vec![
880                "nosuid".into(),
881                "noexec".into(),
882                "newinstance".into(),
883                "ptmxmode=0666".into(),
884                "mode=0620".into(),
885            ])
886            .build()?,
887        MountBuilder::default()
888            .destination("/dev/shm")
889            .typ("tmpfs")
890            .source("shm")
891            .options(vec![
892                "nosuid".into(),
893                "noexec".into(),
894                "nodev".into(),
895                "mode=1777".into(),
896                "size=65536k".into(),
897            ])
898            .build()?,
899        MountBuilder::default()
900            .destination("/dev/mqueue")
901            .typ("mqueue")
902            .source("mqueue")
903            .options(vec!["nosuid".into(), "noexec".into(), "nodev".into()])
904            .build()?,
905        MountBuilder::default()
906            .destination("/sys")
907            .typ("none")
908            .source("/sys")
909            .options(vec![
910                "rbind".into(),
911                "nosuid".into(),
912                "noexec".into(),
913                "nodev".into(),
914                "ro".into(),
915            ])
916            .build()?,
917    ])
918}
919
920/// Build OCI `process.capabilities` from security config.
921fn build_capabilities(
922    security: &ContainerSecurity,
923) -> Result<oci_spec::runtime::LinuxCapabilities, oci_spec::OciSpecError> {
924    let drop_all = security.capabilities_drop.iter().any(|c| c == "ALL");
925    let caps: oci_spec::runtime::Capabilities = if drop_all {
926        security
927            .capabilities_add
928            .iter()
929            .filter_map(|c| parse_capability(c))
930            .collect()
931    } else {
932        // Minimal capability set for agent sandboxes. Intentionally much
933        // tighter than Docker's default — agents run as a single user and
934        // don't need DAC_OVERRIDE, CHOWN, FOWNER, SETUID/GID, or
935        // SYS_CHROOT (runc handles pivot_root before the process starts).
936        //
937        // CAP_KILL: signal child processes spawned by the agent.
938        // CAP_NET_BIND_SERVICE: bind ports <1024 if networking is enabled.
939        // CAP_SETPCAP: drop further capabilities (supports no_new_privileges).
940        let mut caps: oci_spec::runtime::Capabilities = [
941            Capability::Kill,
942            Capability::NetBindService,
943            Capability::Setpcap,
944        ]
945        .into_iter()
946        .collect();
947
948        for drop in &security.capabilities_drop {
949            if let Some(cap) = parse_capability(drop) {
950                let _ = caps.remove(&cap);
951            }
952        }
953        caps
954    };
955
956    LinuxCapabilitiesBuilder::default()
957        .bounding(caps.clone())
958        .effective(caps.clone())
959        .inheritable(caps.clone())
960        .permitted(caps.clone())
961        .ambient(caps)
962        .build()
963}
964
965/// Build OCI `linux.seccomp` from seccomp config. Returns `None` for `Unconfined`.
966fn build_seccomp(
967    seccomp: &ContainerSeccomp,
968) -> Result<Option<oci_spec::runtime::LinuxSeccomp>, oci_spec::OciSpecError> {
969    match seccomp {
970        ContainerSeccomp::Unconfined => Ok(None),
971        ContainerSeccomp::RuntimeDefault => {
972            let syscall = LinuxSyscallBuilder::default()
973                .names(vec![
974                    "kexec_file_load".into(),
975                    "kexec_load".into(),
976                    "open_by_handle_at".into(),
977                    "perf_event_open".into(),
978                    "process_vm_readv".into(),
979                    "process_vm_writev".into(),
980                    "ptrace".into(),
981                    "reboot".into(),
982                    "request_key".into(),
983                    "set_mempolicy".into(),
984                    "swapon".into(),
985                    "swapoff".into(),
986                    "syslog".into(),
987                    "umount2".into(),
988                    "unshare".into(),
989                    "uselib".into(),
990                    "userfaultfd".into(),
991                ])
992                .action(LinuxSeccompAction::ScmpActErrno)
993                .errno_ret(1u32)
994                .build()?;
995
996            Ok(Some(
997                LinuxSeccompBuilder::default()
998                    .default_action(LinuxSeccompAction::ScmpActAllow)
999                    .architectures(vec![
1000                        Arch::ScmpArchX86_64,
1001                        Arch::ScmpArchX86,
1002                        Arch::ScmpArchAarch64,
1003                    ])
1004                    .syscalls(vec![syscall])
1005                    .build()?,
1006            ))
1007        }
1008        ContainerSeccomp::Localhost { path } => {
1009            // Load the profile from file — expected to be in OCI seccomp format.
1010            Ok(std::fs::read_to_string(path)
1011                .ok()
1012                .and_then(|s| serde_json::from_str(&s).ok()))
1013        }
1014    }
1015}
1016
1017/// Create mount-point directories inside rootfs for each OCI mount.
1018fn prepare_rootfs(rootfs: &Path, spec: &Spec) -> std::io::Result<()> {
1019    std::fs::create_dir_all(rootfs)?;
1020    if let Some(mounts) = spec.mounts() {
1021        for mount in mounts {
1022            let dest = mount.destination();
1023            let target = rootfs.join(dest.strip_prefix("/").unwrap_or(dest));
1024            std::fs::create_dir_all(&target)?;
1025        }
1026    }
1027    Ok(())
1028}
1029
1030/// Receive the PTY controller fd from the OCI runtime via `SCM_RIGHTS`.
1031///
1032/// The runtime sends exactly one fd (the PTY controller) over the console
1033/// socket after creating the PTY inside the container.
1034fn recv_pty_controller(stream: &std::os::unix::net::UnixStream) -> Result<OwnedFd, SandboxError> {
1035    use nix::sys::socket::{ControlMessageOwned, MsgFlags, recvmsg};
1036    use std::os::fd::{AsRawFd, FromRawFd};
1037
1038    let mut buf = [0u8; 1];
1039    let mut iov = [std::io::IoSliceMut::new(&mut buf)];
1040    let mut cmsg_buf = nix::cmsg_space!(std::os::fd::RawFd);
1041
1042    let msg = recvmsg::<()>(
1043        stream.as_raw_fd(),
1044        &mut iov,
1045        Some(&mut cmsg_buf),
1046        MsgFlags::empty(),
1047    )
1048    .map_err(|e| SandboxError::RuntimeFailed {
1049        reason: format!("recvmsg on console socket: {e}"),
1050    })?;
1051
1052    let iter = msg.cmsgs().map_err(|e| SandboxError::RuntimeFailed {
1053        reason: format!("parse control messages: {e}"),
1054    })?;
1055    for cmsg in iter {
1056        if let ControlMessageOwned::ScmRights(fds) = cmsg
1057            && let Some(&raw_fd) = fds.first()
1058        {
1059            // SAFETY: The fd was received via SCM_RIGHTS from the OCI
1060            // runtime's console socket protocol. The runtime guarantees
1061            // this is a valid, newly-created PTY controller fd that we
1062            // now exclusively own.
1063            #[allow(unsafe_code)]
1064            let owned = unsafe { std::os::fd::OwnedFd::from_raw_fd(raw_fd) };
1065            return Ok(owned);
1066        }
1067    }
1068
1069    Err(SandboxError::RuntimeFailed {
1070        reason: "no PTY controller fd received from runtime".into(),
1071    })
1072}
1073
1074/// Determine the best gVisor platform, with process-wide caching.
1075///
1076/// First checks the cache. If not yet probed, runs a trivial `runsc` container
1077/// with `--platform=systrap`. If it succeeds, caches `Systrap`. If it fails
1078/// (typically `PTRACE_ATTACH EPERM` from the `CAP_SYS_PTRACE` bug in rootless
1079/// + host-network mode), falls back to `Ptrace`, logs a warning, and caches
1080/// the result so all future containers skip the probe.
1081#[allow(clippy::doc_lazy_continuation)]
1082fn resolve_gvisor_platform(runsc_path: &Path) -> GvisorPlatform {
1083    use std::sync::atomic::Ordering;
1084
1085    let cached = GVISOR_PLATFORM_CACHE.load(Ordering::Relaxed);
1086    if cached == PLATFORM_SYSTRAP {
1087        return GvisorPlatform::Systrap;
1088    }
1089    if cached == PLATFORM_PTRACE {
1090        return GvisorPlatform::Ptrace;
1091    }
1092
1093    // Probe: try systrap with a trivial container.
1094    debug!("probing gVisor systrap platform");
1095    if probe_gvisor_platform(runsc_path, "systrap") {
1096        debug!("gVisor systrap platform works — using for all future containers");
1097        GVISOR_PLATFORM_CACHE.store(PLATFORM_SYSTRAP, Ordering::Relaxed);
1098        return GvisorPlatform::Systrap;
1099    }
1100
1101    // Systrap failed. Try ptrace to confirm it works at all.
1102    if probe_gvisor_platform(runsc_path, "ptrace") {
1103        warn!(
1104            "gVisor systrap platform failed (likely missing CAP_SYS_PTRACE in \
1105             rootless+host-network mode — see runsc/sandbox/sandbox.go \
1106             ConfigureCmdForRootless). Falling back to ptrace platform for all \
1107             future gVisor containers in this process."
1108        );
1109        GVISOR_PLATFORM_CACHE.store(PLATFORM_PTRACE, Ordering::Relaxed);
1110        return GvisorPlatform::Ptrace;
1111    }
1112
1113    // Neither works — default to ptrace and let the actual spawn surface the error.
1114    warn!("gVisor probe failed for both systrap and ptrace — defaulting to ptrace");
1115    GVISOR_PLATFORM_CACHE.store(PLATFORM_PTRACE, Ordering::Relaxed);
1116    GvisorPlatform::Ptrace
1117}
1118
1119/// Run a trivial `runsc --platform=<p> run` and check if it exits 0.
1120fn probe_gvisor_platform(runsc_path: &Path, platform: &str) -> bool {
1121    let Ok(bundle_dir) = tempfile::TempDir::with_prefix("synwire-") else {
1122        return false;
1123    };
1124    let rootfs = bundle_dir.path().join("rootfs");
1125    if std::fs::create_dir_all(&rootfs).is_err() {
1126        return false;
1127    }
1128
1129    let Ok(spec) = build_gvisor_probe_spec() else {
1130        return false;
1131    };
1132
1133    // Create mount-point directories inside rootfs.
1134    if let Err(_e) = prepare_rootfs(&rootfs, &spec) {
1135        return false;
1136    }
1137
1138    let Ok(spec_json) = serde_json::to_string_pretty(&spec) else {
1139        return false;
1140    };
1141    if std::fs::write(bundle_dir.path().join("config.json"), spec_json).is_err() {
1142        return false;
1143    }
1144
1145    let container_id = format!("probe-{}", uuid::Uuid::new_v4());
1146    let result = std::process::Command::new(runsc_path)
1147        .arg("--rootless")
1148        .arg("--network=host")
1149        .arg(format!("--platform={platform}"))
1150        .arg("run")
1151        .arg("--bundle")
1152        .arg(bundle_dir.path())
1153        .arg(&container_id)
1154        .stdin(std::process::Stdio::null())
1155        .stdout(std::process::Stdio::null())
1156        .stderr(std::process::Stdio::null())
1157        .status();
1158
1159    match result {
1160        Ok(status) => status.success(),
1161        Err(_) => false,
1162    }
1163}
1164
1165/// Build a minimal OCI spec for the gVisor platform probe (runs `/bin/true`).
1166fn build_gvisor_probe_spec() -> Result<Spec, oci_spec::OciSpecError> {
1167    let uid = nix::unistd::getuid().as_raw();
1168    let gid = nix::unistd::getgid().as_raw();
1169
1170    let empty_caps: oci_spec::runtime::Capabilities = std::collections::HashSet::default();
1171    let caps = LinuxCapabilitiesBuilder::default()
1172        .bounding(empty_caps.clone())
1173        .effective(empty_caps.clone())
1174        .inheritable(empty_caps.clone())
1175        .permitted(empty_caps.clone())
1176        .ambient(empty_caps)
1177        .build()?;
1178
1179    let process = ProcessBuilder::default()
1180        .terminal(false)
1181        .user(UserBuilder::default().uid(0u32).gid(0u32).build()?)
1182        .args(vec!["/bin/true".into()])
1183        .env(vec!["PATH=/usr/bin:/bin".into()])
1184        .cwd("/")
1185        .capabilities(caps)
1186        .no_new_privileges(true)
1187        .build()?;
1188
1189    let root = RootBuilder::default()
1190        .path("rootfs")
1191        .readonly(true)
1192        .build()?;
1193
1194    let namespaces = vec![
1195        LinuxNamespaceBuilder::default()
1196            .typ(LinuxNamespaceType::Pid)
1197            .build()?,
1198        LinuxNamespaceBuilder::default()
1199            .typ(LinuxNamespaceType::Mount)
1200            .build()?,
1201        LinuxNamespaceBuilder::default()
1202            .typ(LinuxNamespaceType::Ipc)
1203            .build()?,
1204        LinuxNamespaceBuilder::default()
1205            .typ(LinuxNamespaceType::Uts)
1206            .build()?,
1207        LinuxNamespaceBuilder::default()
1208            .typ(LinuxNamespaceType::Cgroup)
1209            .build()?,
1210    ];
1211
1212    let linux = LinuxBuilder::default()
1213        .namespaces(namespaces)
1214        .uid_mappings(vec![
1215            LinuxIdMappingBuilder::default()
1216                .container_id(0u32)
1217                .host_id(uid)
1218                .size(1u32)
1219                .build()?,
1220        ])
1221        .gid_mappings(vec![
1222            LinuxIdMappingBuilder::default()
1223                .container_id(0u32)
1224                .host_id(gid)
1225                .size(1u32)
1226                .build()?,
1227        ])
1228        .build()?;
1229
1230    SpecBuilder::default()
1231        .version("1.0.2")
1232        .process(process)
1233        .root(root)
1234        .mounts(probe_mounts()?)
1235        .linux(linux)
1236        .build()
1237}
1238
1239/// Minimal mount list for gVisor probe — just enough to run `/bin/true`.
1240fn probe_mounts() -> Result<Vec<Mount>, oci_spec::OciSpecError> {
1241    let mut mounts = vec![
1242        MountBuilder::default()
1243            .destination("/proc")
1244            .typ("proc")
1245            .source("proc")
1246            .options(vec!["nosuid".into(), "noexec".into(), "nodev".into()])
1247            .build()?,
1248        MountBuilder::default()
1249            .destination("/dev")
1250            .typ("tmpfs")
1251            .source("tmpfs")
1252            .options(vec![
1253                "nosuid".into(),
1254                "strictatime".into(),
1255                "mode=755".into(),
1256                "size=65536k".into(),
1257            ])
1258            .build()?,
1259    ];
1260    for dir in &["/usr", "/bin", "/sbin", "/lib", "/lib64"] {
1261        if Path::new(dir).exists() {
1262            mounts.push(
1263                MountBuilder::default()
1264                    .destination(*dir)
1265                    .typ("bind")
1266                    .source(*dir)
1267                    .options(vec!["rbind".into(), "ro".into()])
1268                    .build()?,
1269            );
1270        }
1271    }
1272    Ok(mounts)
1273}
1274
1275/// Generate minimal `/etc/passwd` and `/etc/group` files for the current user.
1276///
1277/// Includes a `root` entry (required by many tools) and the real user so
1278/// that `whoami`, `id`, `ls -la`, `$HOME`, and `~` all resolve correctly
1279/// inside the container.
1280fn generate_user_files(passwd_path: &Path, group_path: &Path) -> std::io::Result<()> {
1281    // Try to get the real username; fall back to "user".
1282    let username = std::env::var("USER")
1283        .or_else(|_| std::env::var("LOGNAME"))
1284        .unwrap_or_else(|_| "user".into());
1285
1286    let home = std::env::var("HOME").unwrap_or_else(|_| format!("/home/{username}"));
1287
1288    let shell = std::env::var("SHELL").unwrap_or_else(|_| "/bin/sh".into());
1289
1290    let gid = nix::unistd::getgid().as_raw();
1291
1292    // Try to resolve the group name from the host /etc/group.
1293    let groupname = resolve_group_name(gid).unwrap_or_else(|| username.clone());
1294
1295    // In a rootless user namespace the process runs as UID 0 inside (which
1296    // is mapped to the real host UID). We make `whoami` and `id` show the
1297    // real username by mapping UID 0 to the real user's name, home, and
1298    // shell. This is the same trick Podman uses for rootless containers.
1299    //
1300    // passwd format: name:x:uid:gid:gecos:home:shell
1301    let passwd = format!(
1302        "{username}:x:0:0::{home}:{shell}\nnobody:x:65534:65534:nobody:/nonexistent:/sbin/nologin\n"
1303    );
1304
1305    // group format: name:x:gid:members
1306    let group = format!("{groupname}:x:0:{username}\nnobody:x:65534:\n");
1307
1308    std::fs::write(passwd_path, passwd)?;
1309    std::fs::write(group_path, group)?;
1310    Ok(())
1311}
1312
/// Try to resolve a GID to a group name by scanning `/etc/group`.
///
/// Returns the name from the first line whose third colon-separated field
/// parses as `gid`. Lines that are blank, have fewer than three fields, or
/// whose GID field is not a number (for example NIS compat entries such as
/// `+:::`) are skipped rather than aborting the scan.
///
/// Returns `None` if `/etc/group` cannot be read or no line matches.
fn resolve_group_name(gid: u32) -> Option<String> {
    let content = std::fs::read_to_string("/etc/group").ok()?;
    content.lines().find_map(|line| {
        // Inside this closure `?` rejects only the current line.
        let mut fields = line.splitn(4, ':');
        let name = fields.next()?;
        let _password = fields.next()?;
        let entry_gid: u32 = fields.next()?.parse().ok()?;
        (entry_gid == gid).then(|| name.to_string())
    })
}
1327
1328// ── helpers ────────────────────────────────────────────────────────────────
1329
1330fn which_binary(name: &str) -> Result<PathBuf, ()> {
1331    which::which(name).map_err(|_| ())
1332}
1333
/// Turn `path` into an absolute path string.
///
/// An absolute `path` is returned unchanged. A relative one is joined onto
/// the current working directory, with no normalisation. Returns `None`
/// when the working directory cannot be determined.
fn to_absolute(path: &str) -> Option<String> {
    let candidate = Path::new(path);
    if candidate.is_absolute() {
        Some(path.to_owned())
    } else {
        let base = std::env::current_dir().ok()?;
        Some(base.join(candidate).display().to_string())
    }
}