rustwide 0.24.0

use crate::Workspace;
use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics};
use log::{error, info};
use serde::Deserialize;
use std::{
    error::Error,
    fmt,
    ops::RangeInclusive,
    path::{Path, PathBuf},
    time::Duration,
};

/// The Docker image used for sandboxing.
///
/// Values are created with `SandboxImage::local` (image already present on the host) or
/// `SandboxImage::remote` (image pulled from its registry first).
#[derive(Debug)]
pub struct SandboxImage {
    // Image reference handed to the `docker` CLI. After a successful pull this is replaced by the
    // digest-pinned name when `get_name_with_hash` can determine one.
    name: String,
}

impl SandboxImage {
    /// Load a local image present in the host machine.
    ///
    /// No pull is attempted; the image is only looked up with `docker image inspect`.
    ///
    /// # Errors
    ///
    /// If the image is not available locally `CommandError::SandboxImageMissing` will be
    /// returned instead.
    pub fn local(name: &str) -> Result<Self, CommandError> {
        let image = SandboxImage { name: name.into() };
        info!("sandbox image is local, skipping pull");
        image.ensure_exists_locally()?;
        Ok(image)
    }

    /// Pull an image from its Docker registry.
    ///
    /// This will access the network to download the image from the registry. After the pull the
    /// stored image name is replaced with its digest-pinned form when Docker reports one.
    ///
    /// # Errors
    ///
    /// If pulling fails `CommandError::SandboxImagePullFailed` will be returned instead. If the
    /// image cannot be inspected locally after the pull, `CommandError::SandboxImageMissing` is
    /// returned.
    pub fn remote(name: &str) -> Result<Self, CommandError> {
        let mut image = SandboxImage { name: name.into() };
        // The name may point at any registry (for example `ghcr.io/...`), so the log message must
        // not claim a specific one.
        info!("pulling image {name} from its registry");
        Command::new_workspaceless("docker")
            .args(&["pull", name])
            .run()
            .map_err(|e| CommandError::SandboxImagePullFailed(Box::new(e)))?;
        // Prefer the digest-pinned name so later commands refer to exactly the pulled image.
        if let Some(name_with_hash) = image.get_name_with_hash() {
            image.name = name_with_hash;
            info!("pulled image {}", image.name);
        }
        image.ensure_exists_locally()?;
        Ok(image)
    }

    /// Check that `docker image inspect` succeeds for the stored image name, mapping any failure
    /// to `CommandError::SandboxImageMissing`.
    fn ensure_exists_locally(&self) -> Result<(), CommandError> {
        info!("checking the image {} is available locally", self.name);
        Command::new_workspaceless("docker")
            .args(&["image", "inspect", &self.name])
            .log_output(false)
            .run()
            .map_err(|e| CommandError::SandboxImageMissing(Box::new(e)))?;
        Ok(())
    }

    /// Get the image name with its hash, if available.
    /// In case of a github package registry image, something like:
    ///    ghcr.io/rust-lang/crates-build-env/linux@sha256:61361fe0a...
    ///
    /// Returns `None` when the `docker inspect` invocation fails or prints nothing.
    pub fn get_name_with_hash(&self) -> Option<String> {
        Command::new_workspaceless("docker")
            .args(&[
                "inspect",
                &self.name,
                "--format",
                "{{index .RepoDigests 0}}",
            ])
            .log_output(false)
            .run_capture()
            .ok()?
            .stdout_lines()
            .first()
            .cloned()
    }
}

/// Whether to mount a path in the sandbox with write permissions or not.
///
/// The kind is chosen per mount when calling `SandboxBuilder::mount`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum MountKind {
    /// Allow the sandboxed code to change the mounted data.
    ///
    /// Rendered as the `rw` flag when the mount is passed to Docker with `-v`.
    ReadWrite,
    /// Prevent the sandboxed code from changing the mounted data.
    ///
    /// Rendered as the `ro` flag when the mount is passed to Docker with `-v`, or as the
    /// `readonly` option when it is passed with `--mount`.
    ReadOnly,
}

/// A single bind mount requested through `SandboxBuilder::mount`.
#[derive(Clone)]
struct MountConfig {
    // Directory to expose to the sandbox, as seen by the current process. It is remapped to the
    // real host path when the current process itself runs inside a container.
    host_path: PathBuf,
    // Location at which the directory appears inside the sandbox.
    sandbox_path: PathBuf,
    // Whether the sandbox is allowed to write to the mount.
    perm: MountKind,
}

impl MountConfig {
    /// Resolve the mount source to a path usable as the source of a Docker bind mount.
    ///
    /// Outside of a container this is just the normalized `host_path`. Inside a container the
    /// path is rebased onto the host-side source of the container mount that contains it.
    ///
    /// # Errors
    ///
    /// Returns `CommandError::WorkspaceNotMountedCorrectly` when running inside a container and
    /// no mount of that container contains `host_path`.
    fn host_path(&self, workspace: &Workspace) -> Result<PathBuf, CommandError> {
        let Some(container) = workspace.current_container() else {
            return Ok(crate::utils::normalize_path(&self.host_path));
        };

        // If we're inside a Docker container we'll need to remap the mount sources to point to
        // the directories in the host system instead of the containers. To do that we try to
        // see if the mount source is inside an existing mount point, and "rebase" the path.
        let inside_container_path = crate::utils::normalize_path(&self.host_path);

        // Mount points can be nested (e.g. `/workspace` and `/workspace/cache`), and the most
        // specific one is what actually backs the path, so keep the match with the deepest
        // destination instead of whichever happens to be listed first. On equal depth the
        // earliest match wins.
        let mut best: Option<(usize, PathBuf)> = None;
        for mount in container.mounts() {
            let dest = crate::utils::normalize_path(Path::new(mount.destination()));
            if let Ok(shared) = inside_container_path.strip_prefix(&dest) {
                let depth = dest.components().count();
                let more_specific = best
                    .as_ref()
                    .map_or(true, |(best_depth, _)| depth > *best_depth);
                if more_specific {
                    best = Some((depth, Path::new(mount.source()).join(shared)));
                }
            }
        }

        best.map(|(_, rebased)| rebased)
            .ok_or(CommandError::WorkspaceNotMountedCorrectly)
    }

    /// Render the mount as the value of Docker's `-v` option:
    /// `<host path>:<sandbox path>:<rw|ro>,Z`.
    ///
    /// Fails only if the host path cannot be resolved (see `host_path`).
    fn to_volume_arg(&self, workspace: &Workspace) -> Result<String, CommandError> {
        let perm = match self.perm {
            MountKind::ReadWrite => "rw",
            MountKind::ReadOnly => "ro",
        };
        Ok(format!(
            "{}:{}:{},Z",
            self.host_path(workspace)?.to_string_lossy(),
            self.sandbox_path.to_string_lossy(),
            perm
        ))
    }

    /// Render the mount as the value of Docker's `--mount` option:
    /// `type=bind,src=<host path>,dst=<sandbox path>[,readonly]`.
    ///
    /// Fails only if the host path cannot be resolved (see `host_path`).
    fn to_mount_arg(&self, workspace: &Workspace) -> Result<String, CommandError> {
        // Optional trailing option, including its leading comma so it can be appended as-is.
        let readonly = match self.perm {
            MountKind::ReadOnly => ",readonly",
            MountKind::ReadWrite => "",
        };

        Ok(format!(
            "type=bind,src={},dst={}{}",
            self.host_path(workspace)?.to_string_lossy(),
            self.sandbox_path.to_string_lossy(),
            readonly,
        ))
    }
}

/// The sandbox builder allows to configure a sandbox, used later in a
/// [`Command`](struct.Command.html).
#[derive(Clone)]
pub struct SandboxBuilder {
    // Bind mounts, passed to `docker create`.
    mounts: Vec<MountConfig>,
    // Environment variables, passed as `-e KEY=VALUE` to `docker exec`.
    env: Vec<(String, String)>,
    // Memory limit in bytes, passed as `-m` to `docker create`.
    memory_limit: Option<usize>,
    // Fraction of CPU cores available, passed as `--cpus` to `docker create`.
    cpu_limit: Option<f32>,
    // Inclusive range of CPU IDs, passed as `--cpuset-cpus` to `docker create`.
    cpuset_cpus: Option<RangeInclusive<usize>>,
    // Working directory, passed as `-w` to `docker exec`.
    workdir: Option<String>,
    // User in `uid:gid` form, passed as `--user` to `docker exec`.
    user: Option<String>,
    // Program and arguments executed inside the container through `docker exec`.
    cmd: Vec<String>,
    // When false the container is created with `--network none`.
    enable_networking: bool,
}

impl SandboxBuilder {
    /// Create a new sandbox builder.
    ///
    /// The builder starts with no mounts, no environment variables, no resource limits and
    /// networking enabled.
    pub fn new() -> Self {
        Self {
            mounts: Vec::new(),
            env: Vec::new(),
            workdir: None,
            memory_limit: None,
            cpu_limit: None,
            cpuset_cpus: None,
            user: None,
            cmd: Vec::new(),
            enable_networking: true,
        }
    }

    /// Mount a path inside the sandbox. It's possible to choose whether to mount the path
    /// read-only or writeable through the [`MountKind`](enum.MountKind.html) enum.
    ///
    /// The host directory is created when the sandbox is created if it doesn't exist yet.
    pub fn mount(mut self, host_path: &Path, sandbox_path: &Path, kind: MountKind) -> Self {
        self.mounts.push(MountConfig {
            host_path: host_path.into(),
            sandbox_path: sandbox_path.into(),
            perm: kind,
        });
        self
    }

    /// Enable or disable the sandbox's memory limit. When the processes inside the sandbox use
    /// more memory than the limit the sandbox will be killed.
    ///
    /// By default no memory limit is present, and its size is provided in bytes.
    pub fn memory_limit(mut self, limit: Option<usize>) -> Self {
        self.memory_limit = limit;
        self
    }

    /// Enable or disable the sandbox's CPU limit. The value of the limit is the fraction of CPU
    /// cores the sandbox is allowed to use.
    ///
    /// For example, on a 4-core machine, setting a CPU limit of `2.0` will only allow two of the
    /// cores to be used, while a CPU limit of `0.5` will only allow half of a single CPU core to
    /// be used.
    pub fn cpu_limit(mut self, limit: Option<f32>) -> Self {
        self.cpu_limit = limit;
        self
    }

    /// Restrict the sandbox to run on a specific inclusive range of CPU IDs.
    ///
    /// For example, `0..=1` will restrict the sandbox to CPUs 0 and 1 and translate to Docker's
    /// `--cpuset-cpus 0-1`.
    pub fn cpuset_cpus(mut self, cpus: Option<RangeInclusive<usize>>) -> Self {
        self.cpuset_cpus = cpus;
        self
    }

    /// Enable or disable the sandbox's networking. When it's disabled processes inside the sandbox
    /// won't be able to reach network service on the Internet or the host machine.
    ///
    /// By default networking is enabled.
    pub fn enable_networking(mut self, enable: bool) -> Self {
        self.enable_networking = enable;
        self
    }

    /// Add an environment variable for the sandboxed command. Variables are passed to
    /// `docker exec` in insertion order.
    pub(super) fn env<S1: Into<String>, S2: Into<String>>(mut self, key: S1, value: S2) -> Self {
        self.env.push((key.into(), value.into()));
        self
    }

    /// Set the program and arguments to execute inside the sandbox, replacing any previous value.
    pub(super) fn cmd(mut self, cmd: Vec<String>) -> Self {
        self.cmd = cmd;
        self
    }

    /// Set the working directory of the sandboxed command.
    pub(super) fn workdir<S: Into<String>>(mut self, workdir: S) -> Self {
        self.workdir = Some(workdir.into());
        self
    }

    /// Set the user and group IDs the sandboxed command runs as (`--user <user>:<group>`).
    pub(super) fn user(mut self, user: u32, group: u32) -> Self {
        self.user = Some(format!("{user}:{group}"));
        self
    }

    /// Create (without starting) the Docker container described by this builder.
    ///
    /// Mounts, resource limits and networking are passed to `docker create`. The command,
    /// environment, working directory and user are carried over to the returned container and
    /// used later by `docker exec`.
    ///
    /// # Errors
    ///
    /// Returns `CommandError::SandboxContainerCreate` if `docker create` fails or does not print
    /// a container ID. Errors from creating a mount's host directory or from resolving its host
    /// path are returned unchanged.
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(
            skip_all,
            fields(
                image = %workspace.sandbox_image().name,
                mounts = self.mounts.len(),
                env = self.env.len(),
                memory_limit = ?self.memory_limit,
                cpu_limit = ?self.cpu_limit,
                cpuset_cpus = ?self.cpuset_cpus,
                enable_networking = self.enable_networking,
                command = ?self.cmd,
            )
        )
    )]
    fn create(self, workspace: &Workspace) -> Result<Container<'_>, CommandError> {
        let mut args: Vec<String> = vec!["create".into()];

        // Mounts are container-level config, always on `docker create`
        for mount in &self.mounts {
            std::fs::create_dir_all(&mount.host_path)?;

            // On Windows, we mount paths containing a colon which don't work with `-v`, but on
            // Linux we need the Z flag, which doesn't work with `--mount`, for SELinux relabeling.
            if cfg!(windows) {
                args.push("--mount".into());
                args.push(mount.to_mount_arg(workspace)?);
            } else {
                args.push("-v".into());
                args.push(mount.to_volume_arg(workspace)?);
            }
        }

        // Resource limits and networking are container-level config
        if let Some(limit) = self.memory_limit {
            args.push("-m".into());
            args.push(limit.to_string());
        }

        if let Some(limit) = self.cpu_limit {
            args.push("--cpus".into());
            args.push(limit.to_string());
        }

        if let Some(cpus) = self.cpuset_cpus {
            args.push("--cpuset-cpus".into());
            args.push(format_cpuset_cpus(&cpus));
        }

        if !self.enable_networking {
            args.push("--network".into());
            args.push("none".into());
        }

        if cfg!(windows) {
            args.push("--isolation=process".into());
        }

        args.push(workspace.sandbox_image().name.clone());

        // Use an idle command; the real command runs via `docker exec` so the container stays
        // alive after the command finishes, allowing us to read cgroup metrics.
        args.push("sleep".into());
        args.push("infinity".into());

        let out = Command::new(workspace, "docker")
            .args(&args)
            .run_capture()
            .map_err(|err| CommandError::SandboxContainerCreate(Box::new(err)))?;

        // The container ID is read from the first stdout line. Report a missing ID as a creation
        // failure instead of panicking on an out-of-bounds index.
        let id = out.stdout_lines().first().cloned().ok_or_else(|| {
            CommandError::SandboxContainerCreate(Box::new(CommandError::from(
                std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    "docker create did not print a container ID",
                ),
            )))
        })?;

        Ok(Container {
            id,
            workspace,
            cmd: self.cmd,
            env: self.env,
            workdir: self.workdir,
            user: self.user,
        })
    }

    /// Create a container from this builder, run the configured command inside it and delete the
    /// container afterwards.
    ///
    /// All parameters other than `workspace` are forwarded unchanged to the container's `run`.
    /// The container is deleted when this function returns or unwinds; a failure to delete it is
    /// only logged.
    // The arguments mirror the knobs of `Command` one to one, so bundling them wouldn't help.
    #[allow(clippy::too_many_arguments)]
    // The `process_lines` callback type is spelled out inline to match the container's `run`.
    #[allow(clippy::type_complexity)]
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(
            skip_all,
            fields(
                image = %workspace.sandbox_image().name,
                mounts = self.mounts.len(),
                env = self.env.len(),
                memory_limit = ?self.memory_limit,
                cpu_limit = ?self.cpu_limit,
                cpuset_cpus = ?self.cpuset_cpus,
                enable_networking = self.enable_networking,
                command = ?self.cmd,
                capture,
                timeout_secs = ?timeout.map(|timeout| timeout.as_secs()),
                no_output_timeout_secs = ?no_output_timeout.map(|timeout| timeout.as_secs()),
            )
        )
    )]
    pub(super) fn run(
        self,
        workspace: &Workspace,
        timeout: Option<Duration>,
        no_output_timeout: Option<Duration>,
        process_lines: Option<&mut dyn FnMut(&str, &mut ProcessLinesActions)>,
        log_output: bool,
        log_command: bool,
        capture: bool,
    ) -> Result<ProcessOutput, CommandError> {
        let container = self.create(workspace)?;

        // Ensure the container is properly deleted even if something panics
        scopeguard::defer! {{
            if let Err(err) = container.delete() {
                error!("failed to delete container {}", container.id);
                error!("caused by: {err}");
                // Walk the whole source chain so the root cause ends up in the log too.
                let mut err: &dyn Error = &err;
                while let Some(cause) = err.source() {
                    error!("caused by: {cause}");
                    err = cause;
                }
            }
        }}

        container.run(
            timeout,
            no_output_timeout,
            process_lines,
            log_output,
            log_command,
            capture,
        )
    }
}

/// One entry of the JSON array printed by `docker inspect`, as parsed by `Container::inspect`.
/// Only the `State` object is deserialized here.
#[derive(Deserialize)]
#[serde(rename_all = "PascalCase")]
struct InspectContainer {
    // Read from the `State` key (field names are converted to PascalCase).
    state: InspectState,
}

/// The part of a container's `State` object used to detect out-of-memory kills.
#[derive(Deserialize)]
struct InspectState {
    // Read from the `OOMKilled` key. `Container::run` reports `CommandError::SandboxOOM` when
    // this is true.
    #[serde(rename = "OOMKilled")]
    oom_killed: bool,
}

/// A Docker container created by `SandboxBuilder::create`, together with the command-level
/// configuration applied when the command is executed in it.
#[derive(Clone)]
struct Container<'w> {
    // Docker container ID
    id: String,
    // Workspace used to spawn the `docker` commands operating on this container.
    workspace: &'w Workspace,
    // Command-level config for `docker exec` (not baked into `docker create`)
    // Program and arguments to execute.
    cmd: Vec<String>,
    // Environment variables, passed as `-e KEY=VALUE`.
    env: Vec<(String, String)>,
    // Working directory, passed as `-w`.
    workdir: Option<String>,
    // User in `uid:gid` form, passed as `--user`.
    user: Option<String>,
}

impl fmt::Display for Container<'_> {
    /// Display the container as its Docker ID, delegating to the ID string's own `Display`
    /// implementation.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.id, formatter)
    }
}

impl Container<'_> {
    /// Run `docker inspect` on the container and parse the parts of its output this module uses.
    ///
    /// # Errors
    ///
    /// Returns the command's error if `docker inspect` fails, and
    /// `CommandError::InvalidDockerInspectOutput` if its output is not a JSON array holding
    /// exactly one container.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn inspect(&self) -> Result<InspectContainer, CommandError> {
        let output = Command::new(self.workspace, "docker")
            .args(&["inspect", &self.id])
            .log_output(false)
            .run_capture()?;

        // Deserializing into a one-element array makes serde reject any other number of entries,
        // so unexpected output surfaces as an error instead of a panic.
        let [details]: [InspectContainer; 1] =
            ::serde_json::from_str(&output.stdout_lines().join("\n"))
                .map_err(CommandError::InvalidDockerInspectOutput)?;
        Ok(details)
    }

    /// Start the container in detached mode (without `-a`).
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn start(&self) -> Result<(), CommandError> {
        Command::new(self.workspace, "docker")
            .args(&["start", &self.id])
            .log_output(false)
            .run()
            .map(|_| ())
    }

    /// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn stop(&self) -> Result<(), CommandError> {
        Command::new(self.workspace, "docker")
            .args(&["stop", "-t", "1", &self.id])
            .log_output(false)
            .run()
            .map(|_| ())
    }

    /// Helper to `docker exec cat <path>` and return stdout lines on success.
    ///
    /// Any failure of the command is mapped to `None`. Neither the command nor its output is
    /// logged.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn exec_cat_file(&self, path: &str) -> Option<Vec<String>> {
        Command::new(self.workspace, "docker")
            .args(&["exec", &self.id, "cat", path])
            .log_output(false)
            .log_command(false)
            .run_capture()
            .ok()
            .map(|o| o.stdout_lines().to_vec())
    }

    /// Best-effort read of peak memory usage from the still-running container.
    /// Tries cgroups v2 first, then falls back to cgroups v1.
    ///
    /// Returns `None` when neither file can be read and parsed as an integer.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn read_memory_peak(&self) -> Option<u64> {
        let paths = [
            "/sys/fs/cgroup/memory.peak",                      // v2
            "/sys/fs/cgroup/memory/memory.max_usage_in_bytes", // v1
        ];
        paths.into_iter().find_map(|path| {
            self.exec_cat_file(path)
                .and_then(|lines| lines.first()?.trim().parse::<u64>().ok())
        })
    }

    /// Check if any OOM kills occurred in the container's cgroup.
    ///
    /// With the `docker exec` model, the OOM killer may only kill the exec'd process
    /// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't
    /// report `OOMKilled`, so we check the cgroup events directly.
    /// Tries cgroups v2 first, then falls back to cgroups v1.
    ///
    /// Returns `false` when neither file can be read.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn check_cgroup_oom(&self) -> bool {
        // Both v1 and v2 expose `oom_kill <count>` — just in different files.
        let paths = [
            "/sys/fs/cgroup/memory.events",             // v2
            "/sys/fs/cgroup/memory/memory.oom_control", // v1
        ];
        for path in paths {
            if let Some(lines) = self.exec_cat_file(path) {
                // The file exists, so its counter is authoritative — don't try the other version.
                return lines.iter().any(|line| {
                    line.strip_prefix("oom_kill ")
                        .and_then(|rest| rest.trim().parse::<u64>().ok())
                        .is_some_and(|count| count > 0)
                });
            }
        }
        false
    }

    /// Start the container, run the configured command in it through `docker exec`, collect
    /// memory metrics and stop the container.
    ///
    /// `timeout`, `no_output_timeout`, `process_lines`, `log_output` and `log_command` are
    /// applied to the `docker exec` command; `capture` is forwarded to `run_inner`.
    ///
    /// On success the returned output's statistics carry the peak memory usage, when it could be
    /// read.
    ///
    /// # Errors
    ///
    /// Returns `CommandError::SandboxOOM` when an out-of-memory kill was detected and the command
    /// either succeeded or failed with `CommandError::ExecutionFailed`; other command errors are
    /// returned unchanged. Errors from starting or inspecting the container are returned as-is.
    // The `process_lines` callback type is spelled out inline rather than hidden behind an alias.
    #[allow(clippy::type_complexity)]
    #[cfg_attr(
        feature = "tracing",
        tracing::instrument(skip_all, fields(container_id = %self.id, capture))
    )]
    fn run(
        &self,
        timeout: Option<Duration>,
        no_output_timeout: Option<Duration>,
        process_lines: Option<&mut dyn FnMut(&str, &mut ProcessLinesActions)>,
        log_output: bool,
        log_command: bool,
        capture: bool,
    ) -> Result<ProcessOutput, CommandError> {
        // Start the container in detached mode (runs `sleep infinity`)
        self.start()?;

        // Build the `docker exec` command with env/workdir/user from the sandbox config
        let mut args: Vec<String> = vec!["exec".into()];

        for (var, value) in &self.env {
            args.push("-e".into());
            args.push(format!("{var}={value}"));
        }

        if let Some(ref workdir) = self.workdir {
            args.push("-w".into());
            args.push(workdir.clone());
        }

        if let Some(ref user) = self.user {
            args.push("--user".into());
            args.push(user.clone());
        }

        args.push(self.id.clone());
        args.extend(self.cmd.iter().cloned());

        let mut cmd = Command::new(self.workspace, "docker")
            .args(&args)
            .timeout(timeout)
            .log_output(log_output)
            .log_command(log_command)
            .no_output_timeout(no_output_timeout);

        if let Some(f) = process_lines {
            cmd = cmd.process_lines(f);
        }

        let res = cmd.run_inner(capture);

        // Read peak memory usage while the container is still running (best-effort)
        let memory_peak = self.read_memory_peak();

        // Check OOM via cgroup events (catches cases where only the exec'd process
        // was killed, leaving the container's init process alive)
        let cgroup_oom = self.check_cgroup_oom();

        // Explicitly stop the container now that we're done reading metrics.
        // The scopeguard will still call `docker rm -f` for final cleanup.
        let _ = self.stop();

        let details = self.inspect()?;

        // Return a different error if the container was killed due to an OOM
        if details.state.oom_killed || cgroup_oom {
            Err(match res {
                Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM,
                Err(err) => err,
            })
        } else {
            res.map(|mut output| {
                output.statistics = ProcessStatistics { memory_peak };
                output
            })
        }
    }

    /// Forcefully remove the container with `docker rm -f`.
    #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
    fn delete(&self) -> Result<(), CommandError> {
        Command::new(self.workspace, "docker")
            .args(&["rm", "-f", &self.id])
            .run()
            .map(|_| ())
    }
}

/// Check whether the Docker daemon is running.
///
/// Sandboxing needs a reachable Docker daemon. This probes it by running `docker info` and
/// reports whether that command succeeded. Sandboxed commands fail on their own when the daemon
/// is offline, but calling this first lets the caller detect the problem earlier.
pub fn docker_running(workspace: &Workspace) -> bool {
    info!("checking if the docker daemon is running");
    let probe = Command::new(workspace, "docker")
        .args(&["info"])
        .log_output(false)
        .run();
    probe.is_ok()
}

/// Render an inclusive range of CPU IDs as `<start>-<end>`, the form passed to Docker's
/// `--cpuset-cpus` option.
fn format_cpuset_cpus(cpus: &RangeInclusive<usize>) -> String {
    let (first, last) = (cpus.start(), cpus.end());
    format!("{first}-{last}")
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An inclusive CPU range is rendered as `<start>-<end>`.
    #[test]
    fn formats_cpuset_cpus() {
        let rendered = format_cpuset_cpus(&(2..=4));
        assert_eq!(rendered, "2-4");
    }
}