Skip to main content

libcontainer/container/
tenant_builder.rs

1use std::collections::HashMap;
2use std::convert::TryFrom;
3use std::ffi::{OsStr, OsString};
4use std::fs;
5use std::io::BufReader;
6use std::os::fd::{AsRawFd, OwnedFd};
7use std::path::{Path, PathBuf};
8use std::rc::Rc;
9use std::str::FromStr;
10
11use caps::Capability;
12use nix::fcntl::OFlag;
13use nix::unistd::{Pid, pipe2, read};
14use oci_spec::runtime::{
15    Capabilities as SpecCapabilities, Capability as SpecCapability, LinuxBuilder,
16    LinuxCapabilities, LinuxCapabilitiesBuilder, LinuxNamespace, LinuxNamespaceBuilder,
17    LinuxNamespaceType, LinuxSchedulerPolicy, Process, ProcessBuilder, Spec, UserBuilder,
18};
19use procfs::process::Namespace;
20
21use super::Container;
22use super::builder::ContainerBuilder;
23use crate::capabilities::CapabilityExt;
24use crate::container::ContainerStatus;
25use crate::container::builder_impl::ContainerBuilderImpl;
26use crate::error::{ErrInvalidSpec, LibcontainerError, MissingSpecError};
27use crate::notify_socket::NotifySocket;
28use crate::process::args::ContainerType;
29use crate::syscall::syscall::create_syscall;
30use crate::user_ns::UserNamespaceConfig;
31use crate::{tty, utils};
32
/// Names of the namespace entries, as keyed in the init process's namespace
/// map, that a tenant process tries to join.
const NAMESPACE_TYPES: &[&str] = &[
    "ipc", "uts", "net", "pid", "mnt", "cgroup",
];
/// File-name prefix of the randomly named notify socket created for a tenant.
const TENANT_NOTIFY: &str = "tenant-notify-";
/// File-name prefix of the randomly named console (tty) socket created for a tenant.
const TENANT_TTY: &str = "tenant-tty-";
36
37/// Builder that can be used to configure the properties of a process
38/// that will join an existing container sandbox
39pub struct TenantContainerBuilder {
40    base: ContainerBuilder,
41    env: HashMap<String, String>,
42    cwd: Option<PathBuf>,
43    args: Vec<String>,
44    no_new_privs: Option<bool>,
45    capabilities: Vec<String>,
46    process: Option<PathBuf>,
47    detached: bool,
48    as_sibling: bool,
49    additional_gids: Vec<u32>,
50    user: Option<u32>,
51    group: Option<u32>,
52    ignore_paused: bool,
53    sub_cgroup: Option<String>,
54    process_label: Option<String>,
55    apparmor: Option<String>,
56}
57
58/// This is a helper function to get capabilities for tenant container, based on
59/// additional capabilities provided by user and capabilities of existing container
60/// extracted into separate function for easier testing
61fn get_capabilities(
62    additional: &[String],
63    spec: &Spec,
64) -> Result<LinuxCapabilities, LibcontainerError> {
65    let mut caps: Vec<Capability> = Vec::with_capacity(additional.len());
66    for cap in additional {
67        caps.push(Capability::from_str(cap)?);
68    }
69    let caps: SpecCapabilities = caps.iter().map(|c| SpecCapability::from_cap(*c)).collect();
70
71    if let Some(spec_caps) = spec
72        .process()
73        .as_ref()
74        .ok_or(MissingSpecError::Process)?
75        .capabilities()
76    {
77        let mut capabilities_builder = LinuxCapabilitiesBuilder::default();
78
79        let bounding: SpecCapabilities = match spec_caps.bounding() {
80            Some(bounding) => bounding.union(&caps).copied().collect(),
81            None => SpecCapabilities::new().union(&caps).copied().collect(),
82        };
83        capabilities_builder = capabilities_builder.bounding(bounding);
84
85        let effective: SpecCapabilities = match spec_caps.effective() {
86            Some(effective) => effective.union(&caps).copied().collect(),
87            None => SpecCapabilities::new().union(&caps).copied().collect(),
88        };
89        capabilities_builder = capabilities_builder.effective(effective);
90
91        let permitted: SpecCapabilities = match spec_caps.permitted() {
92            Some(permitted) => permitted.union(&caps).copied().collect(),
93            None => SpecCapabilities::new().union(&caps).copied().collect(),
94        };
95        capabilities_builder = capabilities_builder.permitted(permitted);
96
97        // ambient capabilities are only useful when inherent capabilities
98        // are set. Hence we check and set accordingly. Inherent capabilities
99        // are never set from user as that can lead to vulnerability like
100        // https://github.com/advisories/GHSA-f3fp-gc8g-vw66
101        // Hence, we follow runc's code and set things similarly.
102        let caps = if let Some(inheritable) = spec_caps.inheritable() {
103            let ambient: SpecCapabilities = match spec_caps.ambient() {
104                Some(ambient) => ambient.union(&caps).copied().collect(),
105                None => SpecCapabilities::new().union(&caps).copied().collect(),
106            };
107            capabilities_builder = capabilities_builder.ambient(ambient);
108            capabilities_builder = capabilities_builder.inheritable(inheritable.clone());
109            capabilities_builder.build()?
110        } else {
111            let mut caps = capabilities_builder.build()?;
112            // oci-spec-rs sets these to some default caps, so we reset them here
113            caps.set_inheritable(None);
114            caps.set_ambient(None);
115            caps
116        };
117
118        return Ok(caps);
119    }
120
121    // If there are no caps in original container's spec,
122    // we simply set given caps , excluding the inherent and ambient
123    let mut caps = LinuxCapabilitiesBuilder::default()
124        .bounding(caps.clone())
125        .effective(caps.clone())
126        .permitted(caps.clone())
127        .build()?;
128    caps.set_inheritable(None);
129    caps.set_ambient(None);
130    Ok(caps)
131}
132
133impl TenantContainerBuilder {
134    /// Generates the base configuration for a process that will join
135    /// an existing container sandbox from which configuration methods
136    /// can be chained
137    pub(super) fn new(builder: ContainerBuilder) -> Self {
138        Self {
139            base: builder,
140            env: HashMap::new(),
141            cwd: None,
142            args: Vec::new(),
143            no_new_privs: None,
144            capabilities: Vec::new(),
145            process: None,
146            detached: false,
147            as_sibling: false,
148            additional_gids: vec![],
149            user: None,
150            group: None,
151            ignore_paused: false,
152            sub_cgroup: None,
153            process_label: None,
154            apparmor: None,
155        }
156    }
157
158    /// Sets environment variables for the container
159    pub fn with_env(mut self, env: HashMap<String, String>) -> Self {
160        self.env = env;
161        self
162    }
163
164    /// Sets the working directory of the container
165    pub fn with_cwd<P: Into<PathBuf>>(mut self, path: Option<P>) -> Self {
166        self.cwd = path.map(|p| p.into());
167        self
168    }
169
170    /// Sets the command the container will be started with
171    pub fn with_container_args(mut self, args: Vec<String>) -> Self {
172        self.args = args;
173        self
174    }
175
176    pub fn with_no_new_privs(mut self, no_new_privs: bool) -> Self {
177        self.no_new_privs = Some(no_new_privs);
178        self
179    }
180
181    pub fn with_capabilities(mut self, capabilities: Vec<String>) -> Self {
182        self.capabilities = capabilities;
183        self
184    }
185
186    pub fn with_process<P: Into<PathBuf>>(mut self, path: Option<P>) -> Self {
187        self.process = path.map(|p| p.into());
188        self
189    }
190
191    /// Sets if the init process should be run as a child or a sibling of
192    /// the calling process
193    pub fn as_sibling(mut self, as_sibling: bool) -> Self {
194        self.as_sibling = as_sibling;
195        self
196    }
197
198    pub fn with_detach(mut self, detached: bool) -> Self {
199        self.detached = detached;
200        self
201    }
202
203    pub fn with_additional_gids(mut self, gids: Vec<u32>) -> Self {
204        self.additional_gids = gids;
205        self
206    }
207
208    pub fn with_user(mut self, user: Option<u32>) -> Self {
209        self.user = user;
210        self
211    }
212
213    pub fn with_group(mut self, group: Option<u32>) -> Self {
214        self.group = group;
215        self
216    }
217
218    pub fn with_ignore_paused(mut self, ignore_paused: bool) -> Self {
219        self.ignore_paused = ignore_paused;
220        self
221    }
222
223    pub fn with_sub_cgroup(mut self, sub_cgroup: Option<String>) -> Self {
224        self.sub_cgroup = sub_cgroup;
225        self
226    }
227
228    pub fn with_process_label(mut self, process_label: Option<String>) -> Self {
229        self.process_label = process_label;
230        self
231    }
232
233    pub fn with_apparmor(mut self, apparmor: Option<String>) -> Self {
234        self.apparmor = apparmor;
235        self
236    }
237
238    /// Joins an existing container
239    pub fn build(self) -> Result<Pid, LibcontainerError> {
240        let container_dir = self.lookup_container_dir()?;
241        let container = self.load_container_state(container_dir.clone())?;
242        let mut spec = self.load_init_spec(&container)?;
243        self.adapt_spec_for_tenant(&mut spec, &container)?;
244
245        tracing::debug!("{:#?}", spec);
246
247        let notify_path = Self::setup_notify_listener(&container_dir)?;
248        // convert path of root file system of the container to absolute path
249        let rootfs = fs::canonicalize(spec.root().as_ref().ok_or(MissingSpecError::Root)?.path())
250            .map_err(LibcontainerError::OtherIO)?;
251
252        // if socket file path is given in commandline options,
253        // get file descriptors of console socket
254        let csocketfd = self.setup_tty_socket(&container_dir)?;
255
256        let use_systemd = self.should_use_systemd(&container);
257        let user_ns_config = UserNamespaceConfig::new(&spec)?;
258
259        let (read_end, write_end) =
260            pipe2(OFlag::O_CLOEXEC).map_err(LibcontainerError::OtherSyscall)?;
261
262        let mut builder_impl = ContainerBuilderImpl {
263            container_type: ContainerType::TenantContainer {
264                exec_notify_fd: write_end.as_raw_fd(),
265                landlord_init_pid: container.pid(),
266            },
267            syscall: self.base.syscall,
268            container_id: self.base.container_id,
269            pid_file: self.base.pid_file,
270            console_socket: csocketfd,
271            use_systemd,
272            spec: Rc::new(spec),
273            rootfs,
274            user_ns_config,
275            notify_path: notify_path.clone(),
276            container: None,
277            preserve_fds: self.base.preserve_fds,
278            detached: self.detached,
279            executor: self.base.executor,
280            no_pivot: false,
281            stdin: self.base.stdin,
282            stdout: self.base.stdout,
283            stderr: self.base.stderr,
284            as_sibling: self.as_sibling,
285            sub_cgroup_path: self.sub_cgroup,
286            process_label: self.process_label,
287        };
288
289        let pid = builder_impl.create()?;
290
291        let mut notify_socket = NotifySocket::new(notify_path);
292        notify_socket.notify_container_start()?;
293
294        // Explicitly close the write end of the pipe here to notify the
295        // `read_end` that the init process is able to move forward. Closing one
296        // end of the pipe will immediately signal the other end of the pipe,
297        // which we use in the init thread as a form of barrier.  `drop` is used
298        // here because `OwnedFd` supports it, so we don't have to use `close`
299        // here with `RawFd`.
300        drop(write_end);
301
302        let mut err_str_buf = Vec::new();
303
304        loop {
305            let mut buf = [0; 3];
306            match read(read_end.as_raw_fd(), &mut buf).map_err(LibcontainerError::OtherSyscall)? {
307                0 => {
308                    if err_str_buf.is_empty() {
309                        return Ok(pid);
310                    } else {
311                        return Err(LibcontainerError::Other(
312                            String::from_utf8_lossy(&err_str_buf).to_string(),
313                        ));
314                    }
315                }
316                _ => {
317                    err_str_buf.extend(buf);
318                }
319            }
320        }
321    }
322
323    fn lookup_container_dir(&self) -> Result<PathBuf, LibcontainerError> {
324        let container_dir = self.base.root_path.join(&self.base.container_id);
325        if !container_dir.exists() {
326            tracing::error!(?container_dir, ?self.base.container_id, "container dir does not exist");
327            return Err(LibcontainerError::NoDirectory);
328        }
329
330        Ok(container_dir)
331    }
332
333    fn load_init_spec(&self, container: &Container) -> Result<Spec, LibcontainerError> {
334        let spec_path = container.bundle().join("config.json");
335
336        let mut spec = Spec::load(&spec_path).map_err(|err| {
337            tracing::error!(path = ?spec_path, ?err, "failed to load spec");
338            err
339        })?;
340
341        Self::validate_spec(&spec)?;
342
343        spec.canonicalize_rootfs(container.bundle())?;
344        Ok(spec)
345    }
346
347    fn validate_spec(spec: &Spec) -> Result<(), LibcontainerError> {
348        let version = spec.version();
349        if !version.starts_with("1.") {
350            tracing::error!(
351                "runtime spec has incompatible version '{}'. Only 1.X.Y is supported",
352                spec.version()
353            );
354            Err(ErrInvalidSpec::UnsupportedVersion)?;
355        }
356
357        if let Some(process) = spec.process() {
358            if let Some(io_priority) = process.io_priority() {
359                let priority = io_priority.priority();
360                let iop_class_res = serde_json::to_string(&io_priority.class());
361                match iop_class_res {
362                    Ok(iop_class) => {
363                        if !(0..=7).contains(&priority) {
364                            tracing::error!(
365                                ?priority,
366                                "io priority '{}' not between 0 and 7 (inclusive), class '{}' not in (IO_PRIO_CLASS_RT,IO_PRIO_CLASS_BE,IO_PRIO_CLASS_IDLE)",
367                                priority,
368                                iop_class
369                            );
370                            Err(ErrInvalidSpec::IoPriority)?;
371                        }
372                    }
373                    Err(e) => {
374                        tracing::error!(?priority, ?e, "failed to parse io priority class");
375                        Err(ErrInvalidSpec::IoPriority)?;
376                    }
377                }
378            }
379
380            if let Some(sc) = process.scheduler() {
381                let policy = sc.policy();
382                if let Some(nice) = sc.nice() {
383                    // https://man7.org/linux/man-pages/man2/sched_setattr.2.html#top_of_page
384                    if (*policy == LinuxSchedulerPolicy::SchedBatch
385                        || *policy == LinuxSchedulerPolicy::SchedOther)
386                        && (*nice < -20 || *nice > 19)
387                    {
388                        tracing::error!(
389                            ?nice,
390                            "invalid scheduler.nice: '{}', must be within -20 to 19",
391                            nice
392                        );
393                        Err(ErrInvalidSpec::Scheduler)?;
394                    }
395                }
396                if let Some(priority) = sc.priority() {
397                    if *priority != 0
398                        && (*policy != LinuxSchedulerPolicy::SchedFifo
399                            && *policy != LinuxSchedulerPolicy::SchedRr)
400                    {
401                        tracing::error!(
402                            ?policy,
403                            "scheduler.priority can only be specified for SchedFIFO or SchedRR policy"
404                        );
405                        Err(ErrInvalidSpec::Scheduler)?;
406                    }
407                }
408                if *policy != LinuxSchedulerPolicy::SchedDeadline {
409                    if let Some(runtime) = sc.runtime() {
410                        if *runtime != 0 {
411                            tracing::error!(
412                                ?runtime,
413                                "scheduler runtime can only be specified for SchedDeadline policy"
414                            );
415                            Err(ErrInvalidSpec::Scheduler)?;
416                        }
417                    }
418                    if let Some(deadline) = sc.deadline() {
419                        if *deadline != 0 {
420                            tracing::error!(
421                                ?deadline,
422                                "scheduler deadline can only be specified for SchedDeadline policy"
423                            );
424                            Err(ErrInvalidSpec::Scheduler)?;
425                        }
426                    }
427                    if let Some(period) = sc.period() {
428                        if *period != 0 {
429                            tracing::error!(
430                                ?period,
431                                "scheduler period can only be specified for SchedDeadline policy"
432                            );
433                            Err(ErrInvalidSpec::Scheduler)?;
434                        }
435                    }
436                }
437            }
438        }
439
440        if let Some(mounts) = spec.mounts() {
441            utils::validate_mount_options(mounts)?;
442        }
443
444        let syscall = create_syscall();
445        utils::validate_spec_for_new_user_ns(spec, &*syscall)?;
446        utils::validate_spec_for_net_devices(spec, &*syscall)
447            .map_err(LibcontainerError::NetDevicesError)?;
448
449        Ok(())
450    }
451
452    fn load_container_state(&self, container_dir: PathBuf) -> Result<Container, LibcontainerError> {
453        let container = Container::load(container_dir)?;
454
455        match container.status() {
456            ContainerStatus::Running => Ok(container),
457            ContainerStatus::Paused if self.ignore_paused => Ok(container),
458            _ => {
459                tracing::error!(status = ?container.status(), "cannot exec: invalid container state");
460                Err(LibcontainerError::IncorrectStatus(container.status()))
461            }
462        }
463    }
464
465    fn adapt_spec_for_tenant(
466        &self,
467        spec: &mut Spec,
468        container: &Container,
469    ) -> Result<(), LibcontainerError> {
470        let process = if let Some(process) = &self.process {
471            self.get_process(process)?
472        } else {
473            // Use the spec's process env as the baseline for exec.
474            let spec_env = spec
475                .process()
476                .as_ref()
477                .and_then(|p| p.env().as_ref().cloned())
478                .unwrap_or_default();
479            let mut process_builder = ProcessBuilder::default()
480                .args(self.get_args()?)
481                .env(self.get_environment(spec_env));
482            if let Some(cwd) = self.get_working_dir()? {
483                process_builder = process_builder.cwd(cwd);
484            }
485
486            if let Some(process) = spec.process() {
487                if let Some(cpu_affinity) = process.exec_cpu_affinity() {
488                    process_builder = process_builder.exec_cpu_affinity(cpu_affinity.clone());
489                }
490            }
491
492            if let Some(no_new_priv) = self.get_no_new_privileges(spec) {
493                process_builder = process_builder.no_new_privileges(no_new_priv);
494            }
495
496            if let Some(ref apparmor) = self.apparmor {
497                process_builder = process_builder.apparmor_profile(apparmor)
498            }
499
500            let capabilities = get_capabilities(&self.capabilities, spec)?;
501            process_builder = process_builder.capabilities(capabilities);
502
503            let mut user_builder = UserBuilder::default();
504
505            if !self.additional_gids.is_empty() {
506                user_builder = user_builder.additional_gids(self.additional_gids.clone());
507            }
508
509            if let Some(uid) = self.user {
510                user_builder = user_builder.uid(uid);
511            }
512
513            if let Some(gid) = self.group {
514                user_builder = user_builder.gid(gid);
515            }
516
517            process_builder = process_builder.user(user_builder.build()?);
518
519            process_builder.build()?
520        };
521
522        let container_pid = container.pid().ok_or(LibcontainerError::Other(
523            "could not retrieve container init pid".into(),
524        ))?;
525
526        let init_process = procfs::process::Process::new(container_pid.as_raw())?;
527        let ns = self.get_namespaces(init_process.namespaces()?.0)?;
528
529        // it should never be the case that linux is not present in spec
530        let spec_linux = spec.linux().as_ref().unwrap();
531        let mut linux_builder = LinuxBuilder::default().namespaces(ns);
532
533        if let Some(cgroup_path) = spec_linux.cgroups_path() {
534            linux_builder = linux_builder.cgroups_path(cgroup_path.clone());
535        }
536
537        if let Some(personality) = spec_linux.personality() {
538            linux_builder = linux_builder.personality(personality.clone());
539        }
540
541        let linux = linux_builder.build()?;
542        spec.set_process(Some(process)).set_linux(Some(linux));
543
544        Ok(())
545    }
546
547    fn get_process(&self, process: &Path) -> Result<Process, LibcontainerError> {
548        if !process.exists() {
549            tracing::error!(?process, "process.json file does not exist");
550            return Err(LibcontainerError::Other(
551                "process.json file does not exist".into(),
552            ));
553        }
554
555        let process = utils::open(process).map_err(LibcontainerError::OtherIO)?;
556        let reader = BufReader::new(process);
557        let process_spec =
558            serde_json::from_reader(reader).map_err(LibcontainerError::OtherSerialization)?;
559        Ok(process_spec)
560    }
561
562    fn get_working_dir(&self) -> Result<Option<PathBuf>, LibcontainerError> {
563        if let Some(cwd) = &self.cwd {
564            if cwd.is_relative() {
565                tracing::error!(?cwd, "current working directory must be an absolute path");
566                return Err(LibcontainerError::Other(
567                    "current working directory must be an absolute path".into(),
568                ));
569            }
570            return Ok(Some(cwd.into()));
571        }
572        Ok(None)
573    }
574
575    fn get_args(&self) -> Result<Vec<String>, LibcontainerError> {
576        if self.args.is_empty() {
577            Err(MissingSpecError::Args)?;
578        }
579
580        Ok(self.args.clone())
581    }
582
583    /// Builds the environment for an exec process.
584    /// The spec's env vars are used as the baseline, and env vars provided to the
585    /// builder, such as those from the CLI, override entries with the same key.
586    /// This follows runc's behavior.
587    /// See <https://github.com/youki-dev/youki/issues/3428>.
588    fn get_environment(&self, spec_env: Vec<String>) -> Vec<String> {
589        // Start with spec env, skipping any vars that the CLI overrides.
590        let mut env: Vec<String> = spec_env
591            .into_iter()
592            .filter(|entry| {
593                let key = entry.split('=').next().unwrap_or("");
594                !self.env.contains_key(key)
595            })
596            .collect();
597
598        // Append CLI overrides.
599        for (k, v) in &self.env {
600            env.push(format!("{k}={v}"));
601        }
602
603        env
604    }
605
606    fn get_no_new_privileges(&self, spec: &Spec) -> Option<bool> {
607        self.no_new_privs
608            .filter(|&is_set| is_set)
609            .or_else(|| spec.process().as_ref().and_then(|p| p.no_new_privileges()))
610    }
611
612    fn get_namespaces(
613        &self,
614        init_namespaces: HashMap<OsString, Namespace>,
615    ) -> Result<Vec<LinuxNamespace>, LibcontainerError> {
616        let mut tenant_namespaces = Vec::with_capacity(init_namespaces.len());
617
618        for &ns_type in NAMESPACE_TYPES {
619            if let Some(init_ns) = init_namespaces.get(OsStr::new(ns_type)) {
620                let tenant_ns = LinuxNamespaceType::try_from(ns_type)?;
621                tenant_namespaces.push(
622                    LinuxNamespaceBuilder::default()
623                        .typ(tenant_ns)
624                        .path(init_ns.path.clone())
625                        .build()?,
626                )
627            }
628        }
629
630        Ok(tenant_namespaces)
631    }
632
633    fn should_use_systemd(&self, container: &Container) -> bool {
634        container.systemd()
635    }
636
637    fn setup_notify_listener(container_dir: &Path) -> Result<PathBuf, LibcontainerError> {
638        let notify_name = Self::generate_name(container_dir, TENANT_NOTIFY);
639        let socket_path = container_dir.join(notify_name);
640
641        Ok(socket_path)
642    }
643
644    fn setup_tty_socket(&self, container_dir: &Path) -> Result<Option<OwnedFd>, LibcontainerError> {
645        let tty_name = Self::generate_name(container_dir, TENANT_TTY);
646        let csocketfd = if let Some(console_socket) = &self.base.console_socket {
647            Some(tty::setup_console_socket(
648                container_dir,
649                console_socket,
650                &tty_name,
651            )?)
652        } else {
653            None
654        };
655
656        Ok(csocketfd)
657    }
658
659    fn generate_name(dir: &Path, prefix: &str) -> String {
660        loop {
661            let rand = fastrand::i32(..);
662            let name = format!("{prefix}{rand:x}.sock");
663            if !dir.join(&name).exists() {
664                return name;
665            }
666        }
667    }
668}
669
#[cfg(test)]
mod tests {
    use caps::Capability as Cap;
    use oci_spec::runtime::{Capabilities, Capability as SpecCap, SpecBuilder};

    use super::*;
    use crate::capabilities::CapabilityExt;
    use crate::syscall::syscall::SyscallType;

    /// Capabilities of the simulated existing container.
    const EXISTING: &[Cap] = &[Cap::CAP_SYS_ADMIN, Cap::CAP_BPF, Cap::CAP_MKNOD];
    /// Capabilities requested for the tenant; overlaps with `EXISTING`.
    const EXTRA: &[Cap] = &[Cap::CAP_SYS_ADMIN, Cap::CAP_NET_ADMIN, Cap::CAP_AUDIT_READ];

    /// Builds a spec whose process carries exactly the given capabilities.
    fn get_spec(caps: LinuxCapabilities) -> Spec {
        let process = ProcessBuilder::default()
            .capabilities(caps)
            .build()
            .unwrap();
        SpecBuilder::default().process(process).build().unwrap()
    }

    /// Capability names as they would be passed in by a caller.
    fn cap_to_string(caps: &[Cap]) -> Vec<String> {
        caps.iter().map(|cap| cap.to_string()).collect()
    }

    fn caps_to_spec_set(caps: &[Cap]) -> Capabilities {
        caps.iter().copied().map(SpecCap::from_cap).collect()
    }

    /// Builds a capability structure in which effective, bounding and
    /// permitted all hold `core`, while inheritable and ambient are set
    /// individually. `None` leaves the respective set unset.
    fn caps_of(
        core: Option<&[Cap]>,
        inheritable: Option<&[Cap]>,
        ambient: Option<&[Cap]>,
    ) -> LinuxCapabilities {
        let mut caps = LinuxCapabilities::default();
        caps.set_effective(core.map(caps_to_spec_set))
            .set_bounding(core.map(caps_to_spec_set))
            .set_permitted(core.map(caps_to_spec_set))
            .set_inheritable(inheritable.map(caps_to_spec_set))
            .set_ambient(ambient.map(caps_to_spec_set));
        caps
    }

    /// `EXISTING` followed by `EXTRA`; duplicates vanish once turned into a set.
    fn combined() -> Vec<Cap> {
        [EXISTING, EXTRA].concat()
    }

    /// Helper to build a minimal TenantContainerBuilder with the given CLI env.
    fn builder_with_env(env: &[(&str, &str)]) -> TenantContainerBuilder {
        let base = ContainerBuilder::new("test".to_string(), SyscallType::default());
        let mut env_map = HashMap::new();
        for (key, value) in env {
            env_map.insert(key.to_string(), value.to_string());
        }

        TenantContainerBuilder::new(base).with_env(env_map)
    }

    /// Owned copies of the given `KEY=value` entries.
    fn env_entries(entries: &[&str]) -> Vec<String> {
        entries.iter().map(|entry| entry.to_string()).collect()
    }

    fn has_entry(env: &[String], entry: &str) -> bool {
        env.iter().any(|candidate| candidate == entry)
    }

    // --- capabilities tests ---

    // if there are no existing capabilities, then tenant can only
    // set effective, bounding and permitted caps ; not inheritable or ambient
    #[test]
    fn test_capabilities_no_existing() -> Result<(), LibcontainerError> {
        let spec = get_spec(caps_of(None, None, None));

        let caps = get_capabilities(&cap_to_string(EXTRA), &spec)?;

        assert_eq!(caps, caps_of(Some(EXTRA), None, None));
        Ok(())
    }

    // If there are existing capabilities, but not inheritable, then tenant should
    // union existing and provided caps only for effective, bounding and permitted,
    // inheritable and ambient should be explicitly None
    #[test]
    fn test_capabilities_with_existing() -> Result<(), LibcontainerError> {
        let spec = get_spec(caps_of(Some(EXISTING), None, None));

        let caps = get_capabilities(&cap_to_string(EXTRA), &spec)?;

        let combined_caps = combined();
        assert_eq!(caps, caps_of(Some(combined_caps.as_slice()), None, None));
        Ok(())
    }

    // we check that if inheritable capabilities are present, ambient are set correctly
    #[test]
    fn test_capabilities_with_existing_inherent() -> Result<(), LibcontainerError> {
        let combined_caps = combined();
        let additional = cap_to_string(EXTRA);

        // case 1 :  when inheritable are there, but no ambient
        let spec = get_spec(caps_of(Some(EXISTING), Some(EXISTING), None));
        let caps = get_capabilities(&additional, &spec)?;
        // inheritable must not change; as there were no existing ambient,
        // only the extra caps will be set
        let expected_caps = caps_of(
            Some(combined_caps.as_slice()),
            Some(EXISTING),
            Some(EXTRA),
        );
        assert_eq!(caps, expected_caps);

        // case 2 :  when inheritable and ambient both are present
        let spec = get_spec(caps_of(Some(EXISTING), Some(EXISTING), Some(EXISTING)));
        let caps = get_capabilities(&additional, &spec)?;
        // inheritable must not change
        let expected_caps = caps_of(
            Some(combined_caps.as_slice()),
            Some(EXISTING),
            Some(combined_caps.as_slice()),
        );
        assert_eq!(caps, expected_caps);

        Ok(())
    }

    // --- environment tests ---

    #[test]
    fn env_inherits_spec_vars() {
        let builder = builder_with_env(&[]);
        let result = builder.get_environment(env_entries(&["PATH=/usr/bin", "AAA=bbb"]));
        assert!(has_entry(&result, "PATH=/usr/bin"));
        assert!(has_entry(&result, "AAA=bbb"));
    }

    #[test]
    fn builder_env_overrides_spec() {
        let builder = builder_with_env(&[("AAA", "override")]);
        let result = builder.get_environment(env_entries(&["PATH=/usr/bin", "AAA=bbb"]));
        assert!(has_entry(&result, "PATH=/usr/bin"));
        assert!(has_entry(&result, "AAA=override"));
        assert!(!has_entry(&result, "AAA=bbb"));
    }

    #[test]
    fn builder_env_adds_new_vars() {
        let builder = builder_with_env(&[("NEW_VAR", "hello")]);
        let result = builder.get_environment(env_entries(&["PATH=/usr/bin"]));
        assert!(has_entry(&result, "PATH=/usr/bin"));
        assert!(has_entry(&result, "NEW_VAR=hello"));
    }

    #[test]
    fn empty_spec_env_uses_builder_env_only() {
        let builder = builder_with_env(&[("FOO", "bar")]);
        let result = builder.get_environment(Vec::new());
        assert_eq!(result, env_entries(&["FOO=bar"]));
    }

    #[test]
    fn no_env_at_all() {
        let builder = builder_with_env(&[]);
        let result = builder.get_environment(Vec::new());
        assert!(result.is_empty());
    }
}