Skip to main content

libcontainer/
utils.rs

1//! Utility functionality
2
3use std::collections::HashMap;
4use std::fs::{self, DirBuilder, File};
5use std::os::fd::{AsRawFd, OwnedFd};
6use std::os::linux::fs::MetadataExt;
7use std::os::unix::fs::DirBuilderExt;
8use std::path::{Component, Path, PathBuf};
9use std::time::Duration;
10
11use libc::IFNAMSIZ;
12use nix::sys::stat::{Mode, fstat};
13use nix::sys::statfs::{Statfs, fstatfs};
14use nix::unistd::{Uid, User};
15use oci_spec::runtime::{LinuxNamespaceType, Spec};
16
17use crate::error::{LibcontainerError, MissingSpecError};
18use crate::syscall::syscall::Syscall;
19use crate::user_ns::UserNamespaceConfig;
20
/// Errors produced by the [`PathBufExt`] helpers.
#[derive(Debug, thiserror::Error)]
pub enum PathBufExtError {
    /// The path is relative, so it has no leading `/` that could be removed.
    #[error("relative path cannot be converted to the path in the container")]
    RelativePath,
    /// Removing the leading `/` from `path` failed.
    #[error("failed to strip prefix from {path:?}")]
    StripPrefix {
        /// Path reported alongside the failure.
        path: PathBuf,
        /// Underlying error returned by `Path::strip_prefix`.
        source: std::path::StripPrefixError,
    },
    /// `Path::canonicalize` failed for an existing path.
    #[error("failed to canonicalize path {path:?}")]
    Canonicalize {
        /// Path that could not be canonicalized.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// The current working directory could not be determined.
    #[error("failed to get current directory")]
    CurrentDir {
        /// Underlying I/O error returned by `std::env::current_dir`.
        source: std::io::Error,
    },
}
38
/// Extension methods for path handling.
pub trait PathBufExt {
    /// Returns the path with its leading `/` removed.
    ///
    /// Fails with [`PathBufExtError::RelativePath`] if the path is already relative.
    fn as_relative(&self) -> Result<&Path, PathBufExtError>;
    /// Joins `p` onto `self`. A leading `/` on `p` is stripped first, so an
    /// absolute `p` is appended to `self` instead of replacing it.
    fn join_safely<P: AsRef<Path>>(&self, p: P) -> Result<PathBuf, PathBufExtError>;
    /// Canonicalizes the path if it exists; otherwise makes it absolute
    /// (relative to the current directory) and normalizes it lexically.
    fn canonicalize_safely(&self) -> Result<PathBuf, PathBufExtError>;
    /// Lexically resolves `.` and `..` components without touching the filesystem.
    fn normalize(&self) -> PathBuf;
}
45
46impl PathBufExt for Path {
47    fn as_relative(&self) -> Result<&Path, PathBufExtError> {
48        match self.is_relative() {
49            true => Err(PathBufExtError::RelativePath),
50            false => Ok(self
51                .strip_prefix("/")
52                .map_err(|e| PathBufExtError::StripPrefix {
53                    path: self.to_path_buf(),
54                    source: e,
55                })?),
56        }
57    }
58
59    fn join_safely<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf, PathBufExtError> {
60        let path = path.as_ref();
61        if path.is_relative() {
62            return Ok(self.join(path));
63        }
64
65        let stripped = path
66            .strip_prefix("/")
67            .map_err(|e| PathBufExtError::StripPrefix {
68                path: self.to_path_buf(),
69                source: e,
70            })?;
71        Ok(self.join(stripped))
72    }
73
74    /// Canonicalizes existing and not existing paths
75    fn canonicalize_safely(&self) -> Result<PathBuf, PathBufExtError> {
76        if self.exists() {
77            self.canonicalize()
78                .map_err(|e| PathBufExtError::Canonicalize {
79                    path: self.to_path_buf(),
80                    source: e,
81                })
82        } else {
83            if self.is_relative() {
84                let p = std::env::current_dir()
85                    .map_err(|e| PathBufExtError::CurrentDir { source: e })?
86                    .join(self);
87                return Ok(p.normalize());
88            }
89
90            Ok(self.normalize())
91        }
92    }
93
94    /// Normalizes a path. In contrast to canonicalize the path does not need to exist.
95    // adapted from https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61
96    fn normalize(&self) -> PathBuf {
97        let mut components = self.components().peekable();
98        let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
99            components.next();
100            PathBuf::from(c.as_os_str())
101        } else {
102            PathBuf::new()
103        };
104
105        for component in components {
106            match component {
107                Component::Prefix(..) => unreachable!(),
108                Component::RootDir => {
109                    ret.push(component.as_os_str());
110                }
111                Component::CurDir => {}
112                Component::ParentDir => {
113                    ret.pop();
114                }
115                Component::Normal(c) => {
116                    ret.push(c);
117                }
118            }
119        }
120        ret
121    }
122}
123
/// Parses `KEY=VALUE` environment entries into a map.
///
/// Each entry is split at its first `=`; any further `=` characters stay in
/// the value. An entry without `=` is stored with the whole entry as key and
/// an empty value. If a key occurs more than once, the last entry wins.
pub fn parse_env(envs: &[String]) -> HashMap<String, String> {
    envs.iter()
        .map(|entry| match entry.split_once('=') {
            Some((key, value)) => (key.to_owned(), value.to_owned()),
            None => (entry.clone(), String::new()),
        })
        .collect()
}
136
137/// Get a nix::unistd::User via UID. Potential errors will be ignored.
138pub fn get_unix_user(uid: Uid) -> Option<User> {
139    User::from_uid(uid).unwrap_or_default()
140}
141
142/// Get home path of a User via UID.
143pub fn get_user_home(uid: u32) -> Option<PathBuf> {
144    match get_unix_user(Uid::from_raw(uid)) {
145        Some(user) => Some(user.dir),
146        None => None,
147    }
148}
149
/// Returns the cgroup path to use for a container.
///
/// An explicitly configured `cgroups_path` is returned unchanged. If it is
/// `None`, the default `:youki:<container_id>` is generated.
pub fn get_cgroup_path(cgroups_path: &Option<PathBuf>, container_id: &str) -> PathBuf {
    if let Some(explicit) = cgroups_path {
        return explicit.clone();
    }

    PathBuf::from(format!(":youki:{container_id}"))
}
157
158pub fn write_file<P: AsRef<Path>, C: AsRef<[u8]>>(
159    path: P,
160    contents: C,
161) -> Result<(), std::io::Error> {
162    fs::write(path.as_ref(), contents).map_err(|err| {
163        tracing::error!(path = ?path.as_ref(), ?err, "failed to write file");
164        err
165    })?;
166
167    Ok(())
168}
169
170pub fn create_dir_all<P: AsRef<Path>>(path: P) -> Result<(), std::io::Error> {
171    fs::create_dir_all(path.as_ref()).map_err(|err| {
172        tracing::error!(path = ?path.as_ref(), ?err, "failed to create directory");
173        err
174    })?;
175    Ok(())
176}
177
178pub fn open<P: AsRef<Path>>(path: P) -> Result<File, std::io::Error> {
179    File::open(path.as_ref()).map_err(|err| {
180        tracing::error!(path = ?path.as_ref(), ?err, "failed to open file");
181        err
182    })
183}
184
/// Errors returned by [`create_dir_all_with_mode`].
#[derive(Debug, thiserror::Error)]
pub enum MkdirWithModeError {
    /// Creating the directory or reading its metadata failed.
    #[error("IO error")]
    Io(#[from] std::io::Error),
    /// The path is not a directory, is owned by a different user, or lacks
    /// some of the requested mode bits.
    #[error("metadata doesn't match the expected attributes")]
    MetadataMismatch,
}
192
/// Errors returned by [`verify_inode`].
#[derive(Debug, thiserror::Error)]
pub enum VerifyInodeError {
    /// Querying the file descriptor (`fstat` / `fstatfs`) failed.
    #[error("stat operation failed")]
    Stat(#[from] nix::Error),
    /// A verification failure carrying a free-form message.
    // NOTE(review): presumably constructed by the `verify` closures passed to
    // `verify_inode`; no construction site is visible in this file.
    #[error("{0}")]
    Verification(String),
}
200
201/// Verify file descriptor using stat and statfs, similar to runc's VerifyInode.
202///
203/// This is a helper function that gets stat/statfs for a file descriptor and
204/// calls the provided verification function with the results.
205///
206/// # Arguments
207/// * `fd` - The file descriptor to verify
208/// * `verify` - A closure that receives stat and statfs results and performs verification
209///
210/// # Returns
211/// Returns `Ok(())` if verification succeeds, or an error if stat/statfs fails
212/// or the verification function returns an error.
213///
214/// Ref: <https://github.com/opencontainers/runc/blob/v1.4.0/libcontainer/system/linux.go>
215pub fn verify_inode<F>(fd: &OwnedFd, verify: F) -> Result<(), VerifyInodeError>
216where
217    F: FnOnce(&libc::stat, &Statfs) -> Result<(), VerifyInodeError>,
218{
219    let stat = fstat(fd.as_raw_fd())?;
220    let fs_stat = fstatfs(fd)?;
221    verify(&stat, &fs_stat)
222}
223
224/// Creates the specified directory and all parent directories with the specified mode. Ensures
225/// that the directory has been created with the correct mode and that the owner of the directory
226/// is the owner that has been specified
227/// # Example
228/// ``` no_run
229/// use libcontainer::utils::create_dir_all_with_mode;
230/// use nix::sys::stat::Mode;
231/// use std::path::Path;
232///
233/// let path = Path::new("/tmp/youki");
234/// create_dir_all_with_mode(&path, 1000, Mode::S_IRWXU).unwrap();
235/// assert!(path.exists())
236/// ```
237pub fn create_dir_all_with_mode<P: AsRef<Path>>(
238    path: P,
239    owner: u32,
240    mode: Mode,
241) -> Result<(), MkdirWithModeError> {
242    let path = path.as_ref();
243    if !path.exists() {
244        DirBuilder::new()
245            .recursive(true)
246            .mode(mode.bits())
247            .create(path)?;
248    }
249
250    let metadata = path.metadata()?;
251    if metadata.is_dir()
252        && metadata.st_uid() == owner
253        && metadata.st_mode() & mode.bits() == mode.bits()
254    {
255        Ok(())
256    } else {
257        Err(MkdirWithModeError::MetadataMismatch)
258    }
259}
260
/// Reports whether the current process runs in a user namespace other than
/// the initial one.
///
/// Reads `/proc/self/uid_map` and returns `false` only if it contains
/// `4294967295`; any other content, including an empty map, returns `true`.
// NOTE(review): `4294967295` is presumably the full-range mapping count shown
// in the initial user namespace — confirm against user_namespaces(7).
pub fn is_in_new_userns() -> Result<bool, std::io::Error> {
    const FULL_RANGE_COUNT: &str = "4294967295";

    std::fs::read_to_string("/proc/self/uid_map")
        .map(|uid_map| !uid_map.contains(FULL_RANGE_COUNT))
}
266
267/// Checks if rootless mode needs to be used
268pub fn rootless_required(syscall: &dyn Syscall) -> Result<bool, std::io::Error> {
269    if !syscall.get_euid().is_root() {
270        return Ok(true);
271    }
272    is_in_new_userns()
273}
274
275/// checks if given spec is valid for current user namespace setup
276pub fn validate_spec_for_new_user_ns(
277    spec: &Spec,
278    syscall: &dyn Syscall,
279) -> Result<(), LibcontainerError> {
280    let config = UserNamespaceConfig::new(spec)?;
281    let in_user_ns = is_in_new_userns().map_err(LibcontainerError::OtherIO)?;
282    let is_rootless_required = rootless_required(syscall).map_err(LibcontainerError::OtherIO)?;
283    // In case of rootless, there are 2 possible cases :
284    // we have a new user ns specified in the spec
285    // or the youki is launched in a new user ns (this is how podman does it)
286    // So here, we check if rootless is required,
287    // but we are neither in a new user ns nor a new user ns is specified in spec
288    // then it is an error
289    if is_rootless_required && !in_user_ns && config.is_none() {
290        return Err(LibcontainerError::NoUserNamespace);
291    }
292    Ok(())
293}
294
/// Generic retry helper with a fixed delay and a retry policy.
///
/// Runs `op` until it succeeds, at most `attempts` times. After a failure the
/// operation is retried only if attempts remain and `policy` accepts the
/// error; in that case the thread sleeps for `delay` first. Otherwise the
/// error is returned. `policy` is not consulted for the final attempt.
///
/// # Panics
/// Panics if `attempts` is 0.
pub fn retry<F, T, E, P>(mut op: F, attempts: u32, delay: Duration, policy: P) -> Result<T, E>
where
    F: FnMut() -> Result<T, E>,
    P: Fn(&E) -> bool,
{
    if attempts == 0 {
        panic!("retry called with 0 attempts. Minimum attempts is 1.");
    }

    let mut remaining = attempts;
    loop {
        let failure = match op() {
            Ok(value) => return Ok(value),
            Err(err) => err,
        };

        remaining -= 1;
        if remaining == 0 || !policy(&failure) {
            return Err(failure);
        }
        std::thread::sleep(delay);
    }
}
321
/// Errors returned by [`validate_spec_for_net_devices`].
#[derive(Debug, thiserror::Error)]
pub enum NetDevicesError {
    /// The spec lists network devices but no network namespace.
    #[error("unable to move network devices without a NET namespace")]
    NoNetNamespace,
    /// Network devices were requested while rootless mode is required.
    #[error("network devices are not supported in rootless containers")]
    RootlessNotSupported,
    /// A device name (host side or in-container name) is not a valid
    /// interface name; carries the offending name.
    #[error("invalid network device name: {0}")]
    InvalidDeviceName(String),
    /// An I/O error, e.g. while determining whether rootless mode is required.
    #[error(transparent)]
    IO(#[from] std::io::Error),
    /// A required section of the spec is missing.
    #[error(transparent)]
    Spec(#[from] MissingSpecError),
}
335
336// check if given spec is valid for netDevices
337pub fn validate_spec_for_net_devices(
338    spec: &Spec,
339    syscall: &dyn Syscall,
340) -> Result<(), NetDevicesError> {
341    let linux = spec
342        .linux()
343        .as_ref()
344        .ok_or(NetDevicesError::Spec(MissingSpecError::Linux))?;
345
346    if linux.net_devices().is_none() {
347        return Ok(());
348    }
349
350    let has_net_namespace = match linux.namespaces() {
351        Some(namespaces) => namespaces
352            .iter()
353            .any(|ns| ns.typ() == LinuxNamespaceType::Network),
354        None => false,
355    };
356
357    if !has_net_namespace {
358        return Err(NetDevicesError::NoNetNamespace);
359    }
360
361    let is_rootless = rootless_required(syscall).map_err(NetDevicesError::IO)?;
362    if is_rootless {
363        return Err(NetDevicesError::RootlessNotSupported);
364    }
365
366    if let Some(devices) = linux.net_devices() {
367        devices.iter().try_for_each(|(name, net_dev)| {
368            if !dev_valid_name(name) {
369                return Err(NetDevicesError::InvalidDeviceName(name.into()));
370            }
371            if let Some(dev_name) = net_dev.name() {
372                if !dev_valid_name(dev_name) {
373                    return Err(NetDevicesError::InvalidDeviceName(dev_name.into()));
374                }
375            }
376            Ok(())
377        })?;
378    }
379
380    Ok(())
381}
382
383/// Validates mount destinations and warns about deprecated relative paths.
384/// Follows the OCI Runtime Spec requirement that mount destinations SHOULD be absolute.
385/// Relative paths are deprecated but still accepted for backward compatibility.
386pub fn validate_mount_options(
387    mounts: &[oci_spec::runtime::Mount],
388) -> Result<(), LibcontainerError> {
389    mounts
390        .iter()
391        .filter(|mount| !mount.destination().is_absolute())
392        .for_each(|mount| {
393            tracing::warn!(
394                "mount destination {:?} is not absolute. \
395                Relative paths are deprecated in OCI Runtime Spec and may not be supported in future versions. \
396                The path will be interpreted as relative to '/'.",
397                mount.destination()
398            );
399        });
400
401    Ok(())
402}
403
404// https://elixir.bootlin.com/linux/v6.12/source/net/core/dev.c#L1066
405fn dev_valid_name(name: &str) -> bool {
406    if name.is_empty() || name.len() > IFNAMSIZ {
407        return false;
408    }
409    if name.eq(".") || name.eq("..") {
410        return false;
411    }
412
413    for c in name.chars() {
414        if c == '/' || c == ':' || c.is_whitespace() {
415            return false;
416        }
417    }
418
419    true
420}
421
422#[cfg(test)]
423mod tests {
424    use core::panic;
425
426    use anyhow::{Result, bail};
427    use nix::unistd::Gid;
428    use oci_spec::runtime::{LinuxBuilder, LinuxNamespaceBuilder, LinuxNetDevice, SpecBuilder};
429    use serial_test::serial;
430
431    use super::*;
432    use crate::syscall::syscall::create_syscall;
433    use crate::test_utils;
434
435    #[test]
436    pub fn test_get_unix_user() {
437        let user = get_unix_user(Uid::from_raw(0));
438        assert_eq!(user.unwrap().name, "root");
439
440        // for a non-exist UID
441        let user = get_unix_user(Uid::from_raw(1000000000));
442        assert!(user.is_none());
443    }
444
445    #[test]
446    pub fn test_get_user_home() {
447        let dir = get_user_home(0);
448        assert_eq!(dir.unwrap().to_str().unwrap(), "/root");
449
450        // for a non-exist UID
451        let dir = get_user_home(1000000000);
452        assert!(dir.is_none());
453    }
454
455    #[test]
456    fn test_get_cgroup_path() {
457        let cid = "sample_container_id";
458        assert_eq!(
459            get_cgroup_path(&None, cid),
460            PathBuf::from(":youki:sample_container_id")
461        );
462        assert_eq!(
463            get_cgroup_path(&Some(PathBuf::from("/youki")), cid),
464            PathBuf::from("/youki")
465        );
466    }
467
468    #[test]
469    fn test_parse_env() -> Result<()> {
470        let key = "key".into();
471        let value = "value".into();
472        let env_input = vec![format!("{key}={value}")];
473        let env_output = parse_env(&env_input);
474        assert_eq!(
475            env_output.len(),
476            1,
477            "There should be exactly one entry inside"
478        );
479        assert_eq!(env_output.get_key_value(&key), Some((&key, &value)));
480
481        Ok(())
482    }
483
484    #[test]
485    fn test_create_dir_all_with_mode() -> Result<()> {
486        {
487            let temdir = tempfile::tempdir()?;
488            let path = temdir.path().join("test");
489            let syscall = create_syscall();
490            let uid = syscall.get_uid().as_raw();
491            let mode = Mode::S_IRWXU;
492            create_dir_all_with_mode(&path, uid, mode)?;
493            let metadata = path.metadata()?;
494            assert!(path.is_dir());
495            assert_eq!(metadata.st_uid(), uid);
496            assert_eq!(metadata.st_mode() & mode.bits(), mode.bits());
497        }
498        {
499            let temdir = tempfile::tempdir()?;
500            let path = temdir.path().join("test");
501            let mode = Mode::S_IRWXU;
502            std::fs::create_dir(&path)?;
503            assert!(path.is_dir());
504            match create_dir_all_with_mode(&path, 8899, mode) {
505                Err(MkdirWithModeError::MetadataMismatch) => {}
506                _ => bail!("should return MetadataMismatch"),
507            }
508        }
509        Ok(())
510    }
511
512    #[test]
513    fn test_io() -> Result<()> {
514        {
515            let tempdir = tempfile::tempdir()?;
516            let path = tempdir.path().join("test");
517            write_file(&path, "test".as_bytes())?;
518            open(&path)?;
519            assert!(create_dir_all(path).is_err());
520        }
521        {
522            let tempdir = tempfile::tempdir()?;
523            let path = tempdir.path().join("test");
524            create_dir_all(&path)?;
525            assert!(write_file(&path, "test".as_bytes()).is_err());
526        }
527        {
528            let tempdir = tempfile::tempdir()?;
529            let path = tempdir.path().join("test");
530            assert!(open(&path).is_err());
531            create_dir_all(&path)?;
532            assert!(path.is_dir())
533        }
534
535        Ok(())
536    }
537
538    // the following test is marked as serial because
539    // we are doing unshare of user ns and fork, so better to run in serial,
540    #[test]
541    #[serial]
542    fn test_userns_spec_validation() -> Result<(), test_utils::TestError> {
543        use nix::sched::{CloneFlags, unshare};
544        let syscall = create_syscall();
545        // default rootful spec
546        let rootful_spec = Spec::default();
547        // as we are not in a user ns, and spec does not have user ns
548        // we should get error here
549        assert!(validate_spec_for_new_user_ns(&rootful_spec, &*syscall).is_err());
550
551        let rootless_spec = Spec::rootless(1000, 1000);
552        // because the spec contains user ns info, we should not get error
553        assert!(validate_spec_for_new_user_ns(&rootless_spec, &*syscall).is_ok());
554
555        test_utils::test_in_child_process(|| {
556            unshare(CloneFlags::CLONE_NEWUSER).unwrap();
557            // here we are in a new user namespace
558            let rootful_spec = Spec::default();
559            let syscall = create_syscall();
560            // because we are already in a new user ns, it is fine if spec
561            // does not have user ns, and because the test is running as
562            // non root
563            assert!(validate_spec_for_new_user_ns(&rootful_spec, &*syscall).is_ok());
564
565            let rootless_spec = Spec::rootless(1000, 1000);
566            // following should succeed irrespective if we're in user ns or not
567            assert!(validate_spec_for_new_user_ns(&rootless_spec, &*syscall).is_ok());
568            Ok(())
569        })
570    }
571
572    #[test]
573    fn test_dev_valid_name() {
574        assert!(!dev_valid_name(""));
575
576        let long_name = "a".repeat(IFNAMSIZ + 1);
577        assert!(!dev_valid_name(&long_name));
578
579        let valid_name = "a".repeat(IFNAMSIZ);
580        assert!(dev_valid_name(&valid_name));
581
582        assert!(!dev_valid_name("."));
583        assert!(!dev_valid_name(".."));
584
585        assert!(!dev_valid_name("/: "));
586        assert!(!dev_valid_name("eth0/: "));
587
588        assert!(dev_valid_name("eth0"));
589        assert!(dev_valid_name("veth123"));
590        assert!(dev_valid_name("abc.def"));
591    }
592
593    fn build_spec_with_ns_and_devices(include_net_ns: bool, devices: Vec<(&str, &str)>) -> Spec {
594        let mut namespaces = vec![];
595        if include_net_ns {
596            namespaces.push(
597                LinuxNamespaceBuilder::default()
598                    .typ(LinuxNamespaceType::Network)
599                    .path(PathBuf::from("/dev/net"))
600                    .build()
601                    .unwrap(),
602            );
603        }
604
605        let net_devices: HashMap<String, LinuxNetDevice> = devices
606            .into_iter()
607            .map(|(key, val)| {
608                (
609                    key.into(),
610                    LinuxNetDevice::default().set_name(Some(val.into())).clone(),
611                )
612            })
613            .collect();
614        let linux = LinuxBuilder::default()
615            .namespaces(namespaces)
616            .net_devices(net_devices)
617            .build()
618            .unwrap();
619
620        SpecBuilder::default().linux(linux).build().unwrap()
621    }
622
623    #[test]
624    fn test_net_devices_none() {
625        let spec = Spec::default();
626        let syscall = create_syscall();
627        syscall.set_id(Uid::from_raw(0), Gid::from_raw(0)).unwrap();
628        let result = validate_spec_for_net_devices(&spec, &*syscall);
629        assert!(result.is_ok());
630    }
631
632    #[test]
633    fn test_missing_net_namespace() {
634        let spec = build_spec_with_ns_and_devices(false, vec![]);
635        let syscall = create_syscall();
636        let err = validate_spec_for_net_devices(&spec, &*syscall).unwrap_err();
637        assert!(matches!(err, NetDevicesError::NoNetNamespace));
638    }
639
640    #[test]
641    fn test_invalid_device_name() {
642        let spec = build_spec_with_ns_and_devices(true, vec![("eth0", "/:invalid")]);
643        let syscall = create_syscall();
644        syscall.set_id(Uid::from_raw(0), Gid::from_raw(0)).unwrap();
645        let err = validate_spec_for_net_devices(&spec, &*syscall).unwrap_err();
646        if let NetDevicesError::InvalidDeviceName(name) = err {
647            assert_eq!(name, "/:invalid");
648        } else {
649            panic!("Expected InvalidDeviceName error");
650        }
651    }
652
653    #[test]
654    fn test_valid_config() {
655        let spec = build_spec_with_ns_and_devices(true, vec![("eth0", "eth0_container")]);
656        let syscall = create_syscall();
657        syscall.set_id(Uid::from_raw(0), Gid::from_raw(0)).unwrap();
658        let result = validate_spec_for_net_devices(&spec, &*syscall);
659        assert!(result.is_ok());
660    }
661}