Skip to main content

libcontainer/container/
container_checkpoint.rs

1use std::fs::{DirBuilder, File, read_link};
2use std::io::{ErrorKind, Write};
3use std::os::unix::fs::DirBuilderExt;
4use std::os::unix::io::AsRawFd;
5
6use libcgroups::common::CgroupSetup::{Hybrid, Legacy};
7#[cfg(feature = "v1")]
8use libcgroups::common::DEFAULT_CGROUP_ROOT;
9use oci_spec::runtime::Spec;
10
11use super::container_criu::{CRIU_VERSION_MINIMUM, check_criu_version};
12use super::{Container, ContainerStatus};
13use crate::container::container::CheckpointOptions;
14use crate::error::LibcontainerError;
15
/// File name handed to CRIU as its log file for the dump; on failure the path
/// `<image_path>/dump.log` is reported in the error log.
const CRIU_CHECKPOINT_LOG_FILE: &str = "dump.log";
/// File name, inside the checkpoint image directory, of the JSON array that
/// records the link targets of the container's fds 0, 1 and 2.
const DESCRIPTORS_JSON: &str = "descriptors.json";
18
19#[derive(thiserror::Error, Debug)]
20pub enum CheckpointError {
21    #[error("criu error: {0}")]
22    CriuError(String),
23}
24
25impl Container {
26    pub fn checkpoint(&mut self, opts: &CheckpointOptions) -> Result<(), LibcontainerError> {
27        self.refresh_status()?;
28
29        // can_pause() checks if the container is running. That also works for
30        // checkpointing. is_running() would make more sense here, but let's
31        // just reuse existing functions.
32        if !self.can_pause() {
33            tracing::error!(status = ?self.status(), id = ?self.id(), "cannot checkpoint container because it is not running");
34            return Err(LibcontainerError::IncorrectStatus(self.status()));
35        }
36
37        // Require CRIU >= 3.15.0, matching crun's LIBCRIU_MIN_VERSION requirement.
38        check_criu_version(CRIU_VERSION_MINIMUM)?;
39
40        // Create checkpoint image directory if it doesn't exist (mode 0o700 like crun).
41        if let Err(err) = DirBuilder::new().mode(0o700).create(&opts.image_path) {
42            if err.kind() != ErrorKind::AlreadyExists {
43                tracing::error!(path = ?opts.image_path, ?err, "failed to create checkpoint directory");
44                return Err(LibcontainerError::OtherIO(err));
45            }
46        }
47
48        let mut criu = rust_criu::Criu::new().map_err(|e| {
49            LibcontainerError::Checkpoint(CheckpointError::CriuError(format!(
50                "error in creating criu struct: {}",
51                e
52            )))
53        })?;
54        // We need to tell CRIU that all bind mounts are external. CRIU will fail checkpointing
55        // if it does not know that these bind mounts are coming from the outside of the container.
56        // This information is needed during restore again. The external location of the bind
57        // mounts can change and CRIU will just mount whatever we tell it to mount based on
58        // information found in 'config.json'.
59        let source_spec_path = self.bundle().join("config.json");
60        let spec = Spec::load(source_spec_path)?;
61        let mounts = spec.mounts().clone();
62        for m in mounts.unwrap_or_default() {
63            match m.typ().as_deref() {
64                Some("bind") => {
65                    let dest = m
66                        .destination()
67                        .clone()
68                        .into_os_string()
69                        .into_string()
70                        .expect("failed to convert mount destination");
71                    criu.set_external_mount(dest.clone(), dest);
72                }
73                Some("cgroup") => {
74                    match libcgroups::common::get_cgroup_setup()? {
75                        // For v1 it is necessary to list all cgroup mounts as external mounts
76                        Legacy | Hybrid => {
77                            #[cfg(not(feature = "v1"))]
78                            panic!(
79                                "libcontainer can't run in a Legacy or Hybrid cgroup setup without the v1 feature"
80                            );
81                            #[cfg(feature = "v1")]
82                            for mp in libcgroups::v1::util::list_subsystem_mount_points().map_err(
83                                |err| {
84                                    tracing::error!(?err, "failed to get subsystem mount points");
85                                    LibcontainerError::OtherCgroup(err.to_string())
86                                },
87                            )? {
88                                let cgroup_mount = mp
89                                    .clone()
90                                    .into_os_string()
91                                    .into_string()
92                                    .expect("failed to convert mount point");
93                                if cgroup_mount.starts_with(DEFAULT_CGROUP_ROOT) {
94                                    criu.set_external_mount(cgroup_mount.clone(), cgroup_mount);
95                                }
96                            }
97                        }
98                        _ => (),
99                    }
100                }
101                _ => (),
102            }
103        }
104
105        let directory = File::open(&opts.image_path).map_err(|err| {
106            tracing::error!(path = ?opts.image_path, ?err, "failed to open checkpoint directory");
107            LibcontainerError::OtherIO(err)
108        })?;
109        criu.set_images_dir_fd(directory.as_raw_fd());
110
111        // It seems to be necessary to be defined outside of 'if' to
112        // keep the FD open until CRIU uses it.
113        let work_dir: File;
114        if let Some(wp) = &opts.work_path {
115            // Create work directory if it doesn't exist (mode 0o700 like crun).
116            if let Err(err) = DirBuilder::new().mode(0o700).create(wp) {
117                if err.kind() != ErrorKind::AlreadyExists {
118                    tracing::error!(path = ?wp, ?err, "failed to create work directory");
119                    return Err(LibcontainerError::OtherIO(err));
120                }
121            }
122            work_dir = File::open(wp).map_err(LibcontainerError::OtherIO)?;
123            criu.set_work_dir_fd(work_dir.as_raw_fd());
124        }
125
126        let pid: i32 = self
127            .pid()
128            .ok_or(LibcontainerError::Other(
129                "container process pid not found in state".into(),
130            ))?
131            .into();
132
133        // Remember original stdin, stdout, stderr for container restore.
134        let mut descriptors = Vec::new();
135        for n in 0..3 {
136            let link_path = match read_link(format!("/proc/{pid}/fd/{n}")) {
137                // it should not have any non utf-8 or non os safe path,
138                // as we are reading from os , so ok to unwrap
139                Ok(lp) => lp.into_os_string().into_string().unwrap(),
140                Err(..) => "/dev/null".to_string(),
141            };
142            descriptors.push(link_path);
143        }
144        let descriptors_json_path = opts.image_path.join(DESCRIPTORS_JSON);
145        let mut descriptors_json =
146            File::create(descriptors_json_path).map_err(LibcontainerError::OtherIO)?;
147        write!(
148            descriptors_json,
149            "{}",
150            serde_json::to_string(&descriptors).map_err(LibcontainerError::OtherSerialization)?
151        )
152        .map_err(LibcontainerError::OtherIO)?;
153
154        criu.set_log_file(CRIU_CHECKPOINT_LOG_FILE.to_string());
155        criu.set_log_level(4);
156        criu.set_pid(pid);
157        criu.set_leave_running(opts.leave_running);
158        criu.set_ext_unix_sk(opts.ext_unix_sk);
159        criu.set_shell_job(opts.shell_job);
160        criu.set_tcp_established(opts.tcp_established);
161        criu.set_file_locks(opts.file_locks);
162        criu.set_orphan_pts_master(true);
163        criu.set_manage_cgroups(true);
164        criu.set_root(
165            self.bundle()
166                .clone()
167                .into_os_string()
168                .into_string()
169                .unwrap(),
170        );
171        criu.cgroups_mode(opts.manage_cgroups_mode.clone());
172
173        criu.dump().map_err(|err| {
174            tracing::error!(?err, id = ?self.id(), logfile = ?opts.image_path.join(CRIU_CHECKPOINT_LOG_FILE), "checkpointing container failed");
175            LibcontainerError::Other(err.to_string())
176        })?;
177
178        if !opts.leave_running {
179            self.set_status(ContainerStatus::Stopped).save()?;
180        }
181
182        tracing::debug!("container {} checkpointed", self.id());
183        Ok(())
184    }
185}