Skip to main content

nucleus/checkpoint/
criu.rs

1use crate::checkpoint::metadata::CheckpointMetadata;
2use crate::checkpoint::state::CheckpointState;
3use crate::container::ContainerState;
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::unistd::Uid;
6use std::fs;
7use std::os::unix::fs::PermissionsExt;
8use std::path::{Path, PathBuf};
9use std::process::Command;
10use tempfile::Builder;
11use tracing::info;
12
/// CRIU runtime for checkpoint/restore
///
/// Follows the same pattern as GVisorRuntime: find binary, validate, invoke via Command.
pub struct CriuRuntime {
    /// Path of the `criu` executable that dump/restore commands are run with.
    binary_path: PathBuf,
    /// Current position in the checkpoint/restore state machine.
    state: CheckpointState,
}
20
21impl CriuRuntime {
22    /// Create a new CRIU runtime, finding the criu binary
23    pub fn new() -> Result<Self> {
24        let binary_path = Self::find_binary()?;
25
26        // Validate binary works
27        let output = Command::new(&binary_path)
28            .arg("--version")
29            .output()
30            .map_err(|e| NucleusError::CheckpointError(format!("Failed to execute criu: {}", e)))?;
31
32        if !output.status.success() {
33            return Err(NucleusError::CheckpointError(
34                "criu --version failed".to_string(),
35            ));
36        }
37
38        let version = String::from_utf8_lossy(&output.stdout);
39        info!("Found CRIU: {}", version.trim());
40
41        Ok(Self {
42            binary_path,
43            state: CheckpointState::None,
44        })
45    }
46
47    /// Validate a binary path for safe execution
48    fn validate_binary(path: &Path) -> Result<()> {
49        let metadata = fs::metadata(path).map_err(|e| {
50            NucleusError::CheckpointError(format!("Cannot stat criu binary {:?}: {}", path, e))
51        })?;
52        let mode = metadata.permissions().mode();
53        if mode & 0o022 != 0 {
54            return Err(NucleusError::CheckpointError(format!(
55                "criu binary {:?} is writable by group/others (mode {:o}), refusing to execute",
56                path, mode
57            )));
58        }
59        if mode & 0o111 == 0 {
60            return Err(NucleusError::CheckpointError(format!(
61                "criu binary {:?} is not executable",
62                path
63            )));
64        }
65        Ok(())
66    }
67
68    fn find_binary() -> Result<PathBuf> {
69        // Check common locations
70        for path in &["/usr/sbin/criu", "/usr/bin/criu", "/usr/local/sbin/criu"] {
71            let p = PathBuf::from(path);
72            if p.exists() {
73                Self::validate_binary(&p)?;
74                return Ok(p);
75            }
76        }
77
78        // For privileged execution, do not resolve runtime binaries via PATH.
79        // This avoids environment-based binary hijacking when running as root.
80        if Uid::effective().is_root() {
81            return Err(NucleusError::CheckpointError(
82                "CRIU binary not found in trusted system paths".to_string(),
83            ));
84        }
85
86        // Try PATH for unprivileged execution.
87        if let Some(path_var) = std::env::var_os("PATH") {
88            for dir in std::env::split_paths(&path_var) {
89                let candidate = dir.join("criu");
90                if candidate.exists() {
91                    Self::validate_binary(&candidate)?;
92                    return Ok(candidate);
93                }
94            }
95        }
96
97        Err(NucleusError::CheckpointError(
98            "CRIU binary not found. Install criu to use checkpoint/restore.".to_string(),
99        ))
100    }
101
102    /// Checkpoint a running container
103    ///
104    /// State transitions: None -> Dumping -> Dumped (or Dumping -> None on failure)
105    pub fn checkpoint(
106        &mut self,
107        state: &ContainerState,
108        output_dir: &Path,
109        leave_running: bool,
110    ) -> Result<()> {
111        // Requires root
112        if !nix::unistd::Uid::effective().is_root() {
113            return Err(NucleusError::CheckpointError(
114                "Checkpoint requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
115            ));
116        }
117
118        if !state.is_running() {
119            return Err(NucleusError::CheckpointError(format!(
120                "Container {} is not running",
121                state.id
122            )));
123        }
124
125        // State transition: None -> Dumping
126        self.state = self.state.transition(CheckpointState::Dumping)?;
127
128        let images_dir = Self::prepare_checkpoint_dir(output_dir)?;
129
130        // Run criu dump
131        let mut cmd = Command::new(&self.binary_path);
132        cmd.arg("dump")
133            .arg("--tree")
134            .arg(state.pid.to_string())
135            .arg("--images-dir")
136            .arg(&images_dir)
137            .arg("--shell-job");
138
139        if leave_running {
140            cmd.arg("--leave-running");
141        }
142
143        info!(
144            "Checkpointing container {} (PID {}) to {:?}",
145            state.id, state.pid, output_dir
146        );
147
148        let output = cmd.output().map_err(|e| {
149            // Abort: Dumping -> None
150            self.state = self.state.transition(CheckpointState::None).unwrap_or(self.state);
151            NucleusError::CheckpointError(format!("Failed to run criu dump: {}", e))
152        })?;
153
154        if !output.status.success() {
155            // Abort: Dumping -> None
156            self.state = self.state.transition(CheckpointState::None).unwrap_or(self.state);
157            let stderr = String::from_utf8_lossy(&output.stderr);
158            return Err(NucleusError::CheckpointError(format!(
159                "criu dump failed: {}",
160                stderr
161            )));
162        }
163
164        // Write metadata
165        let metadata = CheckpointMetadata::from_state(state);
166        metadata.save(output_dir)?;
167
168        // State transition: Dumping -> Dumped
169        self.state = self.state.transition(CheckpointState::Dumped)?;
170
171        info!("Checkpoint complete: {:?}", output_dir);
172        Ok(())
173    }
174
175    /// Restore a container from checkpoint
176    ///
177    /// State transitions: None -> Restoring -> Restored (or Restoring -> None on failure)
178    pub fn restore(&mut self, input_dir: &Path) -> Result<u32> {
179        // Requires root
180        if !nix::unistd::Uid::effective().is_root() {
181            return Err(NucleusError::CheckpointError(
182                "Restore requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
183            ));
184        }
185
186        // Load and validate metadata
187        let metadata = CheckpointMetadata::load(input_dir)?;
188        info!(
189            "Restoring container {} from checkpoint (originally PID {})",
190            metadata.container_id, metadata.original_pid
191        );
192
193        let images_dir = input_dir.join("images");
194        if !images_dir.exists() {
195            return Err(NucleusError::CheckpointError(format!(
196                "Images directory not found: {:?}",
197                images_dir
198            )));
199        }
200
201        // State transition: None -> Restoring
202        self.state = self.state.transition(CheckpointState::Restoring)?;
203
204        // Capture the restored init PID explicitly.
205        let pidfile = Builder::new()
206            .prefix("nucleus-criu-restore-")
207            .tempfile()
208            .map_err(|e| {
209                NucleusError::CheckpointError(format!("Failed to create CRIU pidfile: {}", e))
210            })?;
211        let pidfile_path = pidfile.path().to_path_buf();
212
213        // Run criu restore
214        let output = Command::new(&self.binary_path)
215            .arg("restore")
216            .arg("--images-dir")
217            .arg(&images_dir)
218            .arg("--shell-job")
219            .arg("--pidfile")
220            .arg(&pidfile_path)
221            .output()
222            .map_err(|e| {
223                // Abort: Restoring -> None
224                self.state = self.state.transition(CheckpointState::None).unwrap_or(self.state);
225                NucleusError::CheckpointError(format!("Failed to run criu restore: {}", e))
226            })?;
227
228        if !output.status.success() {
229            // Abort: Restoring -> None
230            self.state = self.state.transition(CheckpointState::None).unwrap_or(self.state);
231            let stderr = String::from_utf8_lossy(&output.stderr);
232            return Err(NucleusError::CheckpointError(format!(
233                "criu restore failed: {}",
234                stderr
235            )));
236        }
237
238        // State transition: Restoring -> Restored
239        self.state = self.state.transition(CheckpointState::Restored)?;
240
241        // Parse restored PID from pidfile, with output fallback for compatibility.
242        let pid_text = fs::read_to_string(&pidfile_path).unwrap_or_default();
243        if let Some(pid) = Self::parse_pidfile(&pid_text) {
244            info!("Restore complete, new PID: {}", pid);
245            return Ok(pid);
246        }
247
248        let stdout = String::from_utf8_lossy(&output.stdout);
249        if let Some(pid) = Self::parse_pid_text(&stdout) {
250            info!("Restore complete, new PID: {}", pid);
251            return Ok(pid);
252        }
253
254        let stderr = String::from_utf8_lossy(&output.stderr);
255        if let Some(pid) = Self::parse_pid_text(&stderr) {
256            info!("Restore complete, new PID: {}", pid);
257            return Ok(pid);
258        }
259
260        Err(NucleusError::CheckpointError(format!(
261            "Failed to parse restored PID from CRIU output (pidfile='{}', stdout='{}', stderr='{}')",
262            pid_text.trim(),
263            stdout.trim(),
264            stderr.trim()
265        )))
266    }
267
268    fn parse_pid_text(text: &str) -> Option<u32> {
269        text.split(|c: char| !c.is_ascii_digit())
270            .filter(|tok| !tok.is_empty())
271            .find_map(|tok| tok.parse::<u32>().ok())
272    }
273
274    fn parse_pidfile(text: &str) -> Option<u32> {
275        let trimmed = text.trim();
276        if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
277            return None;
278        }
279        trimmed.parse::<u32>().ok()
280    }
281
282    fn prepare_checkpoint_dir(output_dir: &Path) -> Result<PathBuf> {
283        Self::ensure_secure_dir(output_dir, "checkpoint directory")?;
284        let images_dir = output_dir.join("images");
285        Self::ensure_secure_dir(&images_dir, "checkpoint images directory")?;
286        Ok(images_dir)
287    }
288
289    fn ensure_secure_dir(path: &Path, label: &str) -> Result<()> {
290        Self::reject_symlink_path(path, label)?;
291
292        if path.exists() {
293            if !path.is_dir() {
294                return Err(NucleusError::CheckpointError(format!(
295                    "{} {:?} is not a directory",
296                    label, path
297                )));
298            }
299        } else {
300            fs::create_dir_all(path).map_err(|e| {
301                NucleusError::CheckpointError(format!(
302                    "Failed to create {} {:?}: {}",
303                    label, path, e
304                ))
305            })?;
306        }
307
308        Self::reject_symlink_path(path, label)?;
309        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
310            NucleusError::CheckpointError(format!(
311                "Failed to set {} permissions {:?}: {}",
312                label, path, e
313            ))
314        })?;
315
316        Ok(())
317    }
318
319    fn reject_symlink_path(path: &Path, label: &str) -> Result<()> {
320        match fs::symlink_metadata(path) {
321            Ok(metadata) if metadata.file_type().is_symlink() => Err(
322                NucleusError::CheckpointError(format!(
323                    "Refusing symlink {} {:?}",
324                    label, path
325                )),
326            ),
327            Ok(_) | Err(_) => Ok(()),
328        }
329    }
330}
331
#[cfg(test)]
mod tests {
    use super::CriuRuntime;
    use std::fs;
    use std::os::unix::fs::{symlink, PermissionsExt};
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Create a stub `criu` script with the given mode inside a fresh temp dir.
    ///
    /// The `TempDir` is returned so the caller keeps the directory alive for
    /// the duration of the test.
    fn stub_binary(mode: u32) -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let bin = dir.path().join("criu");
        fs::write(&bin, "#!/bin/sh\n").unwrap();
        fs::set_permissions(&bin, fs::Permissions::from_mode(mode)).unwrap();
        (dir, bin)
    }

    /// Permission bits (lower nine) of `path`.
    fn perm_bits(path: &Path) -> u32 {
        fs::metadata(path).unwrap().permissions().mode() & 0o777
    }

    /// Assert that `validate_binary` rejects a stub with `mode` and that the
    /// error text contains `needle`.
    fn assert_binary_rejected(mode: u32, needle: &str, what: &str) {
        let (_dir, bin) = stub_binary(mode);
        let err = CriuRuntime::validate_binary(&bin).unwrap_err();
        assert!(
            err.to_string().contains(needle),
            "expected {what}, got: {err}"
        );
    }

    /// Assert that `prepare_checkpoint_dir(dir)` fails with an error whose text
    /// contains `needle`.
    fn assert_prepare_fails(dir: &Path, needle: &str, what: &str) {
        let err = CriuRuntime::prepare_checkpoint_dir(dir).unwrap_err();
        assert!(
            err.to_string().contains(needle),
            "expected {what}, got: {err}"
        );
    }

    #[test]
    fn test_parse_pid_text_plain() {
        assert_eq!(CriuRuntime::parse_pid_text("1234\n"), Some(1234));
    }

    #[test]
    fn test_parse_pid_text_embedded() {
        let parsed = CriuRuntime::parse_pid_text("restored successfully pid=5678");
        assert_eq!(parsed, Some(5678));
    }

    #[test]
    fn test_parse_pid_text_missing() {
        assert_eq!(CriuRuntime::parse_pid_text("no pid here"), None);
    }

    #[test]
    fn test_parse_pidfile_strict() {
        // BUG-22: parse_pid_text must prefer strict pidfile parsing
        // A pidfile should contain just a number, not extract first number from error messages
        assert_eq!(CriuRuntime::parse_pidfile("1234\n"), Some(1234));
        assert_eq!(CriuRuntime::parse_pidfile("  5678  \n"), Some(5678));

        // Error messages should NOT parse as PIDs
        let not_pidfiles = [
            "Error code: 255 (EPERM)",
            "restored successfully pid=5678",
            "",
            "no pid here",
        ];
        for text in not_pidfiles.iter() {
            assert_eq!(CriuRuntime::parse_pidfile(text), None);
        }
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_symlinked_images_dir() {
        let tmp = TempDir::new().unwrap();
        let target = tmp.path().join("target");
        fs::create_dir(&target).unwrap();
        symlink(&target, tmp.path().join("images")).unwrap();

        assert_prepare_fails(tmp.path(), "symlink", "symlink rejection");
    }

    #[test]
    fn test_prepare_checkpoint_dir_creates_images_subdir() {
        let tmp = TempDir::new().unwrap();
        let images = CriuRuntime::prepare_checkpoint_dir(tmp.path()).unwrap();
        assert_eq!(images, tmp.path().join("images"));
        assert!(images.is_dir());

        // Verify permissions are 0o700
        let mode = perm_bits(&images);
        assert_eq!(mode, 0o700, "images dir should be mode 700, got {:o}", mode);
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_file_as_output_dir() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("not-a-dir");
        fs::write(&file_path, "").unwrap();

        assert_prepare_fails(&file_path, "not a directory", "'not a directory' error");
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_symlinked_output_dir() {
        let tmp = TempDir::new().unwrap();
        let real_dir = tmp.path().join("real");
        fs::create_dir(&real_dir).unwrap();
        let link = tmp.path().join("link");
        symlink(&real_dir, &link).unwrap();

        assert_prepare_fails(&link, "symlink", "symlink rejection");
    }

    #[test]
    fn test_validate_binary_rejects_group_writable() {
        assert_binary_rejected(
            0o775,
            "writable by group/others",
            "group-writable rejection",
        );
    }

    #[test]
    fn test_validate_binary_rejects_world_writable() {
        assert_binary_rejected(
            0o757,
            "writable by group/others",
            "world-writable rejection",
        );
    }

    #[test]
    fn test_validate_binary_rejects_non_executable() {
        assert_binary_rejected(0o600, "not executable", "non-executable rejection");
    }

    #[test]
    fn test_validate_binary_accepts_secure_binary() {
        let (_dir, bin) = stub_binary(0o755);
        CriuRuntime::validate_binary(&bin).expect("should accept mode 0755");
    }

    #[test]
    fn test_validate_binary_accepts_owner_only_executable() {
        let (_dir, bin) = stub_binary(0o700);
        CriuRuntime::validate_binary(&bin).expect("should accept mode 0700");
    }

    #[test]
    fn test_validate_binary_rejects_nonexistent() {
        let tmp = TempDir::new().unwrap();
        let missing = tmp.path().join("nonexistent");
        assert!(CriuRuntime::validate_binary(&missing).is_err());
    }

    #[test]
    fn test_checkpoint_state_transitions() {
        use crate::checkpoint::state::CheckpointState;
        use crate::error::StateTransition;

        // Valid forward transitions, followed by valid abort transitions
        let allowed = [
            (CheckpointState::None, CheckpointState::Dumping),
            (CheckpointState::Dumping, CheckpointState::Dumped),
            (CheckpointState::None, CheckpointState::Restoring),
            (CheckpointState::Restoring, CheckpointState::Restored),
            (CheckpointState::Dumping, CheckpointState::None),
            (CheckpointState::Restoring, CheckpointState::None),
        ];
        for &(from, to) in allowed.iter() {
            assert!(from.can_transition_to(&to));
        }

        // Invalid transitions
        let forbidden = [
            (CheckpointState::None, CheckpointState::Dumped),
            (CheckpointState::None, CheckpointState::Restored),
            (CheckpointState::Dumped, CheckpointState::Restoring),
            (CheckpointState::Restored, CheckpointState::Dumping),
        ];
        for &(from, to) in forbidden.iter() {
            assert!(!from.can_transition_to(&to));
        }
    }

    #[test]
    fn test_prepare_checkpoint_dir_sets_secure_permissions() {
        let tmp = TempDir::new().unwrap();
        CriuRuntime::prepare_checkpoint_dir(tmp.path()).unwrap();

        // Both output dir and images subdir should be 0700
        let output_mode = perm_bits(tmp.path());
        let images_mode = perm_bits(&tmp.path().join("images"));
        assert_eq!(output_mode, 0o700);
        assert_eq!(images_mode, 0o700);
    }
}
531}