Skip to main content

nucleus/checkpoint/
criu.rs

1use crate::checkpoint::metadata::CheckpointMetadata;
2use crate::checkpoint::state::CheckpointState;
3use crate::container::ContainerState;
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::unistd::Uid;
6use std::fs;
7use std::os::unix::fs::PermissionsExt;
8use std::path::{Path, PathBuf};
9use std::process::Command;
10use tempfile::Builder;
11use tracing::info;
12
13/// CRIU runtime for checkpoint/restore
14///
15/// Follows the same pattern as GVisorRuntime: find binary, validate, invoke via Command.
16pub struct CriuRuntime {
17    binary_path: PathBuf,
18    state: CheckpointState,
19}
20
21impl CriuRuntime {
22    /// Create a new CRIU runtime, finding the criu binary
23    pub fn new() -> Result<Self> {
24        let binary_path = Self::find_binary()?;
25
26        // Validate binary works
27        let output = Command::new(&binary_path)
28            .arg("--version")
29            .output()
30            .map_err(|e| NucleusError::CheckpointError(format!("Failed to execute criu: {}", e)))?;
31
32        if !output.status.success() {
33            return Err(NucleusError::CheckpointError(
34                "criu --version failed".to_string(),
35            ));
36        }
37
38        let version = String::from_utf8_lossy(&output.stdout);
39        info!("Found CRIU: {}", version.trim());
40
41        Ok(Self {
42            binary_path,
43            state: CheckpointState::None,
44        })
45    }
46
47    /// Validate a binary path for safe execution
48    fn validate_binary(path: &Path) -> Result<()> {
49        let metadata = fs::metadata(path).map_err(|e| {
50            NucleusError::CheckpointError(format!("Cannot stat criu binary {:?}: {}", path, e))
51        })?;
52        let mode = metadata.permissions().mode();
53        if mode & 0o022 != 0 {
54            return Err(NucleusError::CheckpointError(format!(
55                "criu binary {:?} is writable by group/others (mode {:o}), refusing to execute",
56                path, mode
57            )));
58        }
59        if mode & 0o111 == 0 {
60            return Err(NucleusError::CheckpointError(format!(
61                "criu binary {:?} is not executable",
62                path
63            )));
64        }
65        Ok(())
66    }
67
68    fn find_binary() -> Result<PathBuf> {
69        // Check common locations
70        for path in &["/usr/sbin/criu", "/usr/bin/criu", "/usr/local/sbin/criu"] {
71            let p = PathBuf::from(path);
72            if p.exists() {
73                Self::validate_binary(&p)?;
74                return Ok(p);
75            }
76        }
77
78        // For privileged execution, do not resolve runtime binaries via PATH.
79        // This avoids environment-based binary hijacking when running as root.
80        if Uid::effective().is_root() {
81            return Err(NucleusError::CheckpointError(
82                "CRIU binary not found in trusted system paths".to_string(),
83            ));
84        }
85
86        // Try PATH for unprivileged execution.
87        if let Some(path_var) = std::env::var_os("PATH") {
88            for dir in std::env::split_paths(&path_var) {
89                let candidate = dir.join("criu");
90                if candidate.exists() {
91                    Self::validate_binary(&candidate)?;
92                    return Ok(candidate);
93                }
94            }
95        }
96
97        Err(NucleusError::CheckpointError(
98            "CRIU binary not found. Install criu to use checkpoint/restore.".to_string(),
99        ))
100    }
101
102    /// Checkpoint a running container
103    ///
104    /// State transitions: None -> Dumping -> Dumped (or Dumping -> None on failure)
105    pub fn checkpoint(
106        &mut self,
107        state: &ContainerState,
108        output_dir: &Path,
109        leave_running: bool,
110    ) -> Result<()> {
111        // Requires root
112        if !nix::unistd::Uid::effective().is_root() {
113            return Err(NucleusError::CheckpointError(
114                "Checkpoint requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
115            ));
116        }
117
118        if !state.is_running() {
119            return Err(NucleusError::CheckpointError(format!(
120                "Container {} is not running",
121                state.id
122            )));
123        }
124
125        // State transition: None -> Dumping
126        self.state = self.state.transition(CheckpointState::Dumping)?;
127
128        let images_dir = Self::prepare_checkpoint_dir(output_dir)?;
129
130        // Run criu dump
131        let mut cmd = Command::new(&self.binary_path);
132        cmd.arg("dump")
133            .arg("--tree")
134            .arg(state.pid.to_string())
135            .arg("--images-dir")
136            .arg(&images_dir)
137            .arg("--shell-job");
138
139        if leave_running {
140            cmd.arg("--leave-running");
141        }
142
143        info!(
144            "Checkpointing container {} (PID {}) to {:?}",
145            state.id, state.pid, output_dir
146        );
147
148        let output = cmd.output().map_err(|e| {
149            // Abort: Dumping -> None
150            self.state = self
151                .state
152                .transition(CheckpointState::None)
153                .unwrap_or(self.state);
154            NucleusError::CheckpointError(format!("Failed to run criu dump: {}", e))
155        })?;
156
157        if !output.status.success() {
158            // Abort: Dumping -> None
159            self.state = self
160                .state
161                .transition(CheckpointState::None)
162                .unwrap_or(self.state);
163            let stderr = String::from_utf8_lossy(&output.stderr);
164            return Err(NucleusError::CheckpointError(format!(
165                "criu dump failed: {}",
166                stderr
167            )));
168        }
169
170        // Write metadata
171        let metadata = CheckpointMetadata::from_state(state);
172        metadata.save(output_dir)?;
173
174        // State transition: Dumping -> Dumped
175        self.state = self.state.transition(CheckpointState::Dumped)?;
176
177        info!("Checkpoint complete: {:?}", output_dir);
178        Ok(())
179    }
180
181    /// Restore a container from checkpoint
182    ///
183    /// State transitions: None -> Restoring -> Restored (or Restoring -> None on failure)
184    pub fn restore(&mut self, input_dir: &Path) -> Result<u32> {
185        // Requires root
186        if !nix::unistd::Uid::effective().is_root() {
187            return Err(NucleusError::CheckpointError(
188                "Restore requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
189            ));
190        }
191
192        // Load and validate metadata
193        let metadata = CheckpointMetadata::load(input_dir)?;
194        info!(
195            "Restoring container {} from checkpoint (originally PID {})",
196            metadata.container_id, metadata.original_pid
197        );
198
199        let images_dir = input_dir.join("images");
200        if !images_dir.exists() {
201            return Err(NucleusError::CheckpointError(format!(
202                "Images directory not found: {:?}",
203                images_dir
204            )));
205        }
206
207        // State transition: None -> Restoring
208        self.state = self.state.transition(CheckpointState::Restoring)?;
209
210        // Capture the restored init PID explicitly.
211        let pidfile = Builder::new()
212            .prefix("nucleus-criu-restore-")
213            .tempfile()
214            .map_err(|e| {
215                NucleusError::CheckpointError(format!("Failed to create CRIU pidfile: {}", e))
216            })?;
217        let pidfile_path = pidfile.path().to_path_buf();
218
219        // Run criu restore
220        let output = Command::new(&self.binary_path)
221            .arg("restore")
222            .arg("--images-dir")
223            .arg(&images_dir)
224            .arg("--shell-job")
225            .arg("--pidfile")
226            .arg(&pidfile_path)
227            .output()
228            .map_err(|e| {
229                // Abort: Restoring -> None
230                self.state = self
231                    .state
232                    .transition(CheckpointState::None)
233                    .unwrap_or(self.state);
234                NucleusError::CheckpointError(format!("Failed to run criu restore: {}", e))
235            })?;
236
237        if !output.status.success() {
238            // Abort: Restoring -> None
239            self.state = self
240                .state
241                .transition(CheckpointState::None)
242                .unwrap_or(self.state);
243            let stderr = String::from_utf8_lossy(&output.stderr);
244            return Err(NucleusError::CheckpointError(format!(
245                "criu restore failed: {}",
246                stderr
247            )));
248        }
249
250        // State transition: Restoring -> Restored
251        self.state = self.state.transition(CheckpointState::Restored)?;
252
253        // Parse restored PID from pidfile, with output fallback for compatibility.
254        let pid_text = fs::read_to_string(&pidfile_path).unwrap_or_default();
255        if let Some(pid) = Self::parse_pidfile(&pid_text) {
256            info!("Restore complete, new PID: {}", pid);
257            return Ok(pid);
258        }
259
260        let stdout = String::from_utf8_lossy(&output.stdout);
261        if let Some(pid) = Self::parse_pid_text(&stdout) {
262            info!("Restore complete, new PID: {}", pid);
263            return Ok(pid);
264        }
265
266        let stderr = String::from_utf8_lossy(&output.stderr);
267        if let Some(pid) = Self::parse_pid_text(&stderr) {
268            info!("Restore complete, new PID: {}", pid);
269            return Ok(pid);
270        }
271
272        Err(NucleusError::CheckpointError(format!(
273            "Failed to parse restored PID from CRIU output (pidfile='{}', stdout='{}', stderr='{}')",
274            pid_text.trim(),
275            stdout.trim(),
276            stderr.trim()
277        )))
278    }
279
280    fn parse_pid_text(text: &str) -> Option<u32> {
281        text.split(|c: char| !c.is_ascii_digit())
282            .filter(|tok| !tok.is_empty())
283            .find_map(|tok| tok.parse::<u32>().ok())
284    }
285
286    fn parse_pidfile(text: &str) -> Option<u32> {
287        let trimmed = text.trim();
288        if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
289            return None;
290        }
291        trimmed.parse::<u32>().ok()
292    }
293
294    fn prepare_checkpoint_dir(output_dir: &Path) -> Result<PathBuf> {
295        Self::ensure_secure_dir(output_dir, "checkpoint directory")?;
296        let images_dir = output_dir.join("images");
297        Self::ensure_secure_dir(&images_dir, "checkpoint images directory")?;
298        Ok(images_dir)
299    }
300
301    fn ensure_secure_dir(path: &Path, label: &str) -> Result<()> {
302        Self::reject_symlink_path(path, label)?;
303
304        if path.exists() {
305            if !path.is_dir() {
306                return Err(NucleusError::CheckpointError(format!(
307                    "{} {:?} is not a directory",
308                    label, path
309                )));
310            }
311        } else {
312            fs::create_dir_all(path).map_err(|e| {
313                NucleusError::CheckpointError(format!(
314                    "Failed to create {} {:?}: {}",
315                    label, path, e
316                ))
317            })?;
318        }
319
320        Self::reject_symlink_path(path, label)?;
321        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
322            NucleusError::CheckpointError(format!(
323                "Failed to set {} permissions {:?}: {}",
324                label, path, e
325            ))
326        })?;
327
328        Ok(())
329    }
330
331    fn reject_symlink_path(path: &Path, label: &str) -> Result<()> {
332        match fs::symlink_metadata(path) {
333            Ok(metadata) if metadata.file_type().is_symlink() => Err(
334                NucleusError::CheckpointError(format!("Refusing symlink {} {:?}", label, path)),
335            ),
336            Ok(_) | Err(_) => Ok(()),
337        }
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::CriuRuntime;
    use std::fs;
    use std::os::unix::fs::{symlink, PermissionsExt};
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Write a stub `criu` script with the given mode into a fresh temp dir.
    /// The `TempDir` is returned so the file outlives the call.
    fn fake_criu(mode: u32) -> (TempDir, PathBuf) {
        let dir = TempDir::new().unwrap();
        let script = dir.path().join("criu");
        fs::write(&script, "#!/bin/sh\n").unwrap();
        fs::set_permissions(&script, fs::Permissions::from_mode(mode)).unwrap();
        (dir, script)
    }

    /// Permission bits (lower nine) of `path`.
    fn mode_bits(path: &Path) -> u32 {
        fs::metadata(path).unwrap().permissions().mode() & 0o777
    }

    #[test]
    fn test_parse_pid_text_plain() {
        let parsed = CriuRuntime::parse_pid_text("1234\n");
        assert_eq!(parsed, Some(1234));
    }

    #[test]
    fn test_parse_pid_text_embedded() {
        let parsed = CriuRuntime::parse_pid_text("restored successfully pid=5678");
        assert_eq!(parsed, Some(5678));
    }

    #[test]
    fn test_parse_pid_text_missing() {
        let parsed = CriuRuntime::parse_pid_text("no pid here");
        assert_eq!(parsed, None);
    }

    #[test]
    fn test_parse_pidfile_strict() {
        // BUG-22: pidfile contents must be parsed strictly. A pidfile holds
        // just a number; the first number inside an error message must not
        // be mistaken for a PID.
        let strict = CriuRuntime::parse_pidfile;

        // Bare numbers, with or without surrounding whitespace, are accepted.
        assert_eq!(strict("1234\n"), Some(1234));
        assert_eq!(strict("  5678  \n"), Some(5678));

        // Error messages should NOT parse as PIDs
        assert_eq!(strict("Error code: 255 (EPERM)"), None);
        assert_eq!(strict("restored successfully pid=5678"), None);
        assert_eq!(strict(""), None);
        assert_eq!(strict("no pid here"), None);
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_symlinked_images_dir() {
        let root = TempDir::new().unwrap();
        let real = root.path().join("target");
        fs::create_dir(&real).unwrap();
        symlink(&real, root.path().join("images")).unwrap();

        let err = CriuRuntime::prepare_checkpoint_dir(root.path())
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("symlink"),
            "expected symlink rejection, got: {err}"
        );
    }

    #[test]
    fn test_prepare_checkpoint_dir_creates_images_subdir() {
        let root = TempDir::new().unwrap();
        let images = CriuRuntime::prepare_checkpoint_dir(root.path()).unwrap();

        assert_eq!(images, root.path().join("images"));
        assert!(images.is_dir());

        // Verify permissions are 0o700
        let mode = mode_bits(&images);
        assert_eq!(mode, 0o700, "images dir should be mode 700, got {:o}", mode);
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_file_as_output_dir() {
        let root = TempDir::new().unwrap();
        let plain_file = root.path().join("not-a-dir");
        fs::write(&plain_file, "").unwrap();

        let err = CriuRuntime::prepare_checkpoint_dir(&plain_file)
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("not a directory"),
            "expected 'not a directory' error, got: {err}"
        );
    }

    #[test]
    fn test_prepare_checkpoint_dir_rejects_symlinked_output_dir() {
        let root = TempDir::new().unwrap();
        let real = root.path().join("real");
        fs::create_dir(&real).unwrap();
        let alias = root.path().join("link");
        symlink(&real, &alias).unwrap();

        let err = CriuRuntime::prepare_checkpoint_dir(&alias)
            .unwrap_err()
            .to_string();
        assert!(
            err.contains("symlink"),
            "expected symlink rejection, got: {err}"
        );
    }

    #[test]
    fn test_validate_binary_rejects_group_writable() {
        let (_dir, bin) = fake_criu(0o775);

        let err = CriuRuntime::validate_binary(&bin).unwrap_err().to_string();
        assert!(
            err.contains("writable by group/others"),
            "expected group-writable rejection, got: {err}"
        );
    }

    #[test]
    fn test_validate_binary_rejects_world_writable() {
        let (_dir, bin) = fake_criu(0o757);

        let err = CriuRuntime::validate_binary(&bin).unwrap_err().to_string();
        assert!(
            err.contains("writable by group/others"),
            "expected world-writable rejection, got: {err}"
        );
    }

    #[test]
    fn test_validate_binary_rejects_non_executable() {
        let (_dir, bin) = fake_criu(0o600);

        let err = CriuRuntime::validate_binary(&bin).unwrap_err().to_string();
        assert!(
            err.contains("not executable"),
            "expected non-executable rejection, got: {err}"
        );
    }

    #[test]
    fn test_validate_binary_accepts_secure_binary() {
        let (_dir, bin) = fake_criu(0o755);

        CriuRuntime::validate_binary(&bin).expect("should accept mode 0755");
    }

    #[test]
    fn test_validate_binary_accepts_owner_only_executable() {
        let (_dir, bin) = fake_criu(0o700);

        CriuRuntime::validate_binary(&bin).expect("should accept mode 0700");
    }

    #[test]
    fn test_validate_binary_rejects_nonexistent() {
        let dir = TempDir::new().unwrap();
        let missing = dir.path().join("nonexistent");

        assert!(CriuRuntime::validate_binary(&missing).is_err());
    }

    #[test]
    fn test_checkpoint_state_transitions() {
        use crate::checkpoint::state::CheckpointState;
        use crate::error::StateTransition;

        let allowed =
            |from: CheckpointState, to: CheckpointState| from.can_transition_to(&to);

        // Valid forward transitions
        assert!(allowed(CheckpointState::None, CheckpointState::Dumping));
        assert!(allowed(CheckpointState::Dumping, CheckpointState::Dumped));
        assert!(allowed(CheckpointState::None, CheckpointState::Restoring));
        assert!(allowed(CheckpointState::Restoring, CheckpointState::Restored));

        // Valid abort transitions
        assert!(allowed(CheckpointState::Dumping, CheckpointState::None));
        assert!(allowed(CheckpointState::Restoring, CheckpointState::None));

        // Invalid transitions
        assert!(!allowed(CheckpointState::None, CheckpointState::Dumped));
        assert!(!allowed(CheckpointState::None, CheckpointState::Restored));
        assert!(!allowed(CheckpointState::Dumped, CheckpointState::Restoring));
        assert!(!allowed(CheckpointState::Restored, CheckpointState::Dumping));
    }

    #[test]
    fn test_prepare_checkpoint_dir_sets_secure_permissions() {
        let root = TempDir::new().unwrap();
        CriuRuntime::prepare_checkpoint_dir(root.path()).unwrap();

        // Both output dir and images subdir should be 0700
        let output_mode = mode_bits(root.path());
        let images_mode = mode_bits(&root.path().join("images"));
        assert_eq!(output_mode, 0o700);
        assert_eq!(images_mode, 0o700);
    }
}
543}