Skip to main content

nucleus/checkpoint/
criu.rs

1use crate::checkpoint::metadata::CheckpointMetadata;
2use crate::checkpoint::state::CheckpointState;
3use crate::container::ContainerState;
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::unistd::Uid;
6use sha2::{Digest, Sha256};
7use std::fs;
8use std::fs::OpenOptions;
9use std::io::{Read, Write};
10use std::os::unix::fs::{MetadataExt, OpenOptionsExt, PermissionsExt};
11use std::path::{Path, PathBuf};
12use std::process::Command;
13use tempfile::Builder;
14use tracing::info;
15
/// Name of the file, directly inside a checkpoint directory, that holds the
/// hex-encoded integrity HMAC for that checkpoint.
const CHECKPOINT_HMAC_FILE: &str = "checkpoint.hmac";
/// Length in bytes of a freshly generated HMAC key; existing key files
/// shorter than this are rejected.
const CHECKPOINT_HMAC_KEY_SIZE: usize = 32;
18
/// CRIU runtime for checkpoint/restore
///
/// Follows the same pattern as GVisorRuntime: find binary, validate, invoke via Command.
pub struct CriuRuntime {
    /// Path of the `criu` executable selected by `find_binary` and passed to
    /// every `Command` this runtime spawns.
    binary_path: PathBuf,
    /// Current position in the checkpoint/restore state machine; a new
    /// runtime starts at `CheckpointState::None`.
    state: CheckpointState,
}
26
27impl CriuRuntime {
28    /// Create a new CRIU runtime, finding the criu binary
29    pub fn new() -> Result<Self> {
30        let binary_path = Self::find_binary()?;
31
32        // Validate binary works
33        let output = Command::new(&binary_path)
34            .arg("--version")
35            .output()
36            .map_err(|e| NucleusError::CheckpointError(format!("Failed to execute criu: {}", e)))?;
37
38        if !output.status.success() {
39            return Err(NucleusError::CheckpointError(
40                "criu --version failed".to_string(),
41            ));
42        }
43
44        let version = String::from_utf8_lossy(&output.stdout);
45        info!("Found CRIU: {}", version.trim());
46
47        Ok(Self {
48            binary_path,
49            state: CheckpointState::None,
50        })
51    }
52
53    /// Validate a binary path for safe execution.
54    ///
55    /// Checks permissions (not world/group-writable) and ownership (must be
56    /// owned by root or the effective UID) to prevent execution of tampered
57    /// binaries.
58    fn validate_binary(path: &Path) -> Result<()> {
59        use std::os::unix::fs::MetadataExt;
60
61        let metadata = fs::metadata(path).map_err(|e| {
62            NucleusError::CheckpointError(format!("Cannot stat criu binary {:?}: {}", path, e))
63        })?;
64        let mode = metadata.permissions().mode();
65        if mode & 0o022 != 0 {
66            return Err(NucleusError::CheckpointError(format!(
67                "criu binary {:?} is writable by group/others (mode {:o}), refusing to execute",
68                path, mode
69            )));
70        }
71        if mode & 0o111 == 0 {
72            return Err(NucleusError::CheckpointError(format!(
73                "criu binary {:?} is not executable",
74                path
75            )));
76        }
77        let owner_uid = metadata.uid();
78        let euid = nix::unistd::Uid::effective().as_raw();
79        if owner_uid != 0 && owner_uid != euid {
80            return Err(NucleusError::CheckpointError(format!(
81                "criu binary {:?} is owned by UID {} (expected root or euid {}), refusing to execute",
82                path, owner_uid, euid
83            )));
84        }
85        Ok(())
86    }
87
88    fn find_binary() -> Result<PathBuf> {
89        // Check common locations
90        for path in &["/usr/sbin/criu", "/usr/bin/criu", "/usr/local/sbin/criu"] {
91            let p = PathBuf::from(path);
92            if p.exists() {
93                Self::validate_binary(&p)?;
94                return Ok(p);
95            }
96        }
97
98        // For privileged execution, do not resolve runtime binaries via PATH.
99        // This avoids environment-based binary hijacking when running as root.
100        if Uid::effective().is_root() {
101            return Err(NucleusError::CheckpointError(
102                "CRIU binary not found in trusted system paths".to_string(),
103            ));
104        }
105
106        // Try PATH for unprivileged execution.
107        if let Some(path_var) = std::env::var_os("PATH") {
108            for dir in std::env::split_paths(&path_var) {
109                let candidate = dir.join("criu");
110                if candidate.exists() {
111                    Self::validate_binary(&candidate)?;
112                    return Ok(candidate);
113                }
114            }
115        }
116
117        Err(NucleusError::CheckpointError(
118            "CRIU binary not found. Install criu to use checkpoint/restore.".to_string(),
119        ))
120    }
121
122    /// Checkpoint a running container
123    ///
124    /// State transitions: None -> Dumping -> Dumped (or Dumping -> None on failure)
125    pub fn checkpoint(
126        &mut self,
127        state: &ContainerState,
128        output_dir: &Path,
129        leave_running: bool,
130    ) -> Result<()> {
131        // Requires root
132        if !nix::unistd::Uid::effective().is_root() {
133            return Err(NucleusError::CheckpointError(
134                "Checkpoint requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
135            ));
136        }
137
138        if !state.is_running() {
139            return Err(NucleusError::CheckpointError(format!(
140                "Container {} is not running",
141                state.id
142            )));
143        }
144
145        // State transition: None -> Dumping
146        self.state = self.state.transition(CheckpointState::Dumping)?;
147
148        let images_dir = Self::prepare_checkpoint_dir(output_dir)?;
149
150        // Run criu dump
151        let mut cmd = Command::new(&self.binary_path);
152        cmd.arg("dump")
153            .arg("--tree")
154            .arg(state.pid.to_string())
155            .arg("--images-dir")
156            .arg(&images_dir)
157            .arg("--shell-job");
158
159        if leave_running {
160            cmd.arg("--leave-running");
161        }
162
163        info!(
164            "Checkpointing container {} (PID {}) to {:?}",
165            state.id, state.pid, output_dir
166        );
167
168        let output = cmd.output().map_err(|e| {
169            // Abort: Dumping -> None
170            self.state = self
171                .state
172                .transition(CheckpointState::None)
173                .unwrap_or(self.state);
174            NucleusError::CheckpointError(format!("Failed to run criu dump: {}", e))
175        })?;
176
177        if !output.status.success() {
178            // Abort: Dumping -> None
179            self.state = self
180                .state
181                .transition(CheckpointState::None)
182                .unwrap_or(self.state);
183            let stderr = String::from_utf8_lossy(&output.stderr);
184            return Err(NucleusError::CheckpointError(format!(
185                "criu dump failed: {}",
186                stderr
187            )));
188        }
189
190        // Write metadata
191        let metadata = CheckpointMetadata::from_state(state)?;
192        metadata.save(output_dir)?;
193        Self::write_checkpoint_hmac(output_dir)?;
194
195        // State transition: Dumping -> Dumped
196        self.state = self.state.transition(CheckpointState::Dumped)?;
197
198        info!("Checkpoint complete: {:?}", output_dir);
199        Ok(())
200    }
201
202    /// Restore a container from checkpoint
203    ///
204    /// State transitions: None -> Restoring -> Restored (or Restoring -> None on failure)
205    pub fn restore(&mut self, input_dir: &Path) -> Result<u32> {
206        // Requires root
207        if !nix::unistd::Uid::effective().is_root() {
208            return Err(NucleusError::CheckpointError(
209                "Restore requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
210            ));
211        }
212
213        // Load and validate metadata
214        let metadata = CheckpointMetadata::load(input_dir)?;
215        info!(
216            "Restoring container {} from checkpoint (originally PID {})",
217            metadata.container_id, metadata.original_pid
218        );
219
220        let images_dir = input_dir.join("images");
221        if !images_dir.exists() {
222            return Err(NucleusError::CheckpointError(format!(
223                "Images directory not found: {:?}",
224                images_dir
225            )));
226        }
227
228        Self::verify_checkpoint_hmac(input_dir)?;
229
230        // State transition: None -> Restoring
231        self.state = self.state.transition(CheckpointState::Restoring)?;
232
233        // Capture the restored init PID explicitly.
234        let pidfile = Builder::new()
235            .prefix("nucleus-criu-restore-")
236            .tempfile()
237            .map_err(|e| {
238                NucleusError::CheckpointError(format!("Failed to create CRIU pidfile: {}", e))
239            })?;
240        let pidfile_path = pidfile.path().to_path_buf();
241
242        // Run criu restore
243        let output = Command::new(&self.binary_path)
244            .arg("restore")
245            .arg("--images-dir")
246            .arg(&images_dir)
247            .arg("--shell-job")
248            .arg("--pidfile")
249            .arg(&pidfile_path)
250            .output()
251            .map_err(|e| {
252                // Abort: Restoring -> None
253                self.state = self
254                    .state
255                    .transition(CheckpointState::None)
256                    .unwrap_or(self.state);
257                NucleusError::CheckpointError(format!("Failed to run criu restore: {}", e))
258            })?;
259
260        if !output.status.success() {
261            // Abort: Restoring -> None
262            self.state = self
263                .state
264                .transition(CheckpointState::None)
265                .unwrap_or(self.state);
266            let stderr = String::from_utf8_lossy(&output.stderr);
267            return Err(NucleusError::CheckpointError(format!(
268                "criu restore failed: {}",
269                stderr
270            )));
271        }
272
273        // State transition: Restoring -> Restored
274        self.state = self.state.transition(CheckpointState::Restored)?;
275
276        // Parse restored PID from pidfile, with output fallback for compatibility.
277        let pid_text = fs::read_to_string(&pidfile_path).unwrap_or_default();
278        if let Some(pid) = Self::parse_pidfile(&pid_text) {
279            info!("Restore complete, new PID: {}", pid);
280            return Ok(pid);
281        }
282
283        let stdout = String::from_utf8_lossy(&output.stdout);
284        if let Some(pid) = Self::parse_pid_text(&stdout) {
285            info!("Restore complete, new PID: {}", pid);
286            return Ok(pid);
287        }
288
289        let stderr = String::from_utf8_lossy(&output.stderr);
290        if let Some(pid) = Self::parse_pid_text(&stderr) {
291            info!("Restore complete, new PID: {}", pid);
292            return Ok(pid);
293        }
294
295        Err(NucleusError::CheckpointError(format!(
296            "Failed to parse restored PID from CRIU output (pidfile='{}', stdout='{}', stderr='{}')",
297            pid_text.trim(),
298            stdout.trim(),
299            stderr.trim()
300        )))
301    }
302
303    fn parse_pid_text(text: &str) -> Option<u32> {
304        text.split(|c: char| !c.is_ascii_digit())
305            .filter(|tok| !tok.is_empty())
306            .find_map(|tok| tok.parse::<u32>().ok())
307    }
308
309    fn parse_pidfile(text: &str) -> Option<u32> {
310        let trimmed = text.trim();
311        if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
312            return None;
313        }
314        trimmed.parse::<u32>().ok()
315    }
316
317    fn prepare_checkpoint_dir(output_dir: &Path) -> Result<PathBuf> {
318        Self::ensure_secure_dir(output_dir, "checkpoint directory")?;
319        let images_dir = output_dir.join("images");
320        Self::ensure_secure_dir(&images_dir, "checkpoint images directory")?;
321        Ok(images_dir)
322    }
323
    /// Compute the HMAC over the checkpoint directory `dir` and store it in
    /// `CHECKPOINT_HMAC_FILE` inside that directory.
    ///
    /// The digest is written to a `<CHECKPOINT_HMAC_FILE>.tmp` sibling, synced,
    /// and then renamed over the final file. Returns a
    /// `NucleusError::CheckpointError` if the key cannot be loaded, the temp
    /// path is a symlink, or any filesystem step fails.
    fn write_checkpoint_hmac(dir: &Path) -> Result<()> {
        let key = Self::load_or_create_checkpoint_hmac_key()?;
        let hmac_path = dir.join(CHECKPOINT_HMAC_FILE);
        let tmp_path = dir.join(format!("{}.tmp", CHECKPOINT_HMAC_FILE));

        // Clear any leftover temp file first; a symlink at that path is
        // refused rather than removed.
        match fs::symlink_metadata(&tmp_path) {
            Ok(meta) if meta.file_type().is_symlink() => {
                return Err(NucleusError::CheckpointError(format!(
                    "Refusing symlink checkpoint HMAC temp file {:?}",
                    tmp_path
                )));
            }
            Ok(_) => {
                fs::remove_file(&tmp_path).map_err(|e| {
                    NucleusError::CheckpointError(format!(
                        "Failed to remove stale checkpoint HMAC temp file {:?}: {}",
                        tmp_path, e
                    ))
                })?;
            }
            // Stat failure (typically: nothing there) is ignored; the
            // `create_new` open below fails if something does exist.
            Err(_) => {}
        }

        // Order matters: the digest is computed after the stale temp file is
        // gone and before the new one is created, so the temp file is never
        // part of the hashed directory contents.
        let digest = Self::compute_checkpoint_hmac(dir, &key)?;

        // Exclusive create with owner-only mode; O_NOFOLLOW refuses a symlink
        // at the final path component.
        let mut file = OpenOptions::new()
            .create_new(true)
            .write(true)
            .mode(0o600)
            .custom_flags(libc::O_NOFOLLOW)
            .open(&tmp_path)
            .map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to open checkpoint HMAC temp file {:?}: {}",
                    tmp_path, e
                ))
            })?;
        file.write_all(digest.as_bytes()).map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to write checkpoint HMAC {:?}: {}",
                tmp_path, e
            ))
        })?;
        // Flush to disk before the rename publishes the file.
        file.sync_all().map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to sync checkpoint HMAC {:?}: {}",
                tmp_path, e
            ))
        })?;
        fs::rename(&tmp_path, &hmac_path).map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to atomically replace checkpoint HMAC {:?}: {}",
                hmac_path, e
            ))
        })?;

        Ok(())
    }
382
383    fn verify_checkpoint_hmac(dir: &Path) -> Result<()> {
384        let hmac_path = dir.join(CHECKPOINT_HMAC_FILE);
385        let expected = Self::read_file_nofollow_bytes(&hmac_path).map_err(|e| {
386            NucleusError::CheckpointError(format!(
387                "Failed to read checkpoint HMAC {:?}: {}",
388                hmac_path, e
389            ))
390        })?;
391        let expected = std::str::from_utf8(&expected)
392            .map_err(|e| {
393                NucleusError::CheckpointError(format!(
394                    "Checkpoint HMAC {:?} is not valid UTF-8: {}",
395                    hmac_path, e
396                ))
397            })?
398            .trim()
399            .to_string();
400        if expected.is_empty() {
401            return Err(NucleusError::CheckpointError(format!(
402                "Checkpoint HMAC {:?} is empty",
403                hmac_path
404            )));
405        }
406
407        info!("Verifying checkpoint HMAC integrity");
408        let key = Self::load_or_create_checkpoint_hmac_key()?;
409        let actual = Self::compute_checkpoint_hmac(dir, &key)?;
410        if actual != expected {
411            return Err(NucleusError::CheckpointError(format!(
412                "Checkpoint integrity verification failed: HMAC mismatch (expected {}, got {})",
413                expected, actual
414            )));
415        }
416
417        info!("Checkpoint integrity verified");
418        Ok(())
419    }
420
421    fn checkpoint_hmac_key_path() -> PathBuf {
422        if let Some(path) =
423            std::env::var_os("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE").filter(|path| !path.is_empty())
424        {
425            return PathBuf::from(path);
426        }
427
428        if Uid::effective().is_root() {
429            PathBuf::from("/var/lib/nucleus/checkpoint-hmac.key")
430        } else {
431            dirs::data_local_dir()
432                .map(|dir| dir.join("nucleus/checkpoint-hmac.key"))
433                .or_else(|| dirs::home_dir().map(|dir| dir.join(".nucleus/checkpoint-hmac.key")))
434                .unwrap_or_else(|| PathBuf::from("/tmp/nucleus-checkpoint-hmac.key"))
435        }
436    }
437
    /// Return the checkpoint HMAC key, generating and persisting a new one
    /// when no key file exists yet.
    ///
    /// The key's parent directory is secured first and a symlink at the key
    /// path is refused. An existing key must be owned by the effective UID,
    /// have no group/other permission bits, and be at least
    /// `CHECKPOINT_HMAC_KEY_SIZE` bytes long. A new key is
    /// `CHECKPOINT_HMAC_KEY_SIZE` random bytes written to a file created
    /// exclusively with mode `0o600`.
    ///
    /// Any violated check or I/O failure is returned as a
    /// `NucleusError::CheckpointError`.
    fn load_or_create_checkpoint_hmac_key() -> Result<Vec<u8>> {
        let key_path = Self::checkpoint_hmac_key_path();
        let parent = key_path.parent().ok_or_else(|| {
            NucleusError::CheckpointError(format!(
                "Checkpoint HMAC key path {:?} has no parent directory",
                key_path
            ))
        })?;
        // Validate (or create) the containing directory before touching the
        // key file itself.
        Self::ensure_secure_key_parent_dir(parent)?;
        Self::reject_symlink_path(&key_path, "checkpoint HMAC key file")?;

        if key_path.exists() {
            // Existing key: check ownership and mode before reading it.
            let metadata = fs::metadata(&key_path).map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to stat checkpoint HMAC key {:?}: {}",
                    key_path, e
                ))
            })?;
            let mode = metadata.permissions().mode() & 0o777;
            let owner = metadata.uid();
            let euid = Uid::effective().as_raw();
            if owner != euid {
                return Err(NucleusError::CheckpointError(format!(
                    "Checkpoint HMAC key {:?} is owned by uid {} (expected {})",
                    key_path, owner, euid
                )));
            }
            // Any group or other permission bit makes the key untrusted.
            if mode & 0o077 != 0 {
                return Err(NucleusError::CheckpointError(format!(
                    "Checkpoint HMAC key {:?} has insecure mode {:o}; expected owner-only access",
                    key_path, mode
                )));
            }
            let key = Self::read_file_nofollow_bytes(&key_path).map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to read checkpoint HMAC key {:?}: {}",
                    key_path, e
                ))
            })?;
            // Longer keys are accepted as-is; only short ones are rejected.
            if key.len() < CHECKPOINT_HMAC_KEY_SIZE {
                return Err(NucleusError::CheckpointError(format!(
                    "Checkpoint HMAC key {:?} is too short ({} bytes)",
                    key_path,
                    key.len()
                )));
            }
            return Ok(key);
        }

        // No key yet: generate one and persist it. `create_new` makes the
        // open fail if a file appeared at the path in the meantime.
        let mut key = vec![0u8; CHECKPOINT_HMAC_KEY_SIZE];
        Self::fill_secure_random(&mut key)?;
        let mut file = OpenOptions::new()
            .create_new(true)
            .write(true)
            .mode(0o600)
            .custom_flags(libc::O_NOFOLLOW)
            .open(&key_path)
            .map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to create checkpoint HMAC key {:?}: {}",
                    key_path, e
                ))
            })?;
        file.write_all(&key).map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to write checkpoint HMAC key {:?}: {}",
                key_path, e
            ))
        })?;
        file.sync_all().map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to sync checkpoint HMAC key {:?}: {}",
                key_path, e
            ))
        })?;
        Ok(key)
    }
515
516    fn ensure_secure_key_parent_dir(path: &Path) -> Result<()> {
517        Self::reject_symlink_path(path, "checkpoint HMAC key directory")?;
518
519        if path.exists() {
520            let metadata = fs::metadata(path).map_err(|e| {
521                NucleusError::CheckpointError(format!(
522                    "Failed to stat checkpoint HMAC key directory {:?}: {}",
523                    path, e
524                ))
525            })?;
526            if !metadata.is_dir() {
527                return Err(NucleusError::CheckpointError(format!(
528                    "Checkpoint HMAC key directory {:?} is not a directory",
529                    path
530                )));
531            }
532            let mode = metadata.permissions().mode() & 0o777;
533            let owner = metadata.uid();
534            let euid = Uid::effective().as_raw();
535            if owner != euid {
536                return Err(NucleusError::CheckpointError(format!(
537                    "Checkpoint HMAC key directory {:?} is owned by uid {} (expected {})",
538                    path, owner, euid
539                )));
540            }
541            if mode & 0o077 != 0 {
542                return Err(NucleusError::CheckpointError(format!(
543                    "Checkpoint HMAC key directory {:?} has insecure mode {:o}; expected owner-only access",
544                    path, mode
545                )));
546            }
547            return Ok(());
548        }
549
550        fs::create_dir_all(path).map_err(|e| {
551            NucleusError::CheckpointError(format!(
552                "Failed to create checkpoint HMAC key directory {:?}: {}",
553                path, e
554            ))
555        })?;
556        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
557            NucleusError::CheckpointError(format!(
558                "Failed to secure checkpoint HMAC key directory {:?}: {}",
559                path, e
560            ))
561        })?;
562        Ok(())
563    }
564
565    fn fill_secure_random(buf: &mut [u8]) -> Result<()> {
566        let file = OpenOptions::new()
567            .read(true)
568            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
569            .open("/dev/urandom")
570            .map_err(|e| {
571                NucleusError::CheckpointError(format!(
572                    "Failed to open /dev/urandom for checkpoint HMAC key generation: {}",
573                    e
574                ))
575            })?;
576        let metadata = file.metadata().map_err(|e| {
577            NucleusError::CheckpointError(format!("Failed to stat /dev/urandom: {}", e))
578        })?;
579        use std::os::unix::fs::FileTypeExt;
580        if !metadata.file_type().is_char_device() {
581            return Err(NucleusError::CheckpointError(
582                "/dev/urandom is not a character device".to_string(),
583            ));
584        }
585        let mut file = file;
586        file.read_exact(buf).map_err(|e| {
587            NucleusError::CheckpointError(format!(
588                "Failed to read /dev/urandom for checkpoint HMAC key generation: {}",
589                e
590            ))
591        })
592    }
593
594    fn read_file_nofollow_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
595        let mut file = OpenOptions::new()
596            .read(true)
597            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
598            .open(path)?;
599        let mut content = Vec::new();
600        file.read_to_end(&mut content)?;
601        Ok(content)
602    }
603
604    fn compute_checkpoint_hmac(dir: &Path, key: &[u8]) -> Result<String> {
605        let mut key_block = [0u8; 64];
606        if key.len() > key_block.len() {
607            let digest = Sha256::digest(key);
608            key_block[..digest.len()].copy_from_slice(&digest);
609        } else {
610            key_block[..key.len()].copy_from_slice(key);
611        }
612
613        let mut ipad = [0x36u8; 64];
614        let mut opad = [0x5cu8; 64];
615        for (dst, src) in ipad.iter_mut().zip(key_block.iter()) {
616            *dst ^= *src;
617        }
618        for (dst, src) in opad.iter_mut().zip(key_block.iter()) {
619            *dst ^= *src;
620        }
621
622        let mut inner = Sha256::new();
623        inner.update(ipad);
624        Self::update_checkpoint_hmac_inner(&mut inner, dir, dir)?;
625        let inner_hash = inner.finalize();
626
627        let mut outer = Sha256::new();
628        outer.update(opad);
629        outer.update(inner_hash);
630        Ok(hex::encode(outer.finalize()))
631    }
632
    /// Feed the contents of `dir` into `hasher`, recursing into
    /// subdirectories.
    ///
    /// `root` is the top of the checkpoint tree; every entry is identified in
    /// the hash by its path relative to `root`. Entries are visited in sorted
    /// path order. The HMAC file at the root is skipped. Symlinks, special
    /// files, and non-UTF-8 relative paths abort the scan with a
    /// `NucleusError::CheckpointError`, as does any I/O failure.
    fn update_checkpoint_hmac_inner(hasher: &mut Sha256, root: &Path, dir: &Path) -> Result<()> {
        // Collect first so the entries can be sorted: `read_dir` order is not
        // something the digest should depend on.
        let mut entries = Vec::new();
        for entry in fs::read_dir(dir).map_err(|e| {
            NucleusError::CheckpointError(format!(
                "Failed to read checkpoint directory {:?}: {}",
                dir, e
            ))
        })? {
            let entry = entry.map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to read checkpoint entry in {:?}: {}",
                    dir, e
                ))
            })?;
            entries.push(entry.path());
        }
        entries.sort();

        for path in entries {
            let relative = path.strip_prefix(root).map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to compute checkpoint-relative path for {:?}: {}",
                    path, e
                ))
            })?;
            // The HMAC file cannot cover itself. The comparison is on the
            // root-relative path, so only the top-level file is skipped.
            if relative == Path::new(CHECKPOINT_HMAC_FILE) {
                continue;
            }

            // `symlink_metadata` does not follow links, so a symlink is seen
            // as a symlink and rejected.
            let metadata = fs::symlink_metadata(&path).map_err(|e| {
                NucleusError::CheckpointError(format!(
                    "Failed to stat checkpoint path {:?}: {}",
                    path, e
                ))
            })?;
            if metadata.file_type().is_symlink() {
                return Err(NucleusError::CheckpointError(format!(
                    "Checkpoint integrity scan refuses symlink path {:?}",
                    path
                )));
            }

            let relative = relative.to_str().ok_or_else(|| {
                NucleusError::CheckpointError(format!(
                    "Checkpoint path {:?} is not valid UTF-8",
                    relative
                ))
            })?;

            if metadata.is_dir() {
                // Directory record: "D\0" + relative path + "\0", followed by
                // the records of its children.
                hasher.update(b"D\0");
                hasher.update(relative.as_bytes());
                hasher.update(b"\0");
                Self::update_checkpoint_hmac_inner(hasher, root, &path)?;
            } else if metadata.is_file() {
                // File record: "F\0" + relative path + "\0" + length as
                // little-endian bytes, followed by the file contents.
                hasher.update(b"F\0");
                hasher.update(relative.as_bytes());
                hasher.update(b"\0");
                hasher.update(metadata.len().to_le_bytes());
                let mut file = OpenOptions::new()
                    .read(true)
                    .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
                    .open(&path)
                    .map_err(|e| {
                        NucleusError::CheckpointError(format!(
                            "Failed to open checkpoint file {:?}: {}",
                            path, e
                        ))
                    })?;
                // Stream the contents in fixed-size chunks instead of loading
                // the whole file into memory.
                let mut buf = [0u8; 8192];
                loop {
                    let read = file.read(&mut buf).map_err(|e| {
                        NucleusError::CheckpointError(format!(
                            "Failed to read checkpoint file {:?}: {}",
                            path, e
                        ))
                    })?;
                    if read == 0 {
                        break;
                    }
                    hasher.update(&buf[..read]);
                }
            } else {
                // Neither directory nor regular file nor symlink.
                return Err(NucleusError::CheckpointError(format!(
                    "Checkpoint integrity scan rejects special file {:?}",
                    path
                )));
            }
        }

        Ok(())
    }
725
726    fn ensure_secure_dir(path: &Path, label: &str) -> Result<()> {
727        Self::reject_symlink_path(path, label)?;
728
729        if path.exists() {
730            if !path.is_dir() {
731                return Err(NucleusError::CheckpointError(format!(
732                    "{} {:?} is not a directory",
733                    label, path
734                )));
735            }
736        } else {
737            fs::create_dir_all(path).map_err(|e| {
738                NucleusError::CheckpointError(format!(
739                    "Failed to create {} {:?}: {}",
740                    label, path, e
741                ))
742            })?;
743        }
744
745        Self::reject_symlink_path(path, label)?;
746        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
747            NucleusError::CheckpointError(format!(
748                "Failed to set {} permissions {:?}: {}",
749                label, path, e
750            ))
751        })?;
752
753        Ok(())
754    }
755
756    fn reject_symlink_path(path: &Path, label: &str) -> Result<()> {
757        match fs::symlink_metadata(path) {
758            Ok(metadata) if metadata.file_type().is_symlink() => Err(
759                NucleusError::CheckpointError(format!("Refusing symlink {} {:?}", label, path)),
760            ),
761            Ok(_) | Err(_) => Ok(()),
762        }
763    }
764}
765
766#[cfg(test)]
767mod tests {
768    use super::CriuRuntime;
769    use std::ffi::OsString;
770    use std::fs;
771    use std::os::unix::fs::{symlink, PermissionsExt};
772    use std::path::{Path, PathBuf};
773    use std::sync::{Mutex, OnceLock};
774    use tempfile::TempDir;
775
776    fn checkpoint_key_env_lock() -> &'static Mutex<()> {
777        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
778        LOCK.get_or_init(|| Mutex::new(()))
779    }
780
781    struct CheckpointKeyEnvGuard {
782        previous: Option<OsString>,
783    }
784
785    impl CheckpointKeyEnvGuard {
786        fn set(path: &Path) -> Self {
787            let previous = std::env::var_os("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE");
788            std::env::set_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE", path);
789            Self { previous }
790        }
791    }
792
793    impl Drop for CheckpointKeyEnvGuard {
794        fn drop(&mut self) {
795            match &self.previous {
796                Some(value) => std::env::set_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE", value),
797                None => std::env::remove_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE"),
798            }
799        }
800    }
801
802    fn prepare_secure_checkpoint_key_dir(tmp: &TempDir) -> PathBuf {
803        let key_dir = tmp.path().join("keys");
804        fs::create_dir(&key_dir).unwrap();
805        fs::set_permissions(&key_dir, fs::Permissions::from_mode(0o700)).unwrap();
806        key_dir
807    }
808
809    #[test]
810    fn test_parse_pid_text_plain() {
811        assert_eq!(CriuRuntime::parse_pid_text("1234\n"), Some(1234));
812    }
813
814    #[test]
815    fn test_parse_pid_text_embedded() {
816        assert_eq!(
817            CriuRuntime::parse_pid_text("restored successfully pid=5678"),
818            Some(5678)
819        );
820    }
821
822    #[test]
823    fn test_parse_pid_text_missing() {
824        assert_eq!(CriuRuntime::parse_pid_text("no pid here"), None);
825    }
826
#[test]
fn test_parse_pidfile_strict() {
    // BUG-22: pidfile contents must be parsed strictly. A pidfile holds just
    // a number, so the first number found in arbitrary text must not be
    // treated as a PID.
    let parse = |contents: &str| CriuRuntime::parse_pidfile(contents);

    // A lone number is accepted, with or without surrounding whitespace.
    assert_eq!(parse("1234\n"), Some(1234));
    assert_eq!(parse("  5678  \n"), Some(5678));

    // Error or status messages that merely contain digits are rejected.
    assert_eq!(parse("Error code: 255 (EPERM)"), None);
    assert_eq!(parse("restored successfully pid=5678"), None);

    // Empty and digit-free contents are rejected as well.
    assert_eq!(parse(""), None);
    assert_eq!(parse("no pid here"), None);
}
842
#[test]
fn test_prepare_checkpoint_dir_rejects_symlinked_images_dir() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();

    // Plant `images` as a symlink pointing at a real sibling directory.
    let real_dir = root.join("target");
    fs::create_dir(&real_dir).unwrap();
    symlink(&real_dir, root.join("images")).unwrap();

    // Preparing the checkpoint dir must fail and mention the symlink.
    let err = CriuRuntime::prepare_checkpoint_dir(root).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("symlink"),
        "expected symlink rejection, got: {err}"
    );
}
857
#[test]
fn test_prepare_checkpoint_dir_creates_images_subdir() {
    let scratch = TempDir::new().unwrap();

    // The returned path is `<output dir>/images` and it exists as a directory.
    let images = CriuRuntime::prepare_checkpoint_dir(scratch.path()).unwrap();
    let expected = scratch.path().join("images");
    assert_eq!(images, expected);
    assert!(images.is_dir());

    // Only the permission bits are compared; the directory must be 0o700.
    let permissions = fs::metadata(&images).unwrap().permissions();
    let mode = permissions.mode() & 0o777;
    assert_eq!(mode, 0o700, "images dir should be mode 700, got {:o}", mode);
}
869
#[test]
fn test_prepare_checkpoint_dir_rejects_file_as_output_dir() {
    let scratch = TempDir::new().unwrap();

    // Use an empty regular file where an output directory is expected.
    let plain_file = scratch.path().join("not-a-dir");
    fs::write(&plain_file, "").unwrap();

    let outcome = CriuRuntime::prepare_checkpoint_dir(&plain_file);
    let err = outcome.unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("not a directory"),
        "expected 'not a directory' error, got: {err}"
    );
}
882
#[test]
fn test_prepare_checkpoint_dir_rejects_symlinked_output_dir() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path();

    // The output directory itself is a symlink to a real directory.
    let real_dir = root.join("real");
    let link = root.join("link");
    fs::create_dir(&real_dir).unwrap();
    symlink(&real_dir, &link).unwrap();

    let outcome = CriuRuntime::prepare_checkpoint_dir(&link);
    let err = outcome.unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("symlink"),
        "expected symlink rejection, got: {err}"
    );
}
897
#[test]
fn test_validate_binary_rejects_group_writable() {
    let scratch = TempDir::new().unwrap();
    let candidate = scratch.path().join("criu");
    fs::write(&candidate, "#!/bin/sh\n").unwrap();

    // 0o775: executable, but the group has write access.
    let group_writable = fs::Permissions::from_mode(0o775);
    fs::set_permissions(&candidate, group_writable).unwrap();

    let err = CriuRuntime::validate_binary(&candidate).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("writable by group/others"),
        "expected group-writable rejection, got: {err}"
    );
}
911
#[test]
fn test_validate_binary_rejects_world_writable() {
    let scratch = TempDir::new().unwrap();
    let candidate = scratch.path().join("criu");
    fs::write(&candidate, "#!/bin/sh\n").unwrap();

    // 0o757: executable, group cannot write but everyone else can.
    let world_writable = fs::Permissions::from_mode(0o757);
    fs::set_permissions(&candidate, world_writable).unwrap();

    let err = CriuRuntime::validate_binary(&candidate).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("writable by group/others"),
        "expected world-writable rejection, got: {err}"
    );
}
925
#[test]
fn test_validate_binary_rejects_non_executable() {
    let scratch = TempDir::new().unwrap();
    let candidate = scratch.path().join("criu");
    fs::write(&candidate, "#!/bin/sh\n").unwrap();

    // 0o600: readable and writable by the owner, no execute bit anywhere.
    let no_exec = fs::Permissions::from_mode(0o600);
    fs::set_permissions(&candidate, no_exec).unwrap();

    let err = CriuRuntime::validate_binary(&candidate).unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("not executable"),
        "expected non-executable rejection, got: {err}"
    );
}
939
#[test]
fn test_validate_binary_accepts_secure_binary() {
    let scratch = TempDir::new().unwrap();
    let candidate = scratch.path().join("criu");
    fs::write(&candidate, "#!/bin/sh\n").unwrap();

    // 0o755: executable by all, writable only by the owner.
    let conventional = fs::Permissions::from_mode(0o755);
    fs::set_permissions(&candidate, conventional).unwrap();

    let verdict = CriuRuntime::validate_binary(&candidate);
    verdict.expect("should accept mode 0755");
}
949
#[test]
fn test_validate_binary_accepts_owner_only_executable() {
    let scratch = TempDir::new().unwrap();
    let candidate = scratch.path().join("criu");
    fs::write(&candidate, "#!/bin/sh\n").unwrap();

    // 0o700: all access restricted to the owner, including execute.
    let owner_only = fs::Permissions::from_mode(0o700);
    fs::set_permissions(&candidate, owner_only).unwrap();

    let verdict = CriuRuntime::validate_binary(&candidate);
    verdict.expect("should accept mode 0700");
}
959
#[test]
fn test_validate_binary_rejects_nonexistent() {
    // The path lives inside a fresh temp dir and is never created.
    let scratch = TempDir::new().unwrap();
    let bin = scratch.path().join("nonexistent");

    assert!(CriuRuntime::validate_binary(&bin).is_err());
}
966
#[test]
fn test_checkpoint_state_transitions() {
    use crate::checkpoint::state::CheckpointState;
    use crate::error::StateTransition;

    // Shorthand for "may `from` move to `to`?".
    let allowed = |from: CheckpointState, to: CheckpointState| from.can_transition_to(&to);

    // Valid forward transitions
    assert!(allowed(CheckpointState::None, CheckpointState::Dumping));
    assert!(allowed(CheckpointState::Dumping, CheckpointState::Dumped));
    assert!(allowed(CheckpointState::None, CheckpointState::Restoring));
    assert!(allowed(CheckpointState::Restoring, CheckpointState::Restored));

    // Valid abort transitions: an in-progress operation may fall back to None
    assert!(allowed(CheckpointState::Dumping, CheckpointState::None));
    assert!(allowed(CheckpointState::Restoring, CheckpointState::None));

    // Invalid transitions: skipping the in-progress state, or leaving a
    // finished state for the opposite operation
    assert!(!allowed(CheckpointState::None, CheckpointState::Dumped));
    assert!(!allowed(CheckpointState::None, CheckpointState::Restored));
    assert!(!allowed(CheckpointState::Dumped, CheckpointState::Restoring));
    assert!(!allowed(CheckpointState::Restored, CheckpointState::Dumping));
}
988
#[test]
fn test_prepare_checkpoint_dir_sets_secure_permissions() {
    let scratch = TempDir::new().unwrap();
    let output_dir = scratch.path();
    CriuRuntime::prepare_checkpoint_dir(output_dir).unwrap();

    // Read both directories' metadata, then keep only the permission bits.
    let output_meta = fs::metadata(output_dir).unwrap();
    let images_meta = fs::metadata(output_dir.join("images")).unwrap();
    let output_mode = output_meta.permissions().mode() & 0o777;
    let images_mode = images_meta.permissions().mode() & 0o777;

    // Both output dir and images subdir should be 0700
    assert_eq!(output_mode, 0o700);
    assert_eq!(images_mode, 0o700);
}
1004
#[test]
fn test_checkpoint_hmac_detects_tampering_in_images() {
    // Hold the shared lock while the key-file variable is overridden; a
    // poisoned lock is recovered rather than treated as a failure.
    let _env_lock = match checkpoint_key_env_lock().lock() {
        Ok(held) => held,
        Err(poisoned) => poisoned.into_inner(),
    };
    let scratch = TempDir::new().unwrap();
    let key_path = prepare_secure_checkpoint_key_dir(&scratch).join("checkpoint.key");
    let _key_env = CheckpointKeyEnvGuard::set(&key_path);

    // Lay out a minimal checkpoint: metadata.json plus one file under images/.
    let checkpoint_dir = scratch.path().join("checkpoint");
    let images_dir = checkpoint_dir.join("images");
    let page_image = images_dir.join("pages-1.img");
    fs::create_dir(&checkpoint_dir).unwrap();
    fs::create_dir(&images_dir).unwrap();
    fs::write(checkpoint_dir.join("metadata.json"), "{\"id\":\"abc\"}").unwrap();
    fs::write(&page_image, b"snapshot").unwrap();

    // The untouched checkpoint verifies right after its HMAC is written.
    CriuRuntime::write_checkpoint_hmac(&checkpoint_dir).unwrap();
    CriuRuntime::verify_checkpoint_hmac(&checkpoint_dir).unwrap();

    // Overwriting the image afterwards must make verification fail.
    fs::write(&page_image, b"tampered").unwrap();
    let err = CriuRuntime::verify_checkpoint_hmac(&checkpoint_dir).unwrap_err();
    assert!(err.to_string().contains("HMAC mismatch"));
}
1036
#[test]
fn test_checkpoint_hmac_rejects_symlinks_in_checkpoint_tree() {
    // Hold the shared lock while the key-file variable is overridden; a
    // poisoned lock is recovered rather than treated as a failure.
    let _env_lock = match checkpoint_key_env_lock().lock() {
        Ok(held) => held,
        Err(poisoned) => poisoned.into_inner(),
    };
    let scratch = TempDir::new().unwrap();
    let key_path = prepare_secure_checkpoint_key_dir(&scratch).join("checkpoint.key");
    let _key_env = CheckpointKeyEnvGuard::set(&key_path);

    // Build a checkpoint whose images/ directory contains a symlink back to
    // the checkpoint's own metadata.json.
    let checkpoint_dir = scratch.path().join("checkpoint");
    let images_dir = checkpoint_dir.join("images");
    let metadata_file = checkpoint_dir.join("metadata.json");
    fs::create_dir(&checkpoint_dir).unwrap();
    fs::create_dir(&images_dir).unwrap();
    fs::write(&metadata_file, "{\"id\":\"abc\"}").unwrap();
    symlink(&metadata_file, images_dir.join("metadata-link")).unwrap();

    // Writing the HMAC must refuse the tree because of that symlink.
    let err = CriuRuntime::write_checkpoint_hmac(&checkpoint_dir).unwrap_err();
    assert!(err.to_string().contains("refuses symlink"));
}
1060}