Skip to main content

nucleus/checkpoint/
criu.rs

1use crate::checkpoint::metadata::CheckpointMetadata;
2use crate::checkpoint::state::CheckpointState;
3use crate::container::ContainerState;
4use crate::error::{NucleusError, Result, StateTransition};
5use nix::unistd::Uid;
6use sha2::{Digest, Sha256};
7use std::fs;
8use std::fs::OpenOptions;
9use std::io::{Read, Write};
10use std::os::unix::fs::{MetadataExt, OpenOptionsExt, PermissionsExt};
11use std::path::{Path, PathBuf};
12use std::process::Command;
13use tempfile::Builder;
14use tracing::info;
15
/// Name of the file, inside a checkpoint directory, that stores the hex-encoded
/// HMAC of the checkpoint contents.
const CHECKPOINT_HMAC_FILE: &str = "checkpoint.hmac";
/// Size in bytes of a newly generated HMAC key; existing key files shorter than
/// this are rejected.
const CHECKPOINT_HMAC_KEY_SIZE: usize = 32;
18
/// CRIU runtime for checkpoint/restore
///
/// Follows the same pattern as GVisorRuntime: find binary, validate, invoke via Command.
pub struct CriuRuntime {
    /// Canonicalized path of the `criu` executable selected by `find_binary`.
    binary_path: PathBuf,
    /// Current position in the checkpoint/restore state machine; a new runtime
    /// starts at `CheckpointState::None`.
    state: CheckpointState,
}
26
27impl CriuRuntime {
28    /// Create a new CRIU runtime, finding the criu binary
29    pub fn new() -> Result<Self> {
30        let binary_path = Self::find_binary()?;
31
32        // Validate binary works
33        let output = Command::new(&binary_path)
34            .arg("--version")
35            .output()
36            .map_err(|e| NucleusError::CheckpointError(format!("Failed to execute criu: {}", e)))?;
37
38        if !output.status.success() {
39            return Err(NucleusError::CheckpointError(
40                "criu --version failed".to_string(),
41            ));
42        }
43
44        let version = String::from_utf8_lossy(&output.stdout);
45        info!("Found CRIU: {}", version.trim());
46
47        Ok(Self {
48            binary_path,
49            state: CheckpointState::None,
50        })
51    }
52
53    /// Validate a binary path for safe execution.
54    ///
55    /// Checks permissions (not world/group-writable) and ownership (must be
56    /// owned by root or the effective UID) to prevent execution of tampered
57    /// binaries.
58    fn validate_binary(path: &Path) -> Result<PathBuf> {
59        use std::os::unix::fs::MetadataExt;
60
61        let resolved = fs::canonicalize(path).map_err(|e| {
62            NucleusError::CheckpointError(format!(
63                "Cannot canonicalize criu binary {:?}: {}",
64                path, e
65            ))
66        })?;
67        let metadata = fs::metadata(&resolved).map_err(|e| {
68            NucleusError::CheckpointError(format!("Cannot stat criu binary {:?}: {}", resolved, e))
69        })?;
70        let mode = metadata.permissions().mode();
71        if mode & 0o022 != 0 {
72            return Err(NucleusError::CheckpointError(format!(
73                "criu binary {:?} is writable by group/others (mode {:o}), refusing to execute",
74                resolved, mode
75            )));
76        }
77        if mode & 0o111 == 0 {
78            return Err(NucleusError::CheckpointError(format!(
79                "criu binary {:?} is not executable",
80                resolved
81            )));
82        }
83        let owner_uid = metadata.uid();
84        let euid = nix::unistd::Uid::effective().as_raw();
85        if owner_uid != 0 && owner_uid != euid {
86            return Err(NucleusError::CheckpointError(format!(
87                "criu binary {:?} is owned by UID {} (expected root or euid {}), refusing to execute",
88                resolved, owner_uid, euid
89            )));
90        }
91        Ok(resolved)
92    }
93
94    fn find_binary() -> Result<PathBuf> {
95        // Check common locations
96        for path in &["/usr/sbin/criu", "/usr/bin/criu", "/usr/local/sbin/criu"] {
97            let p = PathBuf::from(path);
98            if p.exists() {
99                return Self::validate_binary(&p);
100            }
101        }
102
103        // For privileged execution, do not resolve runtime binaries via PATH.
104        // This avoids environment-based binary hijacking when running as root.
105        if Uid::effective().is_root() {
106            return Err(NucleusError::CheckpointError(
107                "CRIU binary not found in trusted system paths".to_string(),
108            ));
109        }
110
111        // Try PATH for unprivileged execution.
112        if let Some(path_var) = std::env::var_os("PATH") {
113            for dir in std::env::split_paths(&path_var) {
114                let candidate = dir.join("criu");
115                if candidate.exists() {
116                    return Self::validate_binary(&candidate);
117                }
118            }
119        }
120
121        Err(NucleusError::CheckpointError(
122            "CRIU binary not found. Install criu to use checkpoint/restore.".to_string(),
123        ))
124    }
125
126    /// Checkpoint a running container
127    ///
128    /// State transitions: None -> Dumping -> Dumped (or Dumping -> None on failure)
129    pub fn checkpoint(
130        &mut self,
131        state: &ContainerState,
132        output_dir: &Path,
133        leave_running: bool,
134    ) -> Result<()> {
135        // Requires root
136        if !nix::unistd::Uid::effective().is_root() {
137            return Err(NucleusError::CheckpointError(
138                "Checkpoint requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
139            ));
140        }
141
142        if !state.is_running() {
143            return Err(NucleusError::CheckpointError(format!(
144                "Container {} is not running",
145                state.id
146            )));
147        }
148
149        // State transition: None -> Dumping
150        self.state = self.state.transition(CheckpointState::Dumping)?;
151
152        let images_dir = Self::prepare_checkpoint_dir(output_dir)?;
153
154        // Run criu dump
155        let mut cmd = Command::new(&self.binary_path);
156        cmd.arg("dump")
157            .arg("--tree")
158            .arg(state.pid.to_string())
159            .arg("--images-dir")
160            .arg(&images_dir)
161            .arg("--shell-job");
162
163        if leave_running {
164            cmd.arg("--leave-running");
165        }
166
167        info!(
168            "Checkpointing container {} (PID {}) to {:?}",
169            state.id, state.pid, output_dir
170        );
171
172        let output = cmd.output().map_err(|e| {
173            // Abort: Dumping -> None
174            self.state = self
175                .state
176                .transition(CheckpointState::None)
177                .unwrap_or(self.state);
178            NucleusError::CheckpointError(format!("Failed to run criu dump: {}", e))
179        })?;
180
181        if !output.status.success() {
182            // Abort: Dumping -> None
183            self.state = self
184                .state
185                .transition(CheckpointState::None)
186                .unwrap_or(self.state);
187            let stderr = String::from_utf8_lossy(&output.stderr);
188            return Err(NucleusError::CheckpointError(format!(
189                "criu dump failed: {}",
190                stderr
191            )));
192        }
193
194        // Write metadata
195        let metadata = CheckpointMetadata::from_state(state)?;
196        metadata.save(output_dir)?;
197        Self::write_checkpoint_hmac(output_dir)?;
198
199        // State transition: Dumping -> Dumped
200        self.state = self.state.transition(CheckpointState::Dumped)?;
201
202        info!("Checkpoint complete: {:?}", output_dir);
203        Ok(())
204    }
205
206    /// Restore a container from checkpoint
207    ///
208    /// State transitions: None -> Restoring -> Restored (or Restoring -> None on failure)
209    pub fn restore(&mut self, input_dir: &Path) -> Result<u32> {
210        // Requires root
211        if !nix::unistd::Uid::effective().is_root() {
212            return Err(NucleusError::CheckpointError(
213                "Restore requires root (CRIU needs CAP_SYS_PTRACE)".to_string(),
214            ));
215        }
216
217        // Load and validate metadata
218        let metadata = CheckpointMetadata::load(input_dir)?;
219        info!(
220            "Restoring container {} from checkpoint (originally PID {})",
221            metadata.container_id, metadata.original_pid
222        );
223
224        let images_dir = input_dir.join("images");
225        if !images_dir.exists() {
226            return Err(NucleusError::CheckpointError(format!(
227                "Images directory not found: {:?}",
228                images_dir
229            )));
230        }
231
232        Self::verify_checkpoint_hmac(input_dir)?;
233
234        // State transition: None -> Restoring
235        self.state = self.state.transition(CheckpointState::Restoring)?;
236
237        // Capture the restored init PID explicitly.
238        let pidfile = Builder::new()
239            .prefix("nucleus-criu-restore-")
240            .tempfile()
241            .map_err(|e| {
242                NucleusError::CheckpointError(format!("Failed to create CRIU pidfile: {}", e))
243            })?;
244        let pidfile_path = pidfile.path().to_path_buf();
245
246        // Run criu restore
247        let output = Command::new(&self.binary_path)
248            .arg("restore")
249            .arg("--images-dir")
250            .arg(&images_dir)
251            .arg("--shell-job")
252            .arg("--pidfile")
253            .arg(&pidfile_path)
254            .output()
255            .map_err(|e| {
256                // Abort: Restoring -> None
257                self.state = self
258                    .state
259                    .transition(CheckpointState::None)
260                    .unwrap_or(self.state);
261                NucleusError::CheckpointError(format!("Failed to run criu restore: {}", e))
262            })?;
263
264        if !output.status.success() {
265            // Abort: Restoring -> None
266            self.state = self
267                .state
268                .transition(CheckpointState::None)
269                .unwrap_or(self.state);
270            let stderr = String::from_utf8_lossy(&output.stderr);
271            return Err(NucleusError::CheckpointError(format!(
272                "criu restore failed: {}",
273                stderr
274            )));
275        }
276
277        // State transition: Restoring -> Restored
278        self.state = self.state.transition(CheckpointState::Restored)?;
279
280        // Parse restored PID from pidfile, with output fallback for compatibility.
281        let pid_text = fs::read_to_string(&pidfile_path).unwrap_or_default();
282        if let Some(pid) = Self::parse_pidfile(&pid_text) {
283            info!("Restore complete, new PID: {}", pid);
284            return Ok(pid);
285        }
286
287        let stdout = String::from_utf8_lossy(&output.stdout);
288        if let Some(pid) = Self::parse_pid_text(&stdout) {
289            info!("Restore complete, new PID: {}", pid);
290            return Ok(pid);
291        }
292
293        let stderr = String::from_utf8_lossy(&output.stderr);
294        if let Some(pid) = Self::parse_pid_text(&stderr) {
295            info!("Restore complete, new PID: {}", pid);
296            return Ok(pid);
297        }
298
299        Err(NucleusError::CheckpointError(format!(
300            "Failed to parse restored PID from CRIU output (pidfile='{}', stdout='{}', stderr='{}')",
301            pid_text.trim(),
302            stdout.trim(),
303            stderr.trim()
304        )))
305    }
306
307    fn parse_pid_text(text: &str) -> Option<u32> {
308        text.split(|c: char| !c.is_ascii_digit())
309            .filter(|tok| !tok.is_empty())
310            .find_map(|tok| tok.parse::<u32>().ok())
311    }
312
313    fn parse_pidfile(text: &str) -> Option<u32> {
314        let trimmed = text.trim();
315        if trimmed.is_empty() || !trimmed.chars().all(|c| c.is_ascii_digit()) {
316            return None;
317        }
318        trimmed.parse::<u32>().ok()
319    }
320
321    fn prepare_checkpoint_dir(output_dir: &Path) -> Result<PathBuf> {
322        Self::ensure_secure_dir(output_dir, "checkpoint directory")?;
323        let images_dir = output_dir.join("images");
324        Self::ensure_secure_dir(&images_dir, "checkpoint images directory")?;
325        Ok(images_dir)
326    }
327
328    fn write_checkpoint_hmac(dir: &Path) -> Result<()> {
329        let key = Self::load_or_create_checkpoint_hmac_key()?;
330        let hmac_path = dir.join(CHECKPOINT_HMAC_FILE);
331        let tmp_path = dir.join(format!("{}.tmp", CHECKPOINT_HMAC_FILE));
332
333        match fs::symlink_metadata(&tmp_path) {
334            Ok(meta) if meta.file_type().is_symlink() => {
335                return Err(NucleusError::CheckpointError(format!(
336                    "Refusing symlink checkpoint HMAC temp file {:?}",
337                    tmp_path
338                )));
339            }
340            Ok(_) => {
341                fs::remove_file(&tmp_path).map_err(|e| {
342                    NucleusError::CheckpointError(format!(
343                        "Failed to remove stale checkpoint HMAC temp file {:?}: {}",
344                        tmp_path, e
345                    ))
346                })?;
347            }
348            Err(_) => {}
349        }
350
351        let digest = Self::compute_checkpoint_hmac(dir, &key)?;
352
353        let mut file = OpenOptions::new()
354            .create_new(true)
355            .write(true)
356            .mode(0o600)
357            .custom_flags(libc::O_NOFOLLOW)
358            .open(&tmp_path)
359            .map_err(|e| {
360                NucleusError::CheckpointError(format!(
361                    "Failed to open checkpoint HMAC temp file {:?}: {}",
362                    tmp_path, e
363                ))
364            })?;
365        file.write_all(digest.as_bytes()).map_err(|e| {
366            NucleusError::CheckpointError(format!(
367                "Failed to write checkpoint HMAC {:?}: {}",
368                tmp_path, e
369            ))
370        })?;
371        file.sync_all().map_err(|e| {
372            NucleusError::CheckpointError(format!(
373                "Failed to sync checkpoint HMAC {:?}: {}",
374                tmp_path, e
375            ))
376        })?;
377        fs::rename(&tmp_path, &hmac_path).map_err(|e| {
378            NucleusError::CheckpointError(format!(
379                "Failed to atomically replace checkpoint HMAC {:?}: {}",
380                hmac_path, e
381            ))
382        })?;
383
384        Ok(())
385    }
386
387    fn verify_checkpoint_hmac(dir: &Path) -> Result<()> {
388        let hmac_path = dir.join(CHECKPOINT_HMAC_FILE);
389        let expected = Self::read_file_nofollow_bytes(&hmac_path).map_err(|e| {
390            NucleusError::CheckpointError(format!(
391                "Failed to read checkpoint HMAC {:?}: {}",
392                hmac_path, e
393            ))
394        })?;
395        let expected = std::str::from_utf8(&expected)
396            .map_err(|e| {
397                NucleusError::CheckpointError(format!(
398                    "Checkpoint HMAC {:?} is not valid UTF-8: {}",
399                    hmac_path, e
400                ))
401            })?
402            .trim()
403            .to_string();
404        if expected.is_empty() {
405            return Err(NucleusError::CheckpointError(format!(
406                "Checkpoint HMAC {:?} is empty",
407                hmac_path
408            )));
409        }
410
411        info!("Verifying checkpoint HMAC integrity");
412        let key = Self::load_or_create_checkpoint_hmac_key()?;
413        let actual = Self::compute_checkpoint_hmac(dir, &key)?;
414        if actual != expected {
415            return Err(NucleusError::CheckpointError(format!(
416                "Checkpoint integrity verification failed: HMAC mismatch (expected {}, got {})",
417                expected, actual
418            )));
419        }
420
421        info!("Checkpoint integrity verified");
422        Ok(())
423    }
424
425    fn checkpoint_hmac_key_path() -> PathBuf {
426        if let Some(path) =
427            std::env::var_os("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE").filter(|path| !path.is_empty())
428        {
429            return PathBuf::from(path);
430        }
431
432        if Uid::effective().is_root() {
433            PathBuf::from("/var/lib/nucleus/checkpoint-hmac.key")
434        } else {
435            dirs::data_local_dir()
436                .map(|dir| dir.join("nucleus/checkpoint-hmac.key"))
437                .or_else(|| dirs::home_dir().map(|dir| dir.join(".nucleus/checkpoint-hmac.key")))
438                .unwrap_or_else(|| PathBuf::from("/tmp/nucleus-checkpoint-hmac.key"))
439        }
440    }
441
442    fn load_or_create_checkpoint_hmac_key() -> Result<Vec<u8>> {
443        let key_path = Self::checkpoint_hmac_key_path();
444        let parent = key_path.parent().ok_or_else(|| {
445            NucleusError::CheckpointError(format!(
446                "Checkpoint HMAC key path {:?} has no parent directory",
447                key_path
448            ))
449        })?;
450        Self::ensure_secure_key_parent_dir(parent)?;
451        Self::reject_symlink_path(&key_path, "checkpoint HMAC key file")?;
452
453        if key_path.exists() {
454            let metadata = fs::metadata(&key_path).map_err(|e| {
455                NucleusError::CheckpointError(format!(
456                    "Failed to stat checkpoint HMAC key {:?}: {}",
457                    key_path, e
458                ))
459            })?;
460            let mode = metadata.permissions().mode() & 0o777;
461            let owner = metadata.uid();
462            let euid = Uid::effective().as_raw();
463            if owner != euid {
464                return Err(NucleusError::CheckpointError(format!(
465                    "Checkpoint HMAC key {:?} is owned by uid {} (expected {})",
466                    key_path, owner, euid
467                )));
468            }
469            if mode & 0o077 != 0 {
470                return Err(NucleusError::CheckpointError(format!(
471                    "Checkpoint HMAC key {:?} has insecure mode {:o}; expected owner-only access",
472                    key_path, mode
473                )));
474            }
475            let key = Self::read_file_nofollow_bytes(&key_path).map_err(|e| {
476                NucleusError::CheckpointError(format!(
477                    "Failed to read checkpoint HMAC key {:?}: {}",
478                    key_path, e
479                ))
480            })?;
481            if key.len() < CHECKPOINT_HMAC_KEY_SIZE {
482                return Err(NucleusError::CheckpointError(format!(
483                    "Checkpoint HMAC key {:?} is too short ({} bytes)",
484                    key_path,
485                    key.len()
486                )));
487            }
488            return Ok(key);
489        }
490
491        let mut key = vec![0u8; CHECKPOINT_HMAC_KEY_SIZE];
492        Self::fill_secure_random(&mut key)?;
493        let mut file = OpenOptions::new()
494            .create_new(true)
495            .write(true)
496            .mode(0o600)
497            .custom_flags(libc::O_NOFOLLOW)
498            .open(&key_path)
499            .map_err(|e| {
500                NucleusError::CheckpointError(format!(
501                    "Failed to create checkpoint HMAC key {:?}: {}",
502                    key_path, e
503                ))
504            })?;
505        file.write_all(&key).map_err(|e| {
506            NucleusError::CheckpointError(format!(
507                "Failed to write checkpoint HMAC key {:?}: {}",
508                key_path, e
509            ))
510        })?;
511        file.sync_all().map_err(|e| {
512            NucleusError::CheckpointError(format!(
513                "Failed to sync checkpoint HMAC key {:?}: {}",
514                key_path, e
515            ))
516        })?;
517        Ok(key)
518    }
519
520    fn ensure_secure_key_parent_dir(path: &Path) -> Result<()> {
521        Self::reject_symlink_path(path, "checkpoint HMAC key directory")?;
522
523        if path.exists() {
524            let metadata = fs::metadata(path).map_err(|e| {
525                NucleusError::CheckpointError(format!(
526                    "Failed to stat checkpoint HMAC key directory {:?}: {}",
527                    path, e
528                ))
529            })?;
530            if !metadata.is_dir() {
531                return Err(NucleusError::CheckpointError(format!(
532                    "Checkpoint HMAC key directory {:?} is not a directory",
533                    path
534                )));
535            }
536            let mode = metadata.permissions().mode() & 0o777;
537            let owner = metadata.uid();
538            let euid = Uid::effective().as_raw();
539            if owner != euid {
540                return Err(NucleusError::CheckpointError(format!(
541                    "Checkpoint HMAC key directory {:?} is owned by uid {} (expected {})",
542                    path, owner, euid
543                )));
544            }
545            if mode & 0o077 != 0 {
546                return Err(NucleusError::CheckpointError(format!(
547                    "Checkpoint HMAC key directory {:?} has insecure mode {:o}; expected owner-only access",
548                    path, mode
549                )));
550            }
551            return Ok(());
552        }
553
554        fs::create_dir_all(path).map_err(|e| {
555            NucleusError::CheckpointError(format!(
556                "Failed to create checkpoint HMAC key directory {:?}: {}",
557                path, e
558            ))
559        })?;
560        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
561            NucleusError::CheckpointError(format!(
562                "Failed to secure checkpoint HMAC key directory {:?}: {}",
563                path, e
564            ))
565        })?;
566        Ok(())
567    }
568
569    fn fill_secure_random(buf: &mut [u8]) -> Result<()> {
570        let file = OpenOptions::new()
571            .read(true)
572            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
573            .open("/dev/urandom")
574            .map_err(|e| {
575                NucleusError::CheckpointError(format!(
576                    "Failed to open /dev/urandom for checkpoint HMAC key generation: {}",
577                    e
578                ))
579            })?;
580        let metadata = file.metadata().map_err(|e| {
581            NucleusError::CheckpointError(format!("Failed to stat /dev/urandom: {}", e))
582        })?;
583        use std::os::unix::fs::FileTypeExt;
584        if !metadata.file_type().is_char_device() {
585            return Err(NucleusError::CheckpointError(
586                "/dev/urandom is not a character device".to_string(),
587            ));
588        }
589        let mut file = file;
590        file.read_exact(buf).map_err(|e| {
591            NucleusError::CheckpointError(format!(
592                "Failed to read /dev/urandom for checkpoint HMAC key generation: {}",
593                e
594            ))
595        })
596    }
597
598    fn read_file_nofollow_bytes(path: &Path) -> std::io::Result<Vec<u8>> {
599        let mut file = OpenOptions::new()
600            .read(true)
601            .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
602            .open(path)?;
603        let mut content = Vec::new();
604        file.read_to_end(&mut content)?;
605        Ok(content)
606    }
607
608    fn compute_checkpoint_hmac(dir: &Path, key: &[u8]) -> Result<String> {
609        let mut key_block = [0u8; 64];
610        if key.len() > key_block.len() {
611            let digest = Sha256::digest(key);
612            key_block[..digest.len()].copy_from_slice(&digest);
613        } else {
614            key_block[..key.len()].copy_from_slice(key);
615        }
616
617        let mut ipad = [0x36u8; 64];
618        let mut opad = [0x5cu8; 64];
619        for (dst, src) in ipad.iter_mut().zip(key_block.iter()) {
620            *dst ^= *src;
621        }
622        for (dst, src) in opad.iter_mut().zip(key_block.iter()) {
623            *dst ^= *src;
624        }
625
626        let mut inner = Sha256::new();
627        inner.update(ipad);
628        Self::update_checkpoint_hmac_inner(&mut inner, dir, dir)?;
629        let inner_hash = inner.finalize();
630
631        let mut outer = Sha256::new();
632        outer.update(opad);
633        outer.update(inner_hash);
634        Ok(hex::encode(outer.finalize()))
635    }
636
637    fn update_checkpoint_hmac_inner(hasher: &mut Sha256, root: &Path, dir: &Path) -> Result<()> {
638        let mut entries = Vec::new();
639        for entry in fs::read_dir(dir).map_err(|e| {
640            NucleusError::CheckpointError(format!(
641                "Failed to read checkpoint directory {:?}: {}",
642                dir, e
643            ))
644        })? {
645            let entry = entry.map_err(|e| {
646                NucleusError::CheckpointError(format!(
647                    "Failed to read checkpoint entry in {:?}: {}",
648                    dir, e
649                ))
650            })?;
651            entries.push(entry.path());
652        }
653        entries.sort();
654
655        for path in entries {
656            let relative = path.strip_prefix(root).map_err(|e| {
657                NucleusError::CheckpointError(format!(
658                    "Failed to compute checkpoint-relative path for {:?}: {}",
659                    path, e
660                ))
661            })?;
662            if relative == Path::new(CHECKPOINT_HMAC_FILE) {
663                continue;
664            }
665
666            let metadata = fs::symlink_metadata(&path).map_err(|e| {
667                NucleusError::CheckpointError(format!(
668                    "Failed to stat checkpoint path {:?}: {}",
669                    path, e
670                ))
671            })?;
672            if metadata.file_type().is_symlink() {
673                return Err(NucleusError::CheckpointError(format!(
674                    "Checkpoint integrity scan refuses symlink path {:?}",
675                    path
676                )));
677            }
678
679            let relative = relative.to_str().ok_or_else(|| {
680                NucleusError::CheckpointError(format!(
681                    "Checkpoint path {:?} is not valid UTF-8",
682                    relative
683                ))
684            })?;
685
686            if metadata.is_dir() {
687                hasher.update(b"D\0");
688                hasher.update(relative.as_bytes());
689                hasher.update(b"\0");
690                Self::update_checkpoint_hmac_inner(hasher, root, &path)?;
691            } else if metadata.is_file() {
692                hasher.update(b"F\0");
693                hasher.update(relative.as_bytes());
694                hasher.update(b"\0");
695                hasher.update(metadata.len().to_le_bytes());
696                let mut file = OpenOptions::new()
697                    .read(true)
698                    .custom_flags(libc::O_NOFOLLOW | libc::O_CLOEXEC)
699                    .open(&path)
700                    .map_err(|e| {
701                        NucleusError::CheckpointError(format!(
702                            "Failed to open checkpoint file {:?}: {}",
703                            path, e
704                        ))
705                    })?;
706                let mut buf = [0u8; 8192];
707                loop {
708                    let read = file.read(&mut buf).map_err(|e| {
709                        NucleusError::CheckpointError(format!(
710                            "Failed to read checkpoint file {:?}: {}",
711                            path, e
712                        ))
713                    })?;
714                    if read == 0 {
715                        break;
716                    }
717                    hasher.update(&buf[..read]);
718                }
719            } else {
720                return Err(NucleusError::CheckpointError(format!(
721                    "Checkpoint integrity scan rejects special file {:?}",
722                    path
723                )));
724            }
725        }
726
727        Ok(())
728    }
729
730    fn ensure_secure_dir(path: &Path, label: &str) -> Result<()> {
731        Self::reject_symlink_path(path, label)?;
732
733        if path.exists() {
734            if !path.is_dir() {
735                return Err(NucleusError::CheckpointError(format!(
736                    "{} {:?} is not a directory",
737                    label, path
738                )));
739            }
740        } else {
741            fs::create_dir_all(path).map_err(|e| {
742                NucleusError::CheckpointError(format!(
743                    "Failed to create {} {:?}: {}",
744                    label, path, e
745                ))
746            })?;
747        }
748
749        Self::reject_symlink_path(path, label)?;
750        fs::set_permissions(path, fs::Permissions::from_mode(0o700)).map_err(|e| {
751            NucleusError::CheckpointError(format!(
752                "Failed to set {} permissions {:?}: {}",
753                label, path, e
754            ))
755        })?;
756
757        Ok(())
758    }
759
760    fn reject_symlink_path(path: &Path, label: &str) -> Result<()> {
761        match fs::symlink_metadata(path) {
762            Ok(metadata) if metadata.file_type().is_symlink() => Err(
763                NucleusError::CheckpointError(format!("Refusing symlink {} {:?}", label, path)),
764            ),
765            Ok(_) | Err(_) => Ok(()),
766        }
767    }
768}
769
770#[cfg(test)]
771mod tests {
772    use super::CriuRuntime;
773    use std::ffi::OsString;
774    use std::fs;
775    use std::os::unix::fs::{symlink, PermissionsExt};
776    use std::path::{Path, PathBuf};
777    use std::sync::{Mutex, OnceLock};
778    use tempfile::TempDir;
779
780    fn checkpoint_key_env_lock() -> &'static Mutex<()> {
781        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
782        LOCK.get_or_init(|| Mutex::new(()))
783    }
784
785    struct CheckpointKeyEnvGuard {
786        previous: Option<OsString>,
787    }
788
789    impl CheckpointKeyEnvGuard {
790        fn set(path: &Path) -> Self {
791            let previous = std::env::var_os("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE");
792            std::env::set_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE", path);
793            Self { previous }
794        }
795    }
796
797    impl Drop for CheckpointKeyEnvGuard {
798        fn drop(&mut self) {
799            match &self.previous {
800                Some(value) => std::env::set_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE", value),
801                None => std::env::remove_var("NUCLEUS_CHECKPOINT_HMAC_KEY_FILE"),
802            }
803        }
804    }
805
806    fn prepare_secure_checkpoint_key_dir(tmp: &TempDir) -> PathBuf {
807        let key_dir = tmp.path().join("keys");
808        fs::create_dir(&key_dir).unwrap();
809        fs::set_permissions(&key_dir, fs::Permissions::from_mode(0o700)).unwrap();
810        key_dir
811    }
812
813    #[test]
814    fn test_parse_pid_text_plain() {
815        assert_eq!(CriuRuntime::parse_pid_text("1234\n"), Some(1234));
816    }
817
818    #[test]
819    fn test_parse_pid_text_embedded() {
820        assert_eq!(
821            CriuRuntime::parse_pid_text("restored successfully pid=5678"),
822            Some(5678)
823        );
824    }
825
826    #[test]
827    fn test_parse_pid_text_missing() {
828        assert_eq!(CriuRuntime::parse_pid_text("no pid here"), None);
829    }
830
831    #[test]
832    fn test_parse_pidfile_strict() {
833        // BUG-22: parse_pid_text must prefer strict pidfile parsing
834        // A pidfile should contain just a number, not extract first number from error messages
835        assert_eq!(CriuRuntime::parse_pidfile("1234\n"), Some(1234));
836        assert_eq!(CriuRuntime::parse_pidfile("  5678  \n"), Some(5678));
837        // Error messages should NOT parse as PIDs
838        assert_eq!(CriuRuntime::parse_pidfile("Error code: 255 (EPERM)"), None);
839        assert_eq!(
840            CriuRuntime::parse_pidfile("restored successfully pid=5678"),
841            None
842        );
843        assert_eq!(CriuRuntime::parse_pidfile(""), None);
844        assert_eq!(CriuRuntime::parse_pidfile("no pid here"), None);
845    }
846
847    #[test]
848    fn test_prepare_checkpoint_dir_rejects_symlinked_images_dir() {
849        let tmp = TempDir::new().unwrap();
850        let target = tmp.path().join("target");
851        fs::create_dir(&target).unwrap();
852        let images = tmp.path().join("images");
853        symlink(&target, &images).unwrap();
854
855        let err = CriuRuntime::prepare_checkpoint_dir(tmp.path()).unwrap_err();
856        assert!(
857            err.to_string().contains("symlink"),
858            "expected symlink rejection, got: {err}"
859        );
860    }
861
862    #[test]
863    fn test_prepare_checkpoint_dir_creates_images_subdir() {
864        let tmp = TempDir::new().unwrap();
865        let images = CriuRuntime::prepare_checkpoint_dir(tmp.path()).unwrap();
866        assert_eq!(images, tmp.path().join("images"));
867        assert!(images.is_dir());
868
869        // Verify permissions are 0o700
870        let mode = fs::metadata(&images).unwrap().permissions().mode() & 0o777;
871        assert_eq!(mode, 0o700, "images dir should be mode 700, got {:o}", mode);
872    }
873
874    #[test]
875    fn test_prepare_checkpoint_dir_rejects_file_as_output_dir() {
876        let tmp = TempDir::new().unwrap();
877        let file_path = tmp.path().join("not-a-dir");
878        fs::write(&file_path, "").unwrap();
879
880        let err = CriuRuntime::prepare_checkpoint_dir(&file_path).unwrap_err();
881        assert!(
882            err.to_string().contains("not a directory"),
883            "expected 'not a directory' error, got: {err}"
884        );
885    }
886
887    #[test]
888    fn test_prepare_checkpoint_dir_rejects_symlinked_output_dir() {
889        let tmp = TempDir::new().unwrap();
890        let real_dir = tmp.path().join("real");
891        fs::create_dir(&real_dir).unwrap();
892        let link = tmp.path().join("link");
893        symlink(&real_dir, &link).unwrap();
894
895        let err = CriuRuntime::prepare_checkpoint_dir(&link).unwrap_err();
896        assert!(
897            err.to_string().contains("symlink"),
898            "expected symlink rejection, got: {err}"
899        );
900    }
901
902    #[test]
903    fn test_validate_binary_rejects_group_writable() {
904        let tmp = TempDir::new().unwrap();
905        let bin = tmp.path().join("criu");
906        fs::write(&bin, "#!/bin/sh\n").unwrap();
907        fs::set_permissions(&bin, fs::Permissions::from_mode(0o775)).unwrap();
908
909        let err = CriuRuntime::validate_binary(&bin).unwrap_err();
910        assert!(
911            err.to_string().contains("writable by group/others"),
912            "expected group-writable rejection, got: {err}"
913        );
914    }
915
916    #[test]
917    fn test_validate_binary_rejects_world_writable() {
918        let tmp = TempDir::new().unwrap();
919        let bin = tmp.path().join("criu");
920        fs::write(&bin, "#!/bin/sh\n").unwrap();
921        fs::set_permissions(&bin, fs::Permissions::from_mode(0o757)).unwrap();
922
923        let err = CriuRuntime::validate_binary(&bin).unwrap_err();
924        assert!(
925            err.to_string().contains("writable by group/others"),
926            "expected world-writable rejection, got: {err}"
927        );
928    }
929
930    #[test]
931    fn test_validate_binary_rejects_non_executable() {
932        let tmp = TempDir::new().unwrap();
933        let bin = tmp.path().join("criu");
934        fs::write(&bin, "#!/bin/sh\n").unwrap();
935        fs::set_permissions(&bin, fs::Permissions::from_mode(0o600)).unwrap();
936
937        let err = CriuRuntime::validate_binary(&bin).unwrap_err();
938        assert!(
939            err.to_string().contains("not executable"),
940            "expected non-executable rejection, got: {err}"
941        );
942    }
943
944    #[test]
945    fn test_validate_binary_accepts_secure_binary() {
946        let tmp = TempDir::new().unwrap();
947        let bin = tmp.path().join("criu");
948        fs::write(&bin, "#!/bin/sh\n").unwrap();
949        fs::set_permissions(&bin, fs::Permissions::from_mode(0o755)).unwrap();
950
951        CriuRuntime::validate_binary(&bin).expect("should accept mode 0755");
952    }
953
954    #[test]
955    fn test_validate_binary_accepts_owner_only_executable() {
956        let tmp = TempDir::new().unwrap();
957        let bin = tmp.path().join("criu");
958        fs::write(&bin, "#!/bin/sh\n").unwrap();
959        fs::set_permissions(&bin, fs::Permissions::from_mode(0o700)).unwrap();
960
961        CriuRuntime::validate_binary(&bin).expect("should accept mode 0700");
962    }
963
964    #[test]
965    fn test_validate_binary_rejects_nonexistent() {
966        let tmp = TempDir::new().unwrap();
967        let bin = tmp.path().join("nonexistent");
968        assert!(CriuRuntime::validate_binary(&bin).is_err());
969    }
970
971    #[test]
972    fn test_checkpoint_state_transitions() {
973        use crate::checkpoint::state::CheckpointState;
974        use crate::error::StateTransition;
975
976        // Valid forward transitions
977        assert!(CheckpointState::None.can_transition_to(&CheckpointState::Dumping));
978        assert!(CheckpointState::Dumping.can_transition_to(&CheckpointState::Dumped));
979        assert!(CheckpointState::None.can_transition_to(&CheckpointState::Restoring));
980        assert!(CheckpointState::Restoring.can_transition_to(&CheckpointState::Restored));
981
982        // Valid abort transitions
983        assert!(CheckpointState::Dumping.can_transition_to(&CheckpointState::None));
984        assert!(CheckpointState::Restoring.can_transition_to(&CheckpointState::None));
985
986        // Invalid transitions
987        assert!(!CheckpointState::None.can_transition_to(&CheckpointState::Dumped));
988        assert!(!CheckpointState::None.can_transition_to(&CheckpointState::Restored));
989        assert!(!CheckpointState::Dumped.can_transition_to(&CheckpointState::Restoring));
990        assert!(!CheckpointState::Restored.can_transition_to(&CheckpointState::Dumping));
991    }
992
993    #[test]
994    fn test_prepare_checkpoint_dir_sets_secure_permissions() {
995        let tmp = TempDir::new().unwrap();
996        CriuRuntime::prepare_checkpoint_dir(tmp.path()).unwrap();
997
998        // Both output dir and images subdir should be 0700
999        let output_mode = fs::metadata(tmp.path()).unwrap().permissions().mode() & 0o777;
1000        let images_mode = fs::metadata(tmp.path().join("images"))
1001            .unwrap()
1002            .permissions()
1003            .mode()
1004            & 0o777;
1005        assert_eq!(output_mode, 0o700);
1006        assert_eq!(images_mode, 0o700);
1007    }
1008
1009    #[test]
1010    fn test_checkpoint_hmac_detects_tampering_in_images() {
1011        let _guard = checkpoint_key_env_lock()
1012            .lock()
1013            .unwrap_or_else(|poisoned| poisoned.into_inner());
1014        let tmp = TempDir::new().unwrap();
1015        let key_dir = prepare_secure_checkpoint_key_dir(&tmp);
1016        let key_path = key_dir.join("checkpoint.key");
1017        let _env = CheckpointKeyEnvGuard::set(&key_path);
1018
1019        let checkpoint_dir = tmp.path().join("checkpoint");
1020        fs::create_dir(&checkpoint_dir).unwrap();
1021        fs::create_dir(checkpoint_dir.join("images")).unwrap();
1022        fs::write(checkpoint_dir.join("metadata.json"), "{\"id\":\"abc\"}").unwrap();
1023        fs::write(
1024            checkpoint_dir.join("images").join("pages-1.img"),
1025            b"snapshot",
1026        )
1027        .unwrap();
1028
1029        CriuRuntime::write_checkpoint_hmac(&checkpoint_dir).unwrap();
1030        CriuRuntime::verify_checkpoint_hmac(&checkpoint_dir).unwrap();
1031
1032        fs::write(
1033            checkpoint_dir.join("images").join("pages-1.img"),
1034            b"tampered",
1035        )
1036        .unwrap();
1037        let err = CriuRuntime::verify_checkpoint_hmac(&checkpoint_dir).unwrap_err();
1038        assert!(err.to_string().contains("HMAC mismatch"));
1039    }
1040
1041    #[test]
1042    fn test_checkpoint_hmac_rejects_symlinks_in_checkpoint_tree() {
1043        let _guard = checkpoint_key_env_lock()
1044            .lock()
1045            .unwrap_or_else(|poisoned| poisoned.into_inner());
1046        let tmp = TempDir::new().unwrap();
1047        let key_dir = prepare_secure_checkpoint_key_dir(&tmp);
1048        let key_path = key_dir.join("checkpoint.key");
1049        let _env = CheckpointKeyEnvGuard::set(&key_path);
1050
1051        let checkpoint_dir = tmp.path().join("checkpoint");
1052        fs::create_dir(&checkpoint_dir).unwrap();
1053        fs::create_dir(checkpoint_dir.join("images")).unwrap();
1054        fs::write(checkpoint_dir.join("metadata.json"), "{\"id\":\"abc\"}").unwrap();
1055        symlink(
1056            checkpoint_dir.join("metadata.json"),
1057            checkpoint_dir.join("images").join("metadata-link"),
1058        )
1059        .unwrap();
1060
1061        let err = CriuRuntime::write_checkpoint_hmac(&checkpoint_dir).unwrap_err();
1062        assert!(err.to_string().contains("refuses symlink"));
1063    }
1064}