Skip to main content

nucleus/container/
state.rs

1use crate::error::{NucleusError, Result};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::fs;
5use std::fs::OpenOptions;
6use std::io::Write;
7use std::os::unix::fs::{MetadataExt, OpenOptionsExt, PermissionsExt};
8use std::path::{Path, PathBuf};
9use std::time::SystemTime;
10use tracing::{debug, info, warn};
11
/// OCI-compliant container status.
///
/// Serialized by serde under the lowercase variant name (`"creating"`,
/// `"created"`, `"running"`, `"stopped"`) because of `rename_all = "lowercase"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum OciStatus {
    /// Container is being created
    Creating,
    /// Container has been created but not started
    Created,
    /// Container process is running
    Running,
    /// Container process has stopped
    Stopped,
}
25
26impl std::fmt::Display for OciStatus {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            OciStatus::Creating => write!(f, "creating"),
30            OciStatus::Created => write!(f, "created"),
31            OciStatus::Running => write!(f, "running"),
32            OciStatus::Stopped => write!(f, "stopped"),
33        }
34    }
35}
36
/// Container state tracking information.
///
/// This is the record that `ContainerStateManager` serializes to and from
/// JSON. Fields marked `#[serde(default)]` fall back to their default value
/// when they are absent from the serialized state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContainerState {
    /// Container ID (unique 32 hex chars, 128-bit)
    pub id: String,

    /// Container name (user-supplied or same as ID)
    pub name: String,

    /// PID of the container process
    pub pid: u32,

    /// Command being executed
    pub command: Vec<String>,

    /// Start time (Unix timestamp, whole seconds), captured when
    /// `ContainerState::new` runs
    pub started_at: u64,

    /// Memory limit in bytes (None = unlimited)
    pub memory_limit: Option<u64>,

    /// CPU limit in millicores (None = unlimited)
    pub cpu_limit: Option<u64>,

    /// Whether using gVisor runtime
    pub using_gvisor: bool,

    /// Whether using rootless mode
    pub rootless: bool,

    /// cgroup path
    pub cgroup_path: Option<String>,

    /// Desired topology config hash associated with this container, if any.
    /// Defaults to `None` when absent from the serialized state.
    #[serde(default)]
    pub config_hash: Option<u64>,

    /// UID of the user who created this container (the effective UID at the
    /// time `ContainerState::new` ran). Defaults to 0 when absent from the
    /// serialized state.
    #[serde(default)]
    pub creator_uid: u32,

    /// Effective uid of the workload process inside the container.
    /// Defaults to 0 when absent from the serialized state.
    #[serde(default)]
    pub process_uid: u32,

    /// Effective gid of the workload process inside the container.
    /// Defaults to 0 when absent from the serialized state.
    #[serde(default)]
    pub process_gid: u32,

    /// Supplementary gids of the workload process inside the container.
    /// Defaults to an empty list when absent from the serialized state.
    #[serde(default)]
    pub additional_gids: Vec<u32>,

    /// Process start time in clock ticks (from /proc/`<pid>`/stat field 22)
    /// Used to detect PID reuse in is_running(). A value of 0 means the start
    /// time is unknown, in which case is_running() reports `false`.
    #[serde(default)]
    pub start_ticks: u64,

    /// OCI container status. Defaults to `OciStatus::Stopped` (via
    /// `default_oci_status`) when absent from the serialized state.
    #[serde(default = "default_oci_status")]
    pub status: OciStatus,

    /// OCI bundle path
    #[serde(default)]
    pub bundle_path: Option<String>,

    /// OCI annotations
    #[serde(default)]
    pub annotations: HashMap<String, String>,
}
107
/// Serde default for `ContainerState::status`: a state record that carries no
/// status field is treated as `Stopped`.
fn default_oci_status() -> OciStatus {
    OciStatus::Stopped
}
111
/// Parameters for creating a new `ContainerState`.
///
/// Every field is moved unchanged into the field of the same name on
/// `ContainerState` by `ContainerState::new`; the remaining `ContainerState`
/// fields are filled in by `new` itself.
pub struct ContainerStateParams {
    /// Container ID.
    pub id: String,
    /// Container name.
    pub name: String,
    /// PID of the container process; also used by `new` to read the process
    /// start ticks from `/proc/<pid>/stat`.
    pub pid: u32,
    /// Command being executed.
    pub command: Vec<String>,
    /// Memory limit (`None` = unlimited).
    pub memory_limit: Option<u64>,
    /// CPU limit (`None` = unlimited).
    pub cpu_limit: Option<u64>,
    /// Whether the gVisor runtime is used.
    pub using_gvisor: bool,
    /// Whether rootless mode is used.
    pub rootless: bool,
    /// cgroup path, if any.
    pub cgroup_path: Option<String>,
    /// Effective uid of the workload process inside the container.
    pub process_uid: u32,
    /// Effective gid of the workload process inside the container.
    pub process_gid: u32,
    /// Supplementary gids of the workload process inside the container.
    pub additional_gids: Vec<u32>,
}
127
128impl ContainerState {
129    /// Create a new container state from the given parameters.
130    pub fn new(params: ContainerStateParams) -> Self {
131        let started_at = SystemTime::now()
132            .duration_since(SystemTime::UNIX_EPOCH)
133            .unwrap_or_default()
134            .as_secs();
135
136        let start_ticks = Self::read_start_ticks(params.pid);
137
138        Self {
139            id: params.id,
140            name: params.name,
141            pid: params.pid,
142            command: params.command,
143            started_at,
144            memory_limit: params.memory_limit,
145            cpu_limit: params.cpu_limit,
146            using_gvisor: params.using_gvisor,
147            rootless: params.rootless,
148            cgroup_path: params.cgroup_path,
149            config_hash: None,
150            creator_uid: nix::unistd::Uid::effective().as_raw(),
151            process_uid: params.process_uid,
152            process_gid: params.process_gid,
153            additional_gids: params.additional_gids,
154            start_ticks,
155            status: OciStatus::Creating,
156            bundle_path: None,
157            annotations: HashMap::new(),
158        }
159    }
160
161    /// Read the start time in clock ticks from /proc/<pid>/stat (field 22)
162    ///
163    /// BUG-09: After fork, /proc/<pid>/stat may not be immediately available.
164    /// Retry a few times with short sleeps to avoid returning 0 and breaking
165    /// PID-reuse detection in is_running().
166    fn read_start_ticks(pid: u32) -> u64 {
167        let stat_path = format!("/proc/{}/stat", pid);
168        for attempt in 0..5 {
169            if let Ok(content) = std::fs::read_to_string(&stat_path) {
170                if let Some(ticks) = Self::parse_start_ticks(&content) {
171                    return ticks;
172                }
173            }
174            if attempt < 4 {
175                std::thread::sleep(std::time::Duration::from_millis(1));
176            }
177        }
178        0
179    }
180
181    /// Parse start time (field 22) from /proc/<pid>/stat content
182    fn parse_start_ticks(content: &str) -> Option<u64> {
183        // Field 2 (comm) is in parens and may contain spaces; find last ')'
184        let after_comm = content.rfind(')')?;
185        // After ')' we have fields 3..N; field 22 is index 19 (22 - 3 = 19)
186        // Use nth() instead of collecting into a Vec to avoid a heap allocation
187        // on every liveness check.
188        content[after_comm + 2..]
189            .split_whitespace()
190            .nth(19)?
191            .parse()
192            .ok()
193    }
194
195    /// Check if the container process is still running
196    ///
197    /// Cross-checks PID start time to detect PID reuse after process exit.
198    /// Also returns false if the OCI status is `Stopped`.
199    pub fn is_running(&self) -> bool {
200        if self.status == OciStatus::Stopped {
201            return false;
202        }
203        let stat_path = format!("/proc/{}/stat", self.pid);
204        match std::fs::read_to_string(&stat_path) {
205            Ok(content) => {
206                if self.start_ticks == 0 {
207                    // PID existence alone is insufficient because the PID may have
208                    // been recycled since this state was recorded.
209                    return false;
210                }
211                Self::parse_start_ticks(&content)
212                    .map(|ticks| ticks == self.start_ticks)
213                    .unwrap_or(false)
214            }
215            Err(_) => false,
216        }
217    }
218
219    /// Return OCI runtime state as a JSON value
220    pub fn oci_state(&self) -> serde_json::Value {
221        let live_status = match self.status {
222            OciStatus::Running if !self.is_running() => "stopped",
223            OciStatus::Creating => "creating",
224            OciStatus::Created => "created",
225            OciStatus::Running => "running",
226            OciStatus::Stopped => "stopped",
227        };
228        serde_json::json!({
229            "ociVersion": "1.0.2",
230            "id": self.id,
231            "status": live_status,
232            "pid": if live_status == "stopped" { 0 } else { self.pid },
233            "bundle": self.bundle_path.as_deref().unwrap_or(""),
234            "annotations": self.annotations,
235        })
236    }
237
238    /// Get uptime in seconds
239    pub fn uptime(&self) -> u64 {
240        let now = SystemTime::now()
241            .duration_since(SystemTime::UNIX_EPOCH)
242            .unwrap_or_default()
243            .as_secs();
244        now.saturating_sub(self.started_at)
245    }
246}
247
/// Container state manager
///
/// Manages persistent state of running containers. Each container's state is
/// stored as `<id>.json` inside `state_dir`.
pub struct ContainerStateManager {
    /// Directory holding the per-container state files.
    state_dir: PathBuf,
}
254
255impl ContainerStateManager {
256    /// Create a state manager rooted at an explicit directory, falling back to
257    /// default candidates if `root` is `None`.
258    pub fn new_with_root(root: Option<PathBuf>) -> Result<Self> {
259        if let Some(root) = root {
260            return Self::with_state_dir(root);
261        }
262        Self::new()
263    }
264
265    /// Create a new state manager
266    ///
267    /// Creates the state directory if it doesn't exist
268    pub fn new() -> Result<Self> {
269        let mut last_error = None;
270        for candidate in Self::default_state_dir_candidates() {
271            match Self::with_state_dir(candidate.clone()) {
272                Ok(manager) => return Ok(manager),
273                Err(err) => {
274                    debug!(
275                        path = ?candidate,
276                        error = %err,
277                        "State directory candidate unavailable, trying next fallback"
278                    );
279                    last_error = Some(err);
280                }
281            }
282        }
283
284        Err(last_error.unwrap_or_else(|| {
285            NucleusError::ConfigError("No usable state directory candidates found".to_string())
286        }))
287    }
288
289    /// Create a state manager rooted at an explicit directory.
290    pub fn with_state_dir(state_dir: PathBuf) -> Result<Self> {
291        Self::reject_symlink_path(&state_dir)?;
292
293        // Create state directory if it doesn't exist (idempotent)
294        fs::create_dir_all(&state_dir).map_err(|e| {
295            NucleusError::ConfigError(format!(
296                "Failed to create state directory {:?}: {}",
297                state_dir, e
298            ))
299        })?;
300        Self::reject_symlink_path(&state_dir)?;
301        Self::ensure_secure_state_dir_permissions(&state_dir)?;
302        Self::ensure_state_dir_writable(&state_dir)?;
303
304        Ok(Self { state_dir })
305    }
306
307    fn reject_symlink_path(state_dir: &Path) -> Result<()> {
308        match fs::symlink_metadata(state_dir) {
309            Ok(metadata) if metadata.file_type().is_symlink() => {
310                Err(NucleusError::ConfigError(format!(
311                    "Refusing symlink state directory path {:?}; use a real directory",
312                    state_dir
313                )))
314            }
315            Ok(_) | Err(_) => Ok(()),
316        }
317    }
318
319    fn ensure_secure_state_dir_permissions(state_dir: &Path) -> Result<()> {
320        match fs::set_permissions(state_dir, fs::Permissions::from_mode(0o700)) {
321            Ok(()) => Ok(()),
322            Err(e)
323                if matches!(
324                    e.raw_os_error(),
325                    Some(libc::EROFS) | Some(libc::EPERM) | Some(libc::EACCES)
326                ) =>
327            {
328                let metadata = fs::metadata(state_dir).map_err(|meta_err| {
329                    NucleusError::ConfigError(format!(
330                        "Failed to secure state directory permissions {:?}: {} (and could not \
331                         inspect existing permissions: {})",
332                        state_dir, e, meta_err
333                    ))
334                })?;
335
336                let mode = metadata.permissions().mode() & 0o777;
337                let owner = metadata.uid();
338                let current_uid = nix::unistd::Uid::effective().as_raw();
339                let is_owner_ok = owner == current_uid || nix::unistd::Uid::effective().is_root();
340                let is_mode_ok = mode & 0o077 == 0;
341
342                if is_owner_ok && is_mode_ok {
343                    debug!(
344                        path = ?state_dir,
345                        mode = format!("{:o}", mode),
346                        owner,
347                        "State directory already has secure permissions; skipping chmod failure"
348                    );
349                    Ok(())
350                } else {
351                    Err(NucleusError::ConfigError(format!(
352                        "Failed to secure state directory permissions {:?}: {} (existing mode \
353                         {:o}, owner uid {})",
354                        state_dir, e, mode, owner
355                    )))
356                }
357            }
358            Err(e) => Err(NucleusError::ConfigError(format!(
359                "Failed to secure state directory permissions {:?}: {}",
360                state_dir, e
361            ))),
362        }
363    }
364
365    fn ensure_state_dir_writable(state_dir: &Path) -> Result<()> {
366        let probe_name = format!(
367            ".nucleus-write-test-{}-{}",
368            std::process::id(),
369            SystemTime::now()
370                .duration_since(SystemTime::UNIX_EPOCH)
371                .unwrap_or_default()
372                .as_nanos()
373        );
374        let probe_path = state_dir.join(probe_name);
375
376        let file = OpenOptions::new()
377            .write(true)
378            .create_new(true)
379            .mode(0o600)
380            .open(&probe_path)
381            .map_err(|e| {
382                NucleusError::ConfigError(format!(
383                    "State directory {:?} is not writable: {}",
384                    state_dir, e
385                ))
386            })?;
387        drop(file);
388
389        fs::remove_file(&probe_path).map_err(|e| {
390            NucleusError::ConfigError(format!(
391                "Failed to cleanup state directory probe {:?}: {}",
392                probe_path, e
393            ))
394        })?;
395
396        Ok(())
397    }
398
399    /// Get ordered default state directory candidates.
400    fn default_state_dir_candidates() -> Vec<PathBuf> {
401        if let Some(path) = std::env::var_os("NUCLEUS_STATE_DIR").filter(|p| !p.is_empty()) {
402            return vec![PathBuf::from(path)];
403        }
404
405        if nix::unistd::Uid::effective().is_root() {
406            vec![PathBuf::from("/var/run/nucleus")]
407        } else {
408            let mut candidates = Vec::new();
409
410            if let Some(dir) = dirs::runtime_dir() {
411                candidates.push(dir.join("nucleus"));
412            }
413            if let Some(dir) = dirs::data_local_dir() {
414                candidates.push(dir.join("nucleus"));
415            }
416            if let Some(dir) = dirs::home_dir() {
417                candidates.push(dir.join(".nucleus"));
418            }
419
420            // Final fallback for restricted sandboxes where standard runtime/home
421            // paths are mounted read-only. Use a private directory under /tmp
422            // with O_NOFOLLOW semantics to prevent symlink attacks.
423            let uid = nix::unistd::Uid::effective().as_raw();
424            let fallback = PathBuf::from(format!("/tmp/nucleus-{}", uid));
425            // Only add the /tmp fallback if it either doesn't exist yet
426            // (will be created later) or passes symlink/ownership checks.
427            let fallback_ok = if fallback.exists() {
428                match std::fs::symlink_metadata(&fallback) {
429                    Ok(meta) => {
430                        use std::os::unix::fs::MetadataExt;
431                        if meta.file_type().is_symlink() {
432                            tracing::warn!(
433                                "Skipping {} — it is a symlink (possible attack)",
434                                fallback.display()
435                            );
436                            false
437                        } else if meta.uid() != uid {
438                            tracing::warn!(
439                                "Skipping {} — owned by UID {} not {}",
440                                fallback.display(),
441                                meta.uid(),
442                                uid
443                            );
444                            false
445                        } else {
446                            true
447                        }
448                    }
449                    Err(e) => {
450                        tracing::warn!("Skipping {} — cannot stat: {}", fallback.display(), e);
451                        false
452                    }
453                }
454            } else {
455                true
456            };
457            if fallback_ok {
458                candidates.push(fallback);
459            }
460
461            candidates
462        }
463    }
464
465    /// Validate a container ID for safe filesystem use
466    fn validate_container_id(container_id: &str) -> Result<()> {
467        if container_id.is_empty() {
468            return Err(NucleusError::ConfigError(
469                "Container ID cannot be empty".to_string(),
470            ));
471        }
472
473        if !container_id
474            .chars()
475            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
476        {
477            return Err(NucleusError::ConfigError(format!(
478                "Invalid container ID (allowed: a-zA-Z0-9_-): {}",
479                container_id
480            )));
481        }
482
483        Ok(())
484    }
485
486    fn state_file_path(&self, container_id: &str) -> Result<PathBuf> {
487        Self::validate_container_id(container_id)?;
488        Ok(self.state_dir.join(format!("{}.json", container_id)))
489    }
490
491    /// Return the path to the exec FIFO used for two-phase create/start.
492    pub fn exec_fifo_path(&self, container_id: &str) -> Result<PathBuf> {
493        Self::validate_container_id(container_id)?;
494        Ok(self.state_dir.join(format!("{}.exec", container_id)))
495    }
496
497    /// Resolve a container reference by exact ID, name, or ID prefix
498    pub fn resolve_container(&self, reference: &str) -> Result<ContainerState> {
499        let states = self.list_states()?;
500
501        // Try exact ID match
502        if let Some(state) = states.iter().find(|s| s.id == reference) {
503            return Ok(state.clone());
504        }
505
506        // Try exact name match (must be unambiguous)
507        let name_matches: Vec<&ContainerState> =
508            states.iter().filter(|s| s.name == reference).collect();
509        match name_matches.len() {
510            1 => return Ok(name_matches[0].clone()),
511            n if n > 1 => {
512                return Err(NucleusError::AmbiguousContainer(format!(
513                    "Name '{}' matches {} containers; use container ID instead",
514                    reference, n
515                )))
516            }
517            _ => {}
518        }
519
520        // Try ID prefix match
521        let prefix_matches: Vec<&ContainerState> = states
522            .iter()
523            .filter(|s| s.id.starts_with(reference))
524            .collect();
525
526        match prefix_matches.len() {
527            0 => Err(NucleusError::ContainerNotFound(reference.to_string())),
528            1 => Ok(prefix_matches[0].clone()),
529            _ => Err(NucleusError::AmbiguousContainer(format!(
530                "'{}' matches {} containers",
531                reference,
532                prefix_matches.len()
533            ))),
534        }
535    }
536
537    /// Save container state
538    pub fn save_state(&self, state: &ContainerState) -> Result<()> {
539        let path = self.state_file_path(&state.id)?;
540        let tmp_path = self.state_dir.join(format!("{}.json.tmp", state.id));
541        let json = serde_json::to_string_pretty(state).map_err(|e| {
542            NucleusError::ConfigError(format!("Failed to serialize container state: {}", e))
543        })?;
544
545        // O_NOFOLLOW prevents TOCTOU symlink attacks: if an attacker replaces
546        // the temp path with a symlink between check and open, the open fails
547        // instead of following the symlink to an attacker-controlled location.
548        let mut file = OpenOptions::new()
549            .create(true)
550            .truncate(true)
551            .write(true)
552            .mode(0o600)
553            .custom_flags(libc::O_NOFOLLOW)
554            .open(&tmp_path)
555            .map_err(|e| {
556                NucleusError::ConfigError(format!(
557                    "Failed to open temp state file {:?}: {}",
558                    tmp_path, e
559                ))
560            })?;
561
562        file.write_all(json.as_bytes()).map_err(|e| {
563            NucleusError::ConfigError(format!("Failed to write state file {:?}: {}", tmp_path, e))
564        })?;
565        file.sync_all().map_err(|e| {
566            NucleusError::ConfigError(format!("Failed to sync state file {:?}: {}", tmp_path, e))
567        })?;
568
569        fs::rename(&tmp_path, &path).map_err(|e| {
570            NucleusError::ConfigError(format!(
571                "Failed to atomically replace state file {:?}: {}",
572                path, e
573            ))
574        })?;
575
576        debug!("Saved container state: {}", state.id);
577        Ok(())
578    }
579
580    /// Read a file with O_NOFOLLOW to prevent symlink attacks.
581    pub fn read_file_nofollow(
582        path: &std::path::Path,
583    ) -> std::result::Result<String, std::io::Error> {
584        use std::io::Read;
585        let file = OpenOptions::new()
586            .read(true)
587            .custom_flags(libc::O_NOFOLLOW)
588            .open(path)?;
589        let mut buf = String::new();
590        std::io::BufReader::new(file).read_to_string(&mut buf)?;
591        Ok(buf)
592    }
593
594    /// Load container state
595    ///
596    /// Opens with O_NOFOLLOW to prevent symlink-based TOCTOU attacks.
597    pub fn load_state(&self, container_id: &str) -> Result<ContainerState> {
598        let path = self.state_file_path(container_id)?;
599
600        let json = Self::read_file_nofollow(&path).map_err(|e| {
601            NucleusError::ConfigError(format!("Failed to read state file {:?}: {}", path, e))
602        })?;
603
604        let state = serde_json::from_str(&json).map_err(|e| {
605            NucleusError::ConfigError(format!("Failed to parse container state: {}", e))
606        })?;
607
608        Ok(state)
609    }
610
611    /// Delete container state
612    pub fn delete_state(&self, container_id: &str) -> Result<()> {
613        let path = self.state_file_path(container_id)?;
614
615        match fs::remove_file(&path) {
616            Ok(()) => {
617                debug!("Deleted container state: {}", container_id);
618            }
619            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
620                // Already deleted — idempotent (handles TOCTOU race)
621                debug!("Container state already deleted: {}", container_id);
622            }
623            Err(e) => {
624                return Err(NucleusError::ConfigError(format!(
625                    "Failed to delete state file {:?}: {}",
626                    path, e
627                )));
628            }
629        }
630
631        Ok(())
632    }
633
634    /// List all container states
635    pub fn list_states(&self) -> Result<Vec<ContainerState>> {
636        let mut states = Vec::new();
637
638        let entries = fs::read_dir(&self.state_dir).map_err(|e| {
639            NucleusError::ConfigError(format!(
640                "Failed to read state directory {:?}: {}",
641                self.state_dir, e
642            ))
643        })?;
644
645        for entry in entries {
646            let entry = entry.map_err(|e| {
647                NucleusError::ConfigError(format!("Failed to read directory entry: {}", e))
648            })?;
649
650            let path = entry.path();
651            if path.extension().and_then(|s| s.to_str()) == Some("json") {
652                // Use O_NOFOLLOW to prevent symlink attacks, consistent with
653                // load_state/save_state. Without this, a symlink in the state
654                // directory could be used as a file-read oracle.
655                match Self::read_file_nofollow(&path) {
656                    Ok(json) => match serde_json::from_str::<ContainerState>(&json) {
657                        Ok(state) => states.push(state),
658                        Err(e) => {
659                            warn!("Failed to parse state file {:?}: {}", path, e);
660                        }
661                    },
662                    Err(e) => {
663                        warn!("Failed to read state file {:?}: {}", path, e);
664                    }
665                }
666            }
667        }
668
669        Ok(states)
670    }
671
672    /// List only running containers
673    pub fn list_running(&self) -> Result<Vec<ContainerState>> {
674        let states = self.list_states()?;
675        Ok(states.into_iter().filter(|s| s.is_running()).collect())
676    }
677
678    /// Clean up stale state files (for containers that are no longer running)
679    pub fn cleanup_stale(&self) -> Result<()> {
680        let states = self.list_states()?;
681
682        for state in states {
683            if !state.is_running() {
684                info!(
685                    "Cleaning up stale state for container {} (PID {})",
686                    state.id, state.pid
687                );
688                self.delete_state(&state.id)?;
689            }
690        }
691
692        Ok(())
693    }
694}
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699    use tempfile::TempDir;
700
701    fn temp_state_manager() -> (ContainerStateManager, TempDir) {
702        let temp_dir = TempDir::new().unwrap();
703        let mgr = ContainerStateManager {
704            state_dir: temp_dir.path().to_path_buf(),
705        };
706        (mgr, temp_dir)
707    }
708
709    #[test]
710    fn test_container_state_new() {
711        let state = ContainerState::new(ContainerStateParams {
712            id: "test".to_string(),
713            name: "test".to_string(),
714            pid: 1234,
715            command: vec!["/bin/sh".to_string()],
716            memory_limit: Some(512 * 1024 * 1024),
717            cpu_limit: Some(2000),
718            using_gvisor: false,
719            rootless: false,
720            cgroup_path: Some("/sys/fs/cgroup/nucleus-test".to_string()),
721            process_uid: 0,
722            process_gid: 0,
723            additional_gids: Vec::new(),
724        });
725
726        assert_eq!(state.id, "test");
727        assert_eq!(state.pid, 1234);
728        assert_eq!(state.memory_limit, Some(512 * 1024 * 1024));
729        assert_eq!(state.cpu_limit, Some(2000));
730        assert_eq!(state.creator_uid, nix::unistd::Uid::effective().as_raw());
731    }
732
733    #[test]
734    fn test_save_and_load_state() {
735        let (mgr, _temp_dir) = temp_state_manager();
736
737        let state = ContainerState::new(ContainerStateParams {
738            id: "test".to_string(),
739            name: "test".to_string(),
740            pid: 1234,
741            command: vec!["/bin/sh".to_string()],
742            memory_limit: Some(512 * 1024 * 1024),
743            cpu_limit: None,
744            using_gvisor: false,
745            rootless: false,
746            cgroup_path: None,
747            process_uid: 0,
748            process_gid: 0,
749            additional_gids: Vec::new(),
750        });
751
752        mgr.save_state(&state).unwrap();
753
754        let loaded = mgr.load_state("test").unwrap();
755        assert_eq!(loaded.id, state.id);
756        assert_eq!(loaded.pid, state.pid);
757        assert_eq!(loaded.command, state.command);
758    }
759
760    #[test]
761    fn test_delete_state() {
762        let (mgr, _temp_dir) = temp_state_manager();
763
764        let state = ContainerState::new(ContainerStateParams {
765            id: "test".to_string(),
766            name: "test".to_string(),
767            pid: 1234,
768            command: vec!["/bin/sh".to_string()],
769            memory_limit: None,
770            cpu_limit: None,
771            using_gvisor: false,
772            rootless: false,
773            cgroup_path: None,
774            process_uid: 0,
775            process_gid: 0,
776            additional_gids: Vec::new(),
777        });
778
779        mgr.save_state(&state).unwrap();
780        assert!(mgr.load_state("test").is_ok());
781
782        mgr.delete_state("test").unwrap();
783        assert!(mgr.load_state("test").is_err());
784    }
785
786    #[test]
787    fn test_list_states() {
788        let (mgr, _temp_dir) = temp_state_manager();
789
790        let state1 = ContainerState::new(ContainerStateParams {
791            id: "test1".to_string(),
792            name: "test1".to_string(),
793            pid: 1234,
794            command: vec!["/bin/sh".to_string()],
795            memory_limit: None,
796            cpu_limit: None,
797            using_gvisor: false,
798            rootless: false,
799            cgroup_path: None,
800            process_uid: 0,
801            process_gid: 0,
802            additional_gids: Vec::new(),
803        });
804
805        let state2 = ContainerState::new(ContainerStateParams {
806            id: "test2".to_string(),
807            name: "test2".to_string(),
808            pid: 5678,
809            command: vec!["/bin/bash".to_string()],
810            memory_limit: None,
811            cpu_limit: None,
812            using_gvisor: false,
813            rootless: false,
814            cgroup_path: None,
815            process_uid: 0,
816            process_gid: 0,
817            additional_gids: Vec::new(),
818        });
819
820        mgr.save_state(&state1).unwrap();
821        mgr.save_state(&state2).unwrap();
822
823        let states = mgr.list_states().unwrap();
824        assert_eq!(states.len(), 2);
825    }
826
827    #[test]
828    fn test_resolve_container_by_id() {
829        let (mgr, _temp_dir) = temp_state_manager();
830
831        let state = ContainerState::new(ContainerStateParams {
832            id: "abc123def456".to_string(),
833            name: "mycontainer".to_string(),
834            pid: 1234,
835            command: vec!["/bin/sh".to_string()],
836            memory_limit: None,
837            cpu_limit: None,
838            using_gvisor: false,
839            rootless: false,
840            cgroup_path: None,
841            process_uid: 0,
842            process_gid: 0,
843            additional_gids: Vec::new(),
844        });
845        mgr.save_state(&state).unwrap();
846
847        // Exact ID
848        let resolved = mgr.resolve_container("abc123def456").unwrap();
849        assert_eq!(resolved.id, "abc123def456");
850
851        // Name
852        let resolved = mgr.resolve_container("mycontainer").unwrap();
853        assert_eq!(resolved.id, "abc123def456");
854
855        // ID prefix
856        let resolved = mgr.resolve_container("abc123").unwrap();
857        assert_eq!(resolved.id, "abc123def456");
858
859        // Not found
860        assert!(mgr.resolve_container("nonexistent").is_err());
861    }
862
863    #[test]
864    fn test_load_state_rejects_symlink() {
865        // H-3: O_NOFOLLOW must prevent loading state through a symlink
866        let (mgr, temp_dir) = temp_state_manager();
867
868        // Create a real state file
869        let state = ContainerState::new(ContainerStateParams {
870            id: "real".to_string(),
871            name: "real".to_string(),
872            pid: 1234,
873            command: vec!["/bin/sh".to_string()],
874            memory_limit: None,
875            cpu_limit: None,
876            using_gvisor: false,
877            rootless: false,
878            cgroup_path: None,
879            process_uid: 0,
880            process_gid: 0,
881            additional_gids: Vec::new(),
882        });
883        mgr.save_state(&state).unwrap();
884
885        // Create a symlink pointing to the real state file
886        let symlink_path = temp_dir.path().join("symlinked.json");
887        let real_path = temp_dir.path().join("real.json");
888        std::os::unix::fs::symlink(&real_path, &symlink_path).unwrap();
889
890        // Loading through the symlink ID must fail (O_NOFOLLOW)
891        let result = mgr.load_state("symlinked");
892        assert!(result.is_err(), "load_state must reject symlinks");
893    }
894
895    #[test]
896    fn test_list_states_ignores_symlinks() {
897        // list_states must use O_NOFOLLOW, so symlinked state files are skipped
898        // rather than followed (which would be a file-read oracle).
899        let (mgr, temp_dir) = temp_state_manager();
900
901        // Create a real state file
902        let state = ContainerState::new(ContainerStateParams {
903            id: "real123456789012345678".to_string(),
904            name: "real".to_string(),
905            pid: 1234,
906            command: vec!["/bin/sh".to_string()],
907            memory_limit: None,
908            cpu_limit: None,
909            using_gvisor: false,
910            rootless: false,
911            cgroup_path: None,
912            process_uid: 0,
913            process_gid: 0,
914            additional_gids: Vec::new(),
915        });
916        mgr.save_state(&state).unwrap();
917
918        // Create a symlink masquerading as a state file
919        let real_path = temp_dir.path().join("real123456789012345678.json");
920        let symlink_path = temp_dir.path().join("evil.json");
921        std::os::unix::fs::symlink(&real_path, &symlink_path).unwrap();
922
923        // list_states should only return the real file, not follow the symlink
924        let states = mgr.list_states().unwrap();
925        // The symlink should fail to open with O_NOFOLLOW, leaving only the real state
926        assert_eq!(states.len(), 1, "symlinked state file must be skipped");
927        assert_eq!(states[0].id, "real123456789012345678");
928    }
929
930    #[test]
931    fn test_save_state_rejects_symlink_tmp() {
932        // H-3: O_NOFOLLOW on save must prevent writing through a symlink
933        let (mgr, temp_dir) = temp_state_manager();
934
935        let state = ContainerState::new(ContainerStateParams {
936            id: "target".to_string(),
937            name: "target".to_string(),
938            pid: 1234,
939            command: vec!["/bin/sh".to_string()],
940            memory_limit: None,
941            cpu_limit: None,
942            using_gvisor: false,
943            rootless: false,
944            cgroup_path: None,
945            process_uid: 0,
946            process_gid: 0,
947            additional_gids: Vec::new(),
948        });
949
950        // Pre-create a symlink at the temp path to simulate an attack
951        let tmp_path = temp_dir.path().join("target.json.tmp");
952        let evil_path = temp_dir.path().join("evil");
953        std::os::unix::fs::symlink(&evil_path, &tmp_path).unwrap();
954
955        // save_state should fail because O_NOFOLLOW rejects the symlink
956        let result = mgr.save_state(&state);
957        assert!(
958            result.is_err(),
959            "save_state must reject symlinks at tmp path"
960        );
961    }
962
963    #[test]
964    fn test_is_running_returns_false_when_start_ticks_is_zero() {
965        // BUG-04: When start_ticks=0 (failed to read), is_running() must return
966        // false to avoid PID reuse false positives, not fall back to existence check
967        let mut state = ContainerState::new(ContainerStateParams {
968            id: "test".to_string(),
969            name: "test".to_string(),
970            pid: std::process::id(), // our PID exists in /proc
971            command: vec!["/bin/sh".to_string()],
972            memory_limit: None,
973            cpu_limit: None,
974            using_gvisor: false,
975            rootless: false,
976            cgroup_path: None,
977            process_uid: 0,
978            process_gid: 0,
979            additional_gids: Vec::new(),
980        });
981        // Force start_ticks to 0 to simulate failed read
982        state.start_ticks = 0;
983        // With BUG-04 present, this returns true (falls back to existence check)
984        // After fix, must return false
985        assert!(
986            !state.is_running(),
987            "is_running() must return false when start_ticks=0 (cannot verify PID identity)"
988        );
989    }
990
991    #[test]
992    fn test_read_start_ticks_retries_on_failure() {
993        // BUG-09: read_start_ticks must retry when /proc/<pid>/stat is temporarily
994        // unavailable after fork, instead of immediately returning 0.
995        // Verify by calling with our own PID (should succeed) and a non-existent
996        // PID (should return 0 after retries, not panic).
997        let own_ticks = ContainerState::read_start_ticks(std::process::id());
998        assert!(
999            own_ticks > 0,
1000            "read_start_ticks must return non-zero for a live process"
1001        );
1002        // Non-existent PID should gracefully return 0 (after retries)
1003        let bogus_ticks = ContainerState::read_start_ticks(u32::MAX);
1004        assert_eq!(
1005            bogus_ticks, 0,
1006            "read_start_ticks must return 0 for non-existent PID"
1007        );
1008    }
1009
1010    #[test]
1011    fn test_delete_state_handles_already_deleted() {
1012        // BUG-16: delete_state must not fail if file was already deleted (TOCTOU)
1013        let (mgr, _temp_dir) = temp_state_manager();
1014        // Delete a state that doesn't exist — should succeed (idempotent)
1015        let result = mgr.delete_state("nonexistent-id");
1016        assert!(
1017            result.is_ok(),
1018            "delete_state must be idempotent for missing files"
1019        );
1020    }
1021}