Skip to main content

nucleus/container/
state.rs

1use crate::error::{NucleusError, Result};
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4use std::fs;
5use std::fs::OpenOptions;
6use std::io::Write;
7use std::os::unix::fs::{MetadataExt, OpenOptionsExt, PermissionsExt};
8use std::path::{Path, PathBuf};
9use std::time::SystemTime;
10use tracing::{debug, info, warn};
11
/// OCI-compliant container status.
///
/// Serialized through serde with lowercase variant names
/// (`"creating"`, `"created"`, `"running"`, `"stopped"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum OciStatus {
    /// Container is being created
    Creating,
    /// Container has been created but not started
    Created,
    /// Container process is running
    Running,
    /// Container process has stopped
    Stopped,
}
25
26impl std::fmt::Display for OciStatus {
27    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28        match self {
29            OciStatus::Creating => write!(f, "creating"),
30            OciStatus::Created => write!(f, "created"),
31            OciStatus::Running => write!(f, "running"),
32            OciStatus::Stopped => write!(f, "stopped"),
33        }
34    }
35}
36
/// Container state tracking information.
///
/// Serialized to and from JSON through serde. Fields marked `#[serde(default)]`
/// fall back to their type's default value when missing from the stored JSON,
/// and `status` falls back to `default_oci_status()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContainerState {
    /// Container ID (unique 32 hex chars, 128-bit)
    pub id: String,

    /// Container name (user-supplied or same as ID)
    pub name: String,

    /// PID of the container process
    pub pid: u32,

    /// Command being executed
    pub command: Vec<String>,

    /// Start time (Unix timestamp)
    pub started_at: u64,

    /// Memory limit in bytes (None = unlimited)
    pub memory_limit: Option<u64>,

    /// CPU limit in millicores (None = unlimited)
    pub cpu_limit: Option<u64>,

    /// Whether using gVisor runtime
    pub using_gvisor: bool,

    /// Whether using rootless mode
    pub rootless: bool,

    /// cgroup path
    pub cgroup_path: Option<String>,

    /// Desired topology config hash associated with this container, if any.
    #[serde(default)]
    pub config_hash: Option<u64>,

    /// UID of the user who created this container
    #[serde(default)]
    pub creator_uid: u32,

    /// Effective uid of the workload process inside the container.
    #[serde(default)]
    pub process_uid: u32,

    /// Effective gid of the workload process inside the container.
    #[serde(default)]
    pub process_gid: u32,

    /// Supplementary gids of the workload process inside the container.
    #[serde(default)]
    pub additional_gids: Vec<u32>,

    /// Process start time in clock ticks (from /proc/`<pid>`/stat field 22)
    /// Used to detect PID reuse in is_running(). A value of 0 means the start
    /// time could not be read; is_running() then reports the container as not running.
    #[serde(default)]
    pub start_ticks: u64,

    /// OCI container status
    #[serde(default = "default_oci_status")]
    pub status: OciStatus,

    /// OCI bundle path
    #[serde(default)]
    pub bundle_path: Option<String>,

    /// OCI annotations
    #[serde(default)]
    pub annotations: HashMap<String, String>,
}
107
/// Serde fallback for `ContainerState::status`: state files that lack a
/// `status` field deserialize as `OciStatus::Stopped`.
fn default_oci_status() -> OciStatus {
    OciStatus::Stopped
}
111
/// Parameters for creating a new `ContainerState`.
///
/// Every field is copied verbatim into the `ContainerState` field of the same
/// name by `ContainerState::new`; the remaining state fields (start time,
/// start ticks, creator UID, status, bundle path, annotations, config hash)
/// are filled in by the constructor itself.
pub struct ContainerStateParams {
    /// Container ID
    pub id: String,
    /// Container name
    pub name: String,
    /// PID of the container process
    pub pid: u32,
    /// Command being executed
    pub command: Vec<String>,
    /// Memory limit (None = unlimited)
    pub memory_limit: Option<u64>,
    /// CPU limit (None = unlimited)
    pub cpu_limit: Option<u64>,
    /// Whether using gVisor runtime
    pub using_gvisor: bool,
    /// Whether using rootless mode
    pub rootless: bool,
    /// cgroup path
    pub cgroup_path: Option<String>,
    /// Effective uid of the workload process
    pub process_uid: u32,
    /// Effective gid of the workload process
    pub process_gid: u32,
    /// Supplementary gids of the workload process
    pub additional_gids: Vec<u32>,
}
127
128impl ContainerState {
129    /// Create a new container state from the given parameters.
130    pub fn new(params: ContainerStateParams) -> Self {
131        let started_at = SystemTime::now()
132            .duration_since(SystemTime::UNIX_EPOCH)
133            .unwrap_or_default()
134            .as_secs();
135
136        let start_ticks = Self::read_start_ticks(params.pid);
137
138        Self {
139            id: params.id,
140            name: params.name,
141            pid: params.pid,
142            command: params.command,
143            started_at,
144            memory_limit: params.memory_limit,
145            cpu_limit: params.cpu_limit,
146            using_gvisor: params.using_gvisor,
147            rootless: params.rootless,
148            cgroup_path: params.cgroup_path,
149            config_hash: None,
150            creator_uid: nix::unistd::Uid::effective().as_raw(),
151            process_uid: params.process_uid,
152            process_gid: params.process_gid,
153            additional_gids: params.additional_gids,
154            start_ticks,
155            status: OciStatus::Creating,
156            bundle_path: None,
157            annotations: HashMap::new(),
158        }
159    }
160
161    /// Read the start time in clock ticks from /proc/<pid>/stat (field 22)
162    ///
163    /// BUG-09: After fork, /proc/<pid>/stat may not be immediately available.
164    /// Retry a few times with short sleeps to avoid returning 0 and breaking
165    /// PID-reuse detection in is_running().
166    fn read_start_ticks(pid: u32) -> u64 {
167        let stat_path = format!("/proc/{}/stat", pid);
168        for attempt in 0..5 {
169            if let Ok(content) = std::fs::read_to_string(&stat_path) {
170                if let Some(ticks) = Self::parse_start_ticks(&content) {
171                    return ticks;
172                }
173            }
174            if attempt < 4 {
175                std::thread::sleep(std::time::Duration::from_millis(1));
176            }
177        }
178        0
179    }
180
181    /// Parse start time (field 22) from /proc/<pid>/stat content
182    fn parse_start_ticks(content: &str) -> Option<u64> {
183        // Field 2 (comm) is in parens and may contain spaces; find last ')'
184        let after_comm = content.rfind(')')?;
185        // After ')' we have fields 3..N; field 22 is index 19 (22 - 3 = 19)
186        // Use nth() instead of collecting into a Vec to avoid a heap allocation
187        // on every liveness check.
188        content[after_comm + 2..]
189            .split_whitespace()
190            .nth(19)?
191            .parse()
192            .ok()
193    }
194
195    /// Check if the container process is still running
196    ///
197    /// Cross-checks PID start time to detect PID reuse after process exit.
198    /// Also returns false if the OCI status is `Stopped`.
199    pub fn is_running(&self) -> bool {
200        if self.status == OciStatus::Stopped {
201            return false;
202        }
203        let stat_path = format!("/proc/{}/stat", self.pid);
204        match std::fs::read_to_string(&stat_path) {
205            Ok(content) => {
206                if self.start_ticks == 0 {
207                    // PID existence alone is insufficient because the PID may have
208                    // been recycled since this state was recorded.
209                    return false;
210                }
211                Self::parse_start_ticks(&content)
212                    .map(|ticks| ticks == self.start_ticks)
213                    .unwrap_or(false)
214            }
215            Err(_) => false,
216        }
217    }
218
219    /// Return OCI runtime state as a JSON value
220    pub fn oci_state(&self) -> serde_json::Value {
221        let live_status = match self.status {
222            OciStatus::Running if !self.is_running() => "stopped",
223            OciStatus::Creating => "creating",
224            OciStatus::Created => "created",
225            OciStatus::Running => "running",
226            OciStatus::Stopped => "stopped",
227        };
228        serde_json::json!({
229            "ociVersion": "1.0.2",
230            "id": self.id,
231            "status": live_status,
232            "pid": if live_status == "stopped" { 0 } else { self.pid },
233            "bundle": self.bundle_path.as_deref().unwrap_or(""),
234            "annotations": self.annotations,
235        })
236    }
237
238    /// Get uptime in seconds
239    pub fn uptime(&self) -> u64 {
240        let now = SystemTime::now()
241            .duration_since(SystemTime::UNIX_EPOCH)
242            .unwrap_or_default()
243            .as_secs();
244        now.saturating_sub(self.started_at)
245    }
246}
247
/// Container state manager
///
/// Manages persistent state of running containers. Each container's state is
/// stored as a `<id>.json` file inside `state_dir`.
pub struct ContainerStateManager {
    /// Directory that holds the per-container state files.
    state_dir: PathBuf,
}
254
255impl ContainerStateManager {
256    /// Create a state manager rooted at an explicit directory, falling back to
257    /// default candidates if `root` is `None`.
258    pub fn new_with_root(root: Option<PathBuf>) -> Result<Self> {
259        if let Some(root) = root {
260            return Self::with_state_dir(root);
261        }
262        Self::new()
263    }
264
265    /// Create a new state manager
266    ///
267    /// Creates the state directory if it doesn't exist
268    pub fn new() -> Result<Self> {
269        let mut last_error = None;
270        for candidate in Self::default_state_dir_candidates() {
271            match Self::with_state_dir(candidate.clone()) {
272                Ok(manager) => return Ok(manager),
273                Err(err) => {
274                    debug!(
275                        path = ?candidate,
276                        error = %err,
277                        "State directory candidate unavailable, trying next fallback"
278                    );
279                    last_error = Some(err);
280                }
281            }
282        }
283
284        Err(last_error.unwrap_or_else(|| {
285            NucleusError::ConfigError("No usable state directory candidates found".to_string())
286        }))
287    }
288
289    /// Create a state manager rooted at an explicit directory.
290    pub fn with_state_dir(state_dir: PathBuf) -> Result<Self> {
291        Self::reject_symlink_path(&state_dir)?;
292
293        // Create state directory if it doesn't exist (idempotent)
294        fs::create_dir_all(&state_dir).map_err(|e| {
295            NucleusError::ConfigError(format!(
296                "Failed to create state directory {:?}: {}",
297                state_dir, e
298            ))
299        })?;
300        Self::reject_symlink_path(&state_dir)?;
301        Self::ensure_secure_state_dir_permissions(&state_dir)?;
302        Self::ensure_state_dir_writable(&state_dir)?;
303
304        Ok(Self { state_dir })
305    }
306
307    fn reject_symlink_path(state_dir: &Path) -> Result<()> {
308        match fs::symlink_metadata(state_dir) {
309            Ok(metadata) if metadata.file_type().is_symlink() => {
310                Err(NucleusError::ConfigError(format!(
311                    "Refusing symlink state directory path {:?}; use a real directory",
312                    state_dir
313                )))
314            }
315            Ok(_) | Err(_) => Ok(()),
316        }
317    }
318
319    fn ensure_secure_state_dir_permissions(state_dir: &Path) -> Result<()> {
320        match fs::set_permissions(state_dir, fs::Permissions::from_mode(0o700)) {
321            Ok(()) => Ok(()),
322            Err(e)
323                if matches!(
324                    e.raw_os_error(),
325                    Some(libc::EROFS) | Some(libc::EPERM) | Some(libc::EACCES)
326                ) =>
327            {
328                let metadata = fs::metadata(state_dir).map_err(|meta_err| {
329                    NucleusError::ConfigError(format!(
330                        "Failed to secure state directory permissions {:?}: {} (and could not \
331                         inspect existing permissions: {})",
332                        state_dir, e, meta_err
333                    ))
334                })?;
335
336                let mode = metadata.permissions().mode() & 0o777;
337                let owner = metadata.uid();
338                let current_uid = nix::unistd::Uid::effective().as_raw();
339                let is_owner_ok = owner == current_uid || nix::unistd::Uid::effective().is_root();
340                let is_mode_ok = mode & 0o077 == 0;
341
342                if is_owner_ok && is_mode_ok {
343                    debug!(
344                        path = ?state_dir,
345                        mode = format!("{:o}", mode),
346                        owner,
347                        "State directory already has secure permissions; skipping chmod failure"
348                    );
349                    Ok(())
350                } else {
351                    Err(NucleusError::ConfigError(format!(
352                        "Failed to secure state directory permissions {:?}: {} (existing mode \
353                         {:o}, owner uid {})",
354                        state_dir, e, mode, owner
355                    )))
356                }
357            }
358            Err(e) => Err(NucleusError::ConfigError(format!(
359                "Failed to secure state directory permissions {:?}: {}",
360                state_dir, e
361            ))),
362        }
363    }
364
365    fn ensure_state_dir_writable(state_dir: &Path) -> Result<()> {
366        let probe_name = format!(
367            ".nucleus-write-test-{}-{}",
368            std::process::id(),
369            SystemTime::now()
370                .duration_since(SystemTime::UNIX_EPOCH)
371                .unwrap_or_default()
372                .as_nanos()
373        );
374        let probe_path = state_dir.join(probe_name);
375
376        let file = OpenOptions::new()
377            .write(true)
378            .create_new(true)
379            .mode(0o600)
380            .open(&probe_path)
381            .map_err(|e| {
382                NucleusError::ConfigError(format!(
383                    "State directory {:?} is not writable: {}",
384                    state_dir, e
385                ))
386            })?;
387        drop(file);
388
389        fs::remove_file(&probe_path).map_err(|e| {
390            NucleusError::ConfigError(format!(
391                "Failed to cleanup state directory probe {:?}: {}",
392                probe_path, e
393            ))
394        })?;
395
396        Ok(())
397    }
398
399    /// Get ordered default state directory candidates.
400    fn default_state_dir_candidates() -> Vec<PathBuf> {
401        if let Some(path) = std::env::var_os("NUCLEUS_STATE_DIR").filter(|p| !p.is_empty()) {
402            return vec![PathBuf::from(path)];
403        }
404
405        if nix::unistd::Uid::effective().is_root() {
406            vec![PathBuf::from("/var/run/nucleus")]
407        } else {
408            let mut candidates = Vec::new();
409
410            if let Some(dir) = dirs::runtime_dir() {
411                candidates.push(dir.join("nucleus"));
412            }
413            if let Some(dir) = dirs::data_local_dir() {
414                candidates.push(dir.join("nucleus"));
415            }
416            if let Some(dir) = dirs::home_dir() {
417                candidates.push(dir.join(".nucleus"));
418            }
419
420            // Final fallback for restricted sandboxes where standard runtime/home
421            // paths are mounted read-only. Use a private directory under /tmp
422            // with O_NOFOLLOW semantics to prevent symlink attacks.
423            //
424            // We avoid the TOCTOU pattern of .exists() then create_dir_all()
425            // by attempting mkdir atomically and validating the result.
426            let uid = nix::unistd::Uid::effective().as_raw();
427            let fallback = PathBuf::from(format!("/tmp/nucleus-{}", uid));
428            let fallback_ok = match std::fs::create_dir(&fallback) {
429                Ok(()) => {
430                    // We created it – it's ours with correct ownership.
431                    true
432                }
433                Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
434                    // Already exists: validate it is not a symlink and is owned by us.
435                    // symlink_metadata (lstat) does not follow symlinks.
436                    use std::os::unix::fs::MetadataExt;
437                    match std::fs::symlink_metadata(&fallback) {
438                        Ok(meta) => {
439                            if meta.file_type().is_symlink() {
440                                tracing::warn!(
441                                    "Skipping {} – it is a symlink (possible attack)",
442                                    fallback.display()
443                                );
444                                false
445                            } else if meta.uid() != uid {
446                                tracing::warn!(
447                                    "Skipping {} – owned by UID {} not {}",
448                                    fallback.display(),
449                                    meta.uid(),
450                                    uid
451                                );
452                                false
453                            } else {
454                                true
455                            }
456                        }
457                        Err(e) => {
458                            tracing::warn!("Skipping {} – cannot stat: {}", fallback.display(), e);
459                            false
460                        }
461                    }
462                }
463                Err(_) => {
464                    // Cannot create (e.g. /tmp read-only) – skip this candidate.
465                    false
466                }
467            };
468            if fallback_ok {
469                candidates.push(fallback);
470            }
471
472            candidates
473        }
474    }
475
476    /// Validate a container ID for safe filesystem use
477    fn validate_container_id(container_id: &str) -> Result<()> {
478        if container_id.is_empty() {
479            return Err(NucleusError::ConfigError(
480                "Container ID cannot be empty".to_string(),
481            ));
482        }
483
484        if !container_id
485            .chars()
486            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
487        {
488            return Err(NucleusError::ConfigError(format!(
489                "Invalid container ID (allowed: a-zA-Z0-9_-): {}",
490                container_id
491            )));
492        }
493
494        Ok(())
495    }
496
497    fn state_file_path(&self, container_id: &str) -> Result<PathBuf> {
498        Self::validate_container_id(container_id)?;
499        Ok(self.state_dir.join(format!("{}.json", container_id)))
500    }
501
502    /// Return the path to the exec FIFO used for two-phase create/start.
503    pub fn exec_fifo_path(&self, container_id: &str) -> Result<PathBuf> {
504        Self::validate_container_id(container_id)?;
505        Ok(self.state_dir.join(format!("{}.exec", container_id)))
506    }
507
508    /// Resolve a container reference by exact ID, name, or ID prefix
509    pub fn resolve_container(&self, reference: &str) -> Result<ContainerState> {
510        let states = self.list_states()?;
511
512        // Try exact ID match
513        if let Some(state) = states.iter().find(|s| s.id == reference) {
514            return Ok(state.clone());
515        }
516
517        // Try exact name match (must be unambiguous)
518        let name_matches: Vec<&ContainerState> =
519            states.iter().filter(|s| s.name == reference).collect();
520        match name_matches.len() {
521            1 => return Ok(name_matches[0].clone()),
522            n if n > 1 => {
523                return Err(NucleusError::AmbiguousContainer(format!(
524                    "Name '{}' matches {} containers; use container ID instead",
525                    reference, n
526                )))
527            }
528            _ => {}
529        }
530
531        // Try ID prefix match
532        let prefix_matches: Vec<&ContainerState> = states
533            .iter()
534            .filter(|s| s.id.starts_with(reference))
535            .collect();
536
537        match prefix_matches.len() {
538            0 => Err(NucleusError::ContainerNotFound(reference.to_string())),
539            1 => Ok(prefix_matches[0].clone()),
540            _ => Err(NucleusError::AmbiguousContainer(format!(
541                "'{}' matches {} containers",
542                reference,
543                prefix_matches.len()
544            ))),
545        }
546    }
547
548    /// Save container state
549    pub fn save_state(&self, state: &ContainerState) -> Result<()> {
550        let path = self.state_file_path(&state.id)?;
551        let tmp_path = self.state_dir.join(format!("{}.json.tmp", state.id));
552        let json = serde_json::to_string_pretty(state).map_err(|e| {
553            NucleusError::ConfigError(format!("Failed to serialize container state: {}", e))
554        })?;
555
556        // O_NOFOLLOW prevents TOCTOU symlink attacks: if an attacker replaces
557        // the temp path with a symlink between check and open, the open fails
558        // instead of following the symlink to an attacker-controlled location.
559        let mut file = OpenOptions::new()
560            .create(true)
561            .truncate(true)
562            .write(true)
563            .mode(0o600)
564            .custom_flags(libc::O_NOFOLLOW)
565            .open(&tmp_path)
566            .map_err(|e| {
567                NucleusError::ConfigError(format!(
568                    "Failed to open temp state file {:?}: {}",
569                    tmp_path, e
570                ))
571            })?;
572
573        file.write_all(json.as_bytes()).map_err(|e| {
574            NucleusError::ConfigError(format!("Failed to write state file {:?}: {}", tmp_path, e))
575        })?;
576        file.sync_all().map_err(|e| {
577            NucleusError::ConfigError(format!("Failed to sync state file {:?}: {}", tmp_path, e))
578        })?;
579
580        fs::rename(&tmp_path, &path).map_err(|e| {
581            NucleusError::ConfigError(format!(
582                "Failed to atomically replace state file {:?}: {}",
583                path, e
584            ))
585        })?;
586
587        debug!("Saved container state: {}", state.id);
588        Ok(())
589    }
590
591    /// Read a file with O_NOFOLLOW to prevent symlink attacks.
592    pub fn read_file_nofollow(
593        path: &std::path::Path,
594    ) -> std::result::Result<String, std::io::Error> {
595        use std::io::Read;
596        let file = OpenOptions::new()
597            .read(true)
598            .custom_flags(libc::O_NOFOLLOW)
599            .open(path)?;
600        let mut buf = String::new();
601        std::io::BufReader::new(file).read_to_string(&mut buf)?;
602        Ok(buf)
603    }
604
605    /// Load container state
606    ///
607    /// Opens with O_NOFOLLOW to prevent symlink-based TOCTOU attacks.
608    pub fn load_state(&self, container_id: &str) -> Result<ContainerState> {
609        let path = self.state_file_path(container_id)?;
610
611        let json = Self::read_file_nofollow(&path).map_err(|e| {
612            NucleusError::ConfigError(format!("Failed to read state file {:?}: {}", path, e))
613        })?;
614
615        let state = serde_json::from_str(&json).map_err(|e| {
616            NucleusError::ConfigError(format!("Failed to parse container state: {}", e))
617        })?;
618
619        Ok(state)
620    }
621
622    /// Delete container state
623    pub fn delete_state(&self, container_id: &str) -> Result<()> {
624        let path = self.state_file_path(container_id)?;
625
626        match fs::remove_file(&path) {
627            Ok(()) => {
628                debug!("Deleted container state: {}", container_id);
629            }
630            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
631                // Already deleted – idempotent (handles TOCTOU race)
632                debug!("Container state already deleted: {}", container_id);
633            }
634            Err(e) => {
635                return Err(NucleusError::ConfigError(format!(
636                    "Failed to delete state file {:?}: {}",
637                    path, e
638                )));
639            }
640        }
641
642        Ok(())
643    }
644
645    /// List all container states
646    pub fn list_states(&self) -> Result<Vec<ContainerState>> {
647        let mut states = Vec::new();
648
649        let entries = fs::read_dir(&self.state_dir).map_err(|e| {
650            NucleusError::ConfigError(format!(
651                "Failed to read state directory {:?}: {}",
652                self.state_dir, e
653            ))
654        })?;
655
656        for entry in entries {
657            let entry = entry.map_err(|e| {
658                NucleusError::ConfigError(format!("Failed to read directory entry: {}", e))
659            })?;
660
661            let path = entry.path();
662            if path.extension().and_then(|s| s.to_str()) == Some("json") {
663                // Use O_NOFOLLOW to prevent symlink attacks, consistent with
664                // load_state/save_state. Without this, a symlink in the state
665                // directory could be used as a file-read oracle.
666                match Self::read_file_nofollow(&path) {
667                    Ok(json) => match serde_json::from_str::<ContainerState>(&json) {
668                        Ok(state) => states.push(state),
669                        Err(e) => {
670                            warn!("Failed to parse state file {:?}: {}", path, e);
671                        }
672                    },
673                    Err(e) => {
674                        warn!("Failed to read state file {:?}: {}", path, e);
675                    }
676                }
677            }
678        }
679
680        Ok(states)
681    }
682
683    /// List only running containers
684    pub fn list_running(&self) -> Result<Vec<ContainerState>> {
685        let states = self.list_states()?;
686        Ok(states.into_iter().filter(|s| s.is_running()).collect())
687    }
688
689    /// Clean up stale state files (for containers that are no longer running)
690    pub fn cleanup_stale(&self) -> Result<()> {
691        let states = self.list_states()?;
692
693        for state in states {
694            if !state.is_running() {
695                info!(
696                    "Cleaning up stale state for container {} (PID {})",
697                    state.id, state.pid
698                );
699                self.delete_state(&state.id)?;
700            }
701        }
702
703        Ok(())
704    }
705}
706
707#[cfg(test)]
708mod tests {
709    use super::*;
710    use tempfile::TempDir;
711
712    fn temp_state_manager() -> (ContainerStateManager, TempDir) {
713        let temp_dir = TempDir::new().unwrap();
714        let mgr = ContainerStateManager {
715            state_dir: temp_dir.path().to_path_buf(),
716        };
717        (mgr, temp_dir)
718    }
719
720    #[test]
721    fn test_container_state_new() {
722        let state = ContainerState::new(ContainerStateParams {
723            id: "test".to_string(),
724            name: "test".to_string(),
725            pid: 1234,
726            command: vec!["/bin/sh".to_string()],
727            memory_limit: Some(512 * 1024 * 1024),
728            cpu_limit: Some(2000),
729            using_gvisor: false,
730            rootless: false,
731            cgroup_path: Some("/sys/fs/cgroup/nucleus-test".to_string()),
732            process_uid: 0,
733            process_gid: 0,
734            additional_gids: Vec::new(),
735        });
736
737        assert_eq!(state.id, "test");
738        assert_eq!(state.pid, 1234);
739        assert_eq!(state.memory_limit, Some(512 * 1024 * 1024));
740        assert_eq!(state.cpu_limit, Some(2000));
741        assert_eq!(state.creator_uid, nix::unistd::Uid::effective().as_raw());
742    }
743
744    #[test]
745    fn test_save_and_load_state() {
746        let (mgr, _temp_dir) = temp_state_manager();
747
748        let state = ContainerState::new(ContainerStateParams {
749            id: "test".to_string(),
750            name: "test".to_string(),
751            pid: 1234,
752            command: vec!["/bin/sh".to_string()],
753            memory_limit: Some(512 * 1024 * 1024),
754            cpu_limit: None,
755            using_gvisor: false,
756            rootless: false,
757            cgroup_path: None,
758            process_uid: 0,
759            process_gid: 0,
760            additional_gids: Vec::new(),
761        });
762
763        mgr.save_state(&state).unwrap();
764
765        let loaded = mgr.load_state("test").unwrap();
766        assert_eq!(loaded.id, state.id);
767        assert_eq!(loaded.pid, state.pid);
768        assert_eq!(loaded.command, state.command);
769    }
770
771    #[test]
772    fn test_delete_state() {
773        let (mgr, _temp_dir) = temp_state_manager();
774
775        let state = ContainerState::new(ContainerStateParams {
776            id: "test".to_string(),
777            name: "test".to_string(),
778            pid: 1234,
779            command: vec!["/bin/sh".to_string()],
780            memory_limit: None,
781            cpu_limit: None,
782            using_gvisor: false,
783            rootless: false,
784            cgroup_path: None,
785            process_uid: 0,
786            process_gid: 0,
787            additional_gids: Vec::new(),
788        });
789
790        mgr.save_state(&state).unwrap();
791        assert!(mgr.load_state("test").is_ok());
792
793        mgr.delete_state("test").unwrap();
794        assert!(mgr.load_state("test").is_err());
795    }
796
797    #[test]
798    fn test_list_states() {
799        let (mgr, _temp_dir) = temp_state_manager();
800
801        let state1 = ContainerState::new(ContainerStateParams {
802            id: "test1".to_string(),
803            name: "test1".to_string(),
804            pid: 1234,
805            command: vec!["/bin/sh".to_string()],
806            memory_limit: None,
807            cpu_limit: None,
808            using_gvisor: false,
809            rootless: false,
810            cgroup_path: None,
811            process_uid: 0,
812            process_gid: 0,
813            additional_gids: Vec::new(),
814        });
815
816        let state2 = ContainerState::new(ContainerStateParams {
817            id: "test2".to_string(),
818            name: "test2".to_string(),
819            pid: 5678,
820            command: vec!["/bin/bash".to_string()],
821            memory_limit: None,
822            cpu_limit: None,
823            using_gvisor: false,
824            rootless: false,
825            cgroup_path: None,
826            process_uid: 0,
827            process_gid: 0,
828            additional_gids: Vec::new(),
829        });
830
831        mgr.save_state(&state1).unwrap();
832        mgr.save_state(&state2).unwrap();
833
834        let states = mgr.list_states().unwrap();
835        assert_eq!(states.len(), 2);
836    }
837
838    #[test]
839    fn test_resolve_container_by_id() {
840        let (mgr, _temp_dir) = temp_state_manager();
841
842        let state = ContainerState::new(ContainerStateParams {
843            id: "abc123def456".to_string(),
844            name: "mycontainer".to_string(),
845            pid: 1234,
846            command: vec!["/bin/sh".to_string()],
847            memory_limit: None,
848            cpu_limit: None,
849            using_gvisor: false,
850            rootless: false,
851            cgroup_path: None,
852            process_uid: 0,
853            process_gid: 0,
854            additional_gids: Vec::new(),
855        });
856        mgr.save_state(&state).unwrap();
857
858        // Exact ID
859        let resolved = mgr.resolve_container("abc123def456").unwrap();
860        assert_eq!(resolved.id, "abc123def456");
861
862        // Name
863        let resolved = mgr.resolve_container("mycontainer").unwrap();
864        assert_eq!(resolved.id, "abc123def456");
865
866        // ID prefix
867        let resolved = mgr.resolve_container("abc123").unwrap();
868        assert_eq!(resolved.id, "abc123def456");
869
870        // Not found
871        assert!(mgr.resolve_container("nonexistent").is_err());
872    }
873
874    #[test]
875    fn test_load_state_rejects_symlink() {
876        // H-3: O_NOFOLLOW must prevent loading state through a symlink
877        let (mgr, temp_dir) = temp_state_manager();
878
879        // Create a real state file
880        let state = ContainerState::new(ContainerStateParams {
881            id: "real".to_string(),
882            name: "real".to_string(),
883            pid: 1234,
884            command: vec!["/bin/sh".to_string()],
885            memory_limit: None,
886            cpu_limit: None,
887            using_gvisor: false,
888            rootless: false,
889            cgroup_path: None,
890            process_uid: 0,
891            process_gid: 0,
892            additional_gids: Vec::new(),
893        });
894        mgr.save_state(&state).unwrap();
895
896        // Create a symlink pointing to the real state file
897        let symlink_path = temp_dir.path().join("symlinked.json");
898        let real_path = temp_dir.path().join("real.json");
899        std::os::unix::fs::symlink(&real_path, &symlink_path).unwrap();
900
901        // Loading through the symlink ID must fail (O_NOFOLLOW)
902        let result = mgr.load_state("symlinked");
903        assert!(result.is_err(), "load_state must reject symlinks");
904    }
905
906    #[test]
907    fn test_list_states_ignores_symlinks() {
908        // list_states must use O_NOFOLLOW, so symlinked state files are skipped
909        // rather than followed (which would be a file-read oracle).
910        let (mgr, temp_dir) = temp_state_manager();
911
912        // Create a real state file
913        let state = ContainerState::new(ContainerStateParams {
914            id: "real123456789012345678".to_string(),
915            name: "real".to_string(),
916            pid: 1234,
917            command: vec!["/bin/sh".to_string()],
918            memory_limit: None,
919            cpu_limit: None,
920            using_gvisor: false,
921            rootless: false,
922            cgroup_path: None,
923            process_uid: 0,
924            process_gid: 0,
925            additional_gids: Vec::new(),
926        });
927        mgr.save_state(&state).unwrap();
928
929        // Create a symlink masquerading as a state file
930        let real_path = temp_dir.path().join("real123456789012345678.json");
931        let symlink_path = temp_dir.path().join("evil.json");
932        std::os::unix::fs::symlink(&real_path, &symlink_path).unwrap();
933
934        // list_states should only return the real file, not follow the symlink
935        let states = mgr.list_states().unwrap();
936        // The symlink should fail to open with O_NOFOLLOW, leaving only the real state
937        assert_eq!(states.len(), 1, "symlinked state file must be skipped");
938        assert_eq!(states[0].id, "real123456789012345678");
939    }
940
941    #[test]
942    fn test_save_state_rejects_symlink_tmp() {
943        // H-3: O_NOFOLLOW on save must prevent writing through a symlink
944        let (mgr, temp_dir) = temp_state_manager();
945
946        let state = ContainerState::new(ContainerStateParams {
947            id: "target".to_string(),
948            name: "target".to_string(),
949            pid: 1234,
950            command: vec!["/bin/sh".to_string()],
951            memory_limit: None,
952            cpu_limit: None,
953            using_gvisor: false,
954            rootless: false,
955            cgroup_path: None,
956            process_uid: 0,
957            process_gid: 0,
958            additional_gids: Vec::new(),
959        });
960
961        // Pre-create a symlink at the temp path to simulate an attack
962        let tmp_path = temp_dir.path().join("target.json.tmp");
963        let evil_path = temp_dir.path().join("evil");
964        std::os::unix::fs::symlink(&evil_path, &tmp_path).unwrap();
965
966        // save_state should fail because O_NOFOLLOW rejects the symlink
967        let result = mgr.save_state(&state);
968        assert!(
969            result.is_err(),
970            "save_state must reject symlinks at tmp path"
971        );
972    }
973
974    #[test]
975    fn test_is_running_returns_false_when_start_ticks_is_zero() {
976        // BUG-04: When start_ticks=0 (failed to read), is_running() must return
977        // false to avoid PID reuse false positives, not fall back to existence check
978        let mut state = ContainerState::new(ContainerStateParams {
979            id: "test".to_string(),
980            name: "test".to_string(),
981            pid: std::process::id(), // our PID exists in /proc
982            command: vec!["/bin/sh".to_string()],
983            memory_limit: None,
984            cpu_limit: None,
985            using_gvisor: false,
986            rootless: false,
987            cgroup_path: None,
988            process_uid: 0,
989            process_gid: 0,
990            additional_gids: Vec::new(),
991        });
992        // Force start_ticks to 0 to simulate failed read
993        state.start_ticks = 0;
994        // With BUG-04 present, this returns true (falls back to existence check)
995        // After fix, must return false
996        assert!(
997            !state.is_running(),
998            "is_running() must return false when start_ticks=0 (cannot verify PID identity)"
999        );
1000    }
1001
1002    #[test]
1003    fn test_read_start_ticks_retries_on_failure() {
1004        // BUG-09: read_start_ticks must retry when /proc/<pid>/stat is temporarily
1005        // unavailable after fork, instead of immediately returning 0.
1006        // Verify by calling with our own PID (should succeed) and a non-existent
1007        // PID (should return 0 after retries, not panic).
1008        let own_ticks = ContainerState::read_start_ticks(std::process::id());
1009        assert!(
1010            own_ticks > 0,
1011            "read_start_ticks must return non-zero for a live process"
1012        );
1013        // Non-existent PID should gracefully return 0 (after retries)
1014        let bogus_ticks = ContainerState::read_start_ticks(u32::MAX);
1015        assert_eq!(
1016            bogus_ticks, 0,
1017            "read_start_ticks must return 0 for non-existent PID"
1018        );
1019    }
1020
1021    #[test]
1022    fn test_delete_state_handles_already_deleted() {
1023        // BUG-16: delete_state must not fail if file was already deleted (TOCTOU)
1024        let (mgr, _temp_dir) = temp_state_manager();
1025        // Delete a state that doesn't exist – should succeed (idempotent)
1026        let result = mgr.delete_state("nonexistent-id");
1027        assert!(
1028            result.is_ok(),
1029            "delete_state must be idempotent for missing files"
1030        );
1031    }
1032}