// zinit 0.1.0
//
// Process supervisor with dependency management.
// Documentation
//! Service state machine definitions.
//!
//! Defines the 7 explicit states a service can be in, along with failure reasons.

use serde::{Deserialize, Serialize};

use super::signal;

/// Reason why a service failed.
///
/// Serialized by serde as an internally tagged value: the variant name, in
/// `snake_case`, is stored under the `type` key next to the variant's own fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum FailureReason {
    /// Process exited with non-zero code.
    ///
    /// `code` is the exit code that was observed.
    ExitCode { code: i32 },
    /// Process was killed by a signal.
    ///
    /// `signal` is the signal number; `display` resolves it to a name via `signal::name`.
    Signal { signal: i32 },
    /// Process did not start within timeout.
    StartTimeout,
    /// Process did not stop within timeout after SIGTERM.
    StopTimeout,
    /// Health check failed after retries.
    ///
    /// `attempts` is the number of attempts reported in the failure message.
    HealthCheckFailed { attempts: u32 },
    /// A required dependency failed.
    ///
    /// `service` identifies the dependency that failed (presumably its service name).
    DependencyFailed { service: String },
    /// Failed to spawn process.
    ///
    /// `message` carries the free-form error text shown after "spawn error:".
    SpawnError { message: String },
    /// Configuration error - a required dependency doesn't exist.
    ///
    /// `dependency` identifies the dependency that could not be found.
    MissingDependency { dependency: String },
}

impl FailureReason {
    /// Renders this failure as a short, human-readable phrase.
    ///
    /// A fresh `String` is allocated on every call. Signal failures show both
    /// the name reported by `signal::name` and the raw signal number.
    pub fn display(&self) -> String {
        match self {
            // Variants without payload map to fixed phrases.
            Self::StartTimeout => "start timeout".to_owned(),
            Self::StopTimeout => "stop timeout".to_owned(),

            // Variants describing how the process itself ended.
            Self::ExitCode { code } => format!("exited with code {}", code),
            Self::Signal { signal: signo } => {
                // The field is bound as `signo` so that `signal` keeps
                // referring to the module, not the field.
                format!("killed by {} ({})", signal::name(*signo), signo)
            }
            Self::SpawnError { message } => format!("spawn error: {}", message),

            // Variants describing supervision-level failures.
            Self::HealthCheckFailed { attempts } => {
                format!("health check failed after {} attempts", attempts)
            }
            Self::DependencyFailed { service } => {
                format!("dependency '{}' failed", service)
            }
            Self::MissingDependency { dependency } => {
                format!("missing dependency '{}'", dependency)
            }
        }
    }
}

impl std::fmt::Display for FailureReason {
    /// Writes the text produced by [`FailureReason::display`] to the formatter.
    ///
    /// The text is written verbatim; width and fill flags on the formatter are
    /// not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = self.display();
        f.write_str(&rendered)
    }
}

/// The state of a service in the supervisor.
///
/// Serialized by serde as an internally tagged value: the variant name, in
/// `snake_case`, is stored under the `state` key next to the variant's own fields.
/// `Inactive` is the value produced by `Default`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "state", rename_all = "snake_case")]
pub enum ServiceState {
    /// Service has never been started.
    #[default]
    Inactive,
    /// Service is waiting on dependencies.
    ///
    /// `waiting_on` presumably holds the names of the dependencies that are
    /// not yet satisfied - TODO confirm against the code that builds it.
    Blocked { waiting_on: Vec<String> },
    /// Process has been spawned, waiting for health check or startup.
    ///
    /// `pid` is the process id reported by `ServiceState::pid`.
    Starting { pid: u32 },
    /// Process is running and healthy.
    ///
    /// `pid` is the process id reported by `ServiceState::pid`.
    Running { pid: u32 },
    /// SIGTERM sent, waiting for process to exit.
    ///
    /// `pid` is the process id reported by `ServiceState::pid`.
    Stopping { pid: u32 },
    /// Process exited cleanly.
    ///
    /// `exit_code` is optional; only `Some(0)` makes `is_satisfied` return true.
    /// NOTE(review): confirm which situations record `None` or a non-zero code here.
    Exited { exit_code: Option<i32> },
    /// Process failed.
    ///
    /// `reason` records why; see [`FailureReason`].
    Failed { reason: FailureReason },
}

impl ServiceState {
    /// Lowercase identifier of the current state, e.g. `"running"`.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Inactive => "inactive",
            Self::Blocked { .. } => "blocked",
            Self::Starting { .. } => "starting",
            Self::Running { .. } => "running",
            Self::Stopping { .. } => "stopping",
            Self::Exited { .. } => "exited",
            Self::Failed { .. } => "failed",
        }
    }

    /// Three-character bracketed marker used when listing services.
    pub fn symbol(&self) -> &'static str {
        match self {
            Self::Inactive => "[-]",
            Self::Blocked { .. } => "[?]",
            Self::Starting { .. } => "[>]",
            Self::Running { .. } => "[+]",
            Self::Stopping { .. } => "[!]",
            Self::Exited { .. } => "[.]",
            Self::Failed { .. } => "[X]",
        }
    }

    /// PID carried by the state, or `None` for states that hold no process.
    ///
    /// Only `Starting`, `Running` and `Stopping` carry a PID.
    pub fn pid(&self) -> Option<u32> {
        match self {
            Self::Starting { pid } | Self::Running { pid } | Self::Stopping { pid } => Some(*pid),
            Self::Inactive
            | Self::Blocked { .. }
            | Self::Exited { .. }
            | Self::Failed { .. } => None,
        }
    }

    /// Whether a process is currently attached to the service.
    ///
    /// True for exactly the states that carry a PID: `Starting`, `Running`
    /// and `Stopping`.
    pub fn is_active(&self) -> bool {
        self.pid().is_some()
    }

    /// Whether this state fulfils a "requires" dependency of another service.
    ///
    /// That is the case while `Running`, and also once the service has
    /// `Exited` with exit code 0, so that oneshot services keep satisfying
    /// their dependents after they complete.
    pub fn is_satisfied(&self) -> bool {
        match self {
            Self::Running { .. } => true,
            Self::Exited { exit_code } => *exit_code == Some(0),
            Self::Inactive
            | Self::Blocked { .. }
            | Self::Starting { .. }
            | Self::Stopping { .. }
            | Self::Failed { .. } => false,
        }
    }

    /// Whether a start may be attempted from this state.
    ///
    /// States with a live process can never be started again. A failed
    /// service may be retried unless it failed with `MissingDependency`:
    /// that is a permanent configuration error, since the dependency will
    /// never appear.
    pub fn can_attempt_start(&self) -> bool {
        match self {
            Self::Inactive | Self::Blocked { .. } | Self::Exited { .. } => true,
            // Permanent config error - retrying cannot succeed.
            Self::Failed {
                reason: FailureReason::MissingDependency { .. },
            } => false,
            // Every other failure is considered retryable.
            Self::Failed { .. } => true,
            Self::Starting { .. } | Self::Running { .. } | Self::Stopping { .. } => false,
        }
    }
}

impl std::fmt::Display for ServiceState {
    /// Writes the state's [`ServiceState::name`] to the formatter.
    ///
    /// The name is written verbatim; width and fill flags on the formatter are
    /// not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.name())
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for the service state machine types.
    //!
    //! Every expected value below is taken directly from the match arms of
    //! `ServiceState` / `FailureReason` in this file.

    use super::*;

    /// The default state is the one marked `#[default]`.
    #[test]
    fn test_default_state() {
        assert_eq!(ServiceState::default(), ServiceState::Inactive);
    }

    /// Every variant maps to its lowercase name.
    #[test]
    fn test_state_names() {
        assert_eq!(ServiceState::Inactive.name(), "inactive");
        assert_eq!(
            ServiceState::Blocked { waiting_on: vec![] }.name(),
            "blocked"
        );
        assert_eq!(ServiceState::Starting { pid: 1 }.name(), "starting");
        assert_eq!(ServiceState::Running { pid: 1 }.name(), "running");
        assert_eq!(ServiceState::Stopping { pid: 1 }.name(), "stopping");
        assert_eq!(ServiceState::Exited { exit_code: None }.name(), "exited");
        assert_eq!(
            ServiceState::Failed {
                reason: FailureReason::StartTimeout
            }
            .name(),
            "failed"
        );
    }

    /// Every variant maps to its display symbol.
    #[test]
    fn test_state_symbols() {
        assert_eq!(ServiceState::Inactive.symbol(), "[-]");
        assert_eq!(
            ServiceState::Blocked { waiting_on: vec![] }.symbol(),
            "[?]"
        );
        assert_eq!(ServiceState::Starting { pid: 1 }.symbol(), "[>]");
        assert_eq!(ServiceState::Running { pid: 1 }.symbol(), "[+]");
        assert_eq!(ServiceState::Stopping { pid: 1 }.symbol(), "[!]");
        assert_eq!(ServiceState::Exited { exit_code: None }.symbol(), "[.]");
        assert_eq!(
            ServiceState::Failed {
                reason: FailureReason::StartTimeout
            }
            .symbol(),
            "[X]"
        );
    }

    /// Only the three process-holding states expose a PID.
    #[test]
    fn test_pid_extraction() {
        assert_eq!(ServiceState::Inactive.pid(), None);
        assert_eq!(ServiceState::Blocked { waiting_on: vec![] }.pid(), None);
        assert_eq!(ServiceState::Starting { pid: 123 }.pid(), Some(123));
        assert_eq!(ServiceState::Running { pid: 456 }.pid(), Some(456));
        assert_eq!(ServiceState::Stopping { pid: 789 }.pid(), Some(789));
        assert_eq!(ServiceState::Exited { exit_code: Some(0) }.pid(), None);
        assert_eq!(
            ServiceState::Failed {
                reason: FailureReason::StopTimeout
            }
            .pid(),
            None
        );
    }

    /// `is_active` is true exactly for the process-holding states.
    #[test]
    fn test_is_active() {
        assert!(!ServiceState::Inactive.is_active());
        assert!(!ServiceState::Blocked { waiting_on: vec![] }.is_active());
        assert!(ServiceState::Starting { pid: 1 }.is_active());
        assert!(ServiceState::Running { pid: 1 }.is_active());
        assert!(ServiceState::Stopping { pid: 1 }.is_active());
        assert!(!ServiceState::Exited { exit_code: None }.is_active());
        assert!(
            !ServiceState::Failed {
                reason: FailureReason::StartTimeout
            }
            .is_active()
        );
    }

    /// Running services and cleanly exited oneshots satisfy dependencies.
    #[test]
    fn test_is_satisfied() {
        assert!(!ServiceState::Inactive.is_satisfied());
        assert!(!ServiceState::Blocked { waiting_on: vec![] }.is_satisfied());
        assert!(!ServiceState::Starting { pid: 1 }.is_satisfied());
        assert!(ServiceState::Running { pid: 1 }.is_satisfied());
        assert!(!ServiceState::Stopping { pid: 1 }.is_satisfied());
        // Oneshot services: only a recorded exit code of 0 counts.
        assert!(ServiceState::Exited { exit_code: Some(0) }.is_satisfied());
        assert!(!ServiceState::Exited { exit_code: Some(1) }.is_satisfied());
        assert!(!ServiceState::Exited { exit_code: None }.is_satisfied());
        assert!(
            !ServiceState::Failed {
                reason: FailureReason::ExitCode { code: 1 }
            }
            .is_satisfied()
        );
    }

    /// Start is allowed from idle and retryable states only.
    #[test]
    fn test_can_attempt_start() {
        assert!(ServiceState::Inactive.can_attempt_start());
        assert!(
            ServiceState::Blocked {
                waiting_on: vec!["dep".to_string()]
            }
            .can_attempt_start()
        );
        // States with a live process cannot be started again
        assert!(!ServiceState::Starting { pid: 1 }.can_attempt_start());
        assert!(!ServiceState::Running { pid: 1 }.can_attempt_start());
        assert!(!ServiceState::Stopping { pid: 1 }.can_attempt_start());
        assert!(ServiceState::Exited { exit_code: Some(0) }.can_attempt_start());
        assert!(ServiceState::Exited { exit_code: None }.can_attempt_start());
        // Regular failures can retry
        assert!(
            ServiceState::Failed {
                reason: FailureReason::StartTimeout
            }
            .can_attempt_start()
        );
        assert!(
            ServiceState::Failed {
                reason: FailureReason::ExitCode { code: 1 }
            }
            .can_attempt_start()
        );
        assert!(
            ServiceState::Failed {
                reason: FailureReason::DependencyFailed {
                    service: "dep".to_string()
                }
            }
            .can_attempt_start()
        );
        // MissingDependency is permanent - cannot retry
        assert!(
            !ServiceState::Failed {
                reason: FailureReason::MissingDependency {
                    dependency: "missing".to_string()
                }
            }
            .can_attempt_start()
        );
    }

    /// Failure messages that do not depend on the `signal` module.
    #[test]
    fn test_failure_reason_display() {
        assert_eq!(
            FailureReason::ExitCode { code: 1 }.display(),
            "exited with code 1"
        );
        assert_eq!(FailureReason::StartTimeout.display(), "start timeout");
        assert_eq!(FailureReason::StopTimeout.display(), "stop timeout");
        assert_eq!(
            FailureReason::HealthCheckFailed { attempts: 3 }.display(),
            "health check failed after 3 attempts"
        );
        assert_eq!(
            FailureReason::DependencyFailed {
                service: "foo".to_string()
            }
            .display(),
            "dependency 'foo' failed"
        );
        assert_eq!(
            FailureReason::SpawnError {
                message: "boom".to_string()
            }
            .display(),
            "spawn error: boom"
        );
        assert_eq!(
            FailureReason::MissingDependency {
                dependency: "db".to_string()
            }
            .display(),
            "missing dependency 'db'"
        );
    }

    /// The `Display` impls agree with `display()` / `name()`.
    #[test]
    fn test_display_impls() {
        let reason = FailureReason::HealthCheckFailed { attempts: 2 };
        assert_eq!(reason.to_string(), reason.display());
        assert_eq!(FailureReason::StartTimeout.to_string(), "start timeout");

        assert_eq!(ServiceState::Inactive.to_string(), "inactive");
        assert_eq!(ServiceState::Stopping { pid: 9 }.to_string(), "stopping");
    }

    /// States survive a JSON round trip, including a nested failure reason.
    #[test]
    fn test_serialization() {
        let state = ServiceState::Running { pid: 123 };
        let json = serde_json::to_string(&state).unwrap();
        assert!(json.contains("running"));
        assert!(json.contains("123"));

        let parsed: ServiceState = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, state);

        let failed = ServiceState::Failed {
            reason: FailureReason::ExitCode { code: 2 },
        };
        let json = serde_json::to_string(&failed).unwrap();
        assert!(json.contains("failed"));
        assert!(json.contains("exit_code"));

        let parsed: ServiceState = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, failed);
    }
}