// zinit 0.1.0
//
// Process supervisor with dependency management.
// Documentation
//! Builtin service handlers (sysvol, etc).

use crate::sdk::{FailureReason, ServiceConfig, ServiceState};
use crate::server::sysvol;

use crate::server::graph::ServiceId;
use crate::server::log;

use super::Supervisor;
use super::events::SupervisorEvent;

/// Prefix marking a service whose `exec` names a builtin handler (code run by
/// the supervisor) instead of a process to spawn. The text that follows the
/// prefix is the builtin's name, e.g. `"@builtin:sysvol"` names `sysvol`.
pub(crate) const BUILTIN_PREFIX: &str = "@builtin:";

impl Supervisor {
    /// Extract the builtin handler name from a service config.
    ///
    /// Returns `Some(name)` holding whatever follows [`BUILTIN_PREFIX`] when
    /// the service's `exec` string begins with that prefix, and `None` for
    /// every other `exec` value. An `exec` that consists of the prefix alone
    /// yields `Some("")`.
    pub(crate) fn is_builtin_service(config: &ServiceConfig) -> Option<&str> {
        let exec = &config.service.exec;
        exec.strip_prefix(BUILTIN_PREFIX)
    }

    /// Run a builtin service in-process instead of spawning a child process.
    ///
    /// * `id` - graph id of the service being started.
    /// * `builtin` - handler name, i.e. the part of `exec` after the builtin prefix.
    /// * `config` - the service's configuration; only its name is read here.
    ///
    /// Outside PID1 mode *every* builtin is skipped (the gate runs before the
    /// name is inspected, so unknown names are skipped too): the service is
    /// marked `Running { pid: 0 }` and its dependents are queued for
    /// re-evaluation, so the rest of the graph proceeds as if it had succeeded.
    ///
    /// In PID1 mode the builtin is dispatched by name. An unknown name marks
    /// the service `Failed` with a `SpawnError`, escalates through
    /// `handle_critical_failure` when the service is critical, and queues the
    /// dependents for re-evaluation — the same follow-up that
    /// `handle_builtin_completed` performs for a builtin that ran and failed.
    pub(crate) async fn run_builtin_service(
        &mut self,
        id: ServiceId,
        builtin: &str,
        config: &ServiceConfig,
    ) {
        let name = config.service.name.clone();

        // Gate: builtins may touch hardware, so they only run in PID1 mode.
        if !self.pid1_mode {
            tracing::warn!(
                service = %name,
                builtin = builtin,
                "builtin service skipped (not in PID1 mode)"
            );
            // No-op success - let dependents proceed. The write guard is
            // scoped so it is released before `queue_reevaluate` is awaited.
            let dependents = {
                let mut graph = self.graph.write().await;
                if let Some(service) = graph.get_mut(id) {
                    service.state = ServiceState::Running { pid: 0 };
                }
                graph.dependents(id)
            };
            self.queue_reevaluate(dependents).await;
            return;
        }

        match builtin {
            "sysvol" => self.run_sysvol(id, &name).await,
            _ => {
                tracing::error!(builtin = builtin, "unknown builtin service");
                let message = format!("unknown builtin: {}", builtin);

                // Record the failure and collect what is needed for the
                // follow-up while holding the write guard, then release it.
                let (critical, dependents) = {
                    let mut graph = self.graph.write().await;
                    graph.set_state(
                        id,
                        ServiceState::Failed {
                            reason: FailureReason::SpawnError {
                                message: message.clone(),
                            },
                        },
                    );
                    let critical = match graph.get_mut(id) {
                        Some(service) => service.is_critical(),
                        None => false,
                    };
                    (critical, graph.dependents(id))
                };

                // Without this follow-up a failed service would never
                // escalate and its dependents would never be re-evaluated.
                if critical {
                    let reason = FailureReason::SpawnError { message };
                    self.handle_critical_failure(&name, &reason).await;
                }
                self.queue_reevaluate(dependents).await;
            }
        }
    }

    /// Run the `sysvol` builtin (storage initialization) in the background.
    ///
    /// Marks the service `Starting { pid: 0 }`, initializes its log buffer and
    /// spawns a task that runs `sysvol::init` via `spawn_blocking`. This method
    /// returns as soon as the task is spawned; the outcome is appended to the
    /// service's log buffer and reported to the supervisor as
    /// `SupervisorEvent::BuiltinCompleted`.
    ///
    /// Every `StorageState` returned by `sysvol::init` — including `NoDisk` —
    /// is reported as success. An `Err` from `sysvol::init`, or a join error
    /// from the blocking task, is reported as failure with the error text.
    ///
    /// * `id` - graph id of the service; also the key of its log buffer.
    /// * `name` - service name stamped on the emitted log lines.
    pub(crate) async fn run_sysvol(&mut self, id: ServiceId, name: &str) {
        // Set Starting state; the guard is scoped so it is released before
        // anything else is awaited.
        {
            let mut graph = self.graph.write().await;
            if let Some(service) = graph.get_mut(id) {
                service.record_started();
                service.state = ServiceState::Starting { pid: 0 };
            }
        }

        // Initialize log buffer for this builtin.
        // NOTE(review): 100 is presumably the buffer's line capacity — confirm
        // against `log::init_buffer`.
        log::init_buffer(&self.log_buffers, id, 100).await;

        // Owned handles moved into the background task.
        let event_tx = self.event_tx.clone();
        let log_buffers = self.log_buffers.clone();
        let service_name = name.to_string();

        tokio::spawn(async move {
            // Build a stdout log line for this service, stamped with the
            // current wall-clock time (0 if the clock reads before the epoch).
            let log_line = |msg: &str| crate::sdk::LogLine {
                timestamp_ms: std::time::SystemTime::now()
                    .duration_since(std::time::UNIX_EPOCH)
                    .map(|d| d.as_millis() as u64)
                    .unwrap_or(0),
                service: service_name.clone(),
                stream: crate::sdk::LogStream::Stdout,
                content: msg.to_string(),
            };

            // Log start
            {
                let mut buffers = log_buffers.write().await;
                if let Some(buf) = buffers.get_mut(&id) {
                    buf.push(log_line("sysvol: starting storage initialization..."));
                }
            }

            // `sysvol::init` is synchronous, so it runs off the async workers.
            let result = tokio::task::spawn_blocking(sysvol::init).await;

            // Each arm emits its tracing event and yields the line for the
            // service's log buffer plus the error text (None means success).
            let (message, error) = match result {
                Ok(Ok(sysvol::StorageState::Mounted { device, .. })) => {
                    tracing::info!(device = %device.display(), "sysvol mounted");
                    (format!("sysvol: mounted {}", device.display()), None)
                }
                Ok(Ok(sysvol::StorageState::Initialized { device, .. })) => {
                    tracing::info!(device = %device.display(), "sysvol initialized");
                    (format!("sysvol: initialized {}", device.display()), None)
                }
                Ok(Ok(sysvol::StorageState::NoDisk)) => {
                    // NoDisk is success - system continues
                    tracing::info!("no disk found, running diskless");
                    ("sysvol: no disk found, running diskless".to_string(), None)
                }
                Ok(Err(e)) => {
                    tracing::error!(service = %service_name, error = %e, "sysvol failed");
                    (format!("sysvol: ERROR - {}", e), Some(e.to_string()))
                }
                Err(e) => {
                    // Join error from the blocking task (e.g. it panicked).
                    tracing::error!(service = %service_name, error = %e, "sysvol task panicked");
                    (format!("sysvol: PANIC - {}", e), Some(e.to_string()))
                }
            };

            // Single place where the outcome is written to the log buffer;
            // the guard is released before the completion event is sent.
            {
                let mut buffers = log_buffers.write().await;
                if let Some(buf) = buffers.get_mut(&id) {
                    buf.push(log_line(&message));
                }
            }

            let success = error.is_none();
            let completion = SupervisorEvent::BuiltinCompleted {
                service_id: id,
                success,
                error,
            };

            // If the event cannot be delivered the service would stay in
            // `Starting` with no trace of why, so leave a record.
            if event_tx.send(completion).await.is_err() {
                tracing::warn!(
                    service = %service_name,
                    "builtin completion event could not be delivered (event channel closed)"
                );
            }
        });
    }

    /// Handle completion of a builtin service.
    pub(crate) async fn handle_builtin_completed(
        &mut self,
        id: ServiceId,
        success: bool,
        error: Option<String>,
    ) {
        let (name, critical, dependents) = {
            let mut graph = self.graph.write().await;
            let service = match graph.get_mut(id) {
                Some(s) => s,
                None => return,
            };

            let name = service.name.clone();
            let critical = service.is_critical();

            if success {
                service.state = ServiceState::Running { pid: 0 };
                tracing::info!(service = %name, "builtin service completed");
            } else {
                service.state = ServiceState::Failed {
                    reason: FailureReason::SpawnError {
                        message: error.clone().unwrap_or_else(|| "unknown error".to_string()),
                    },
                };
                tracing::error!(service = %name, "builtin service failed");
            }

            let dependents = graph.dependents(id);
            (name, critical, dependents)
        };

        // Handle critical failure
        if !success && critical {
            let reason = FailureReason::SpawnError {
                message: error.unwrap_or_else(|| "unknown error".to_string()),
            };
            self.handle_critical_failure(&name, &reason).await;
        }

        // Notify dependents
        self.queue_reevaluate(dependents).await;
    }
}