zinit 0.3.9

Process supervisor with dependency management
Documentation
# zinit-pid1 - Specification

The init process (PID 1). Minimal, no async.

## Overview

zinit-pid1 must:
- Spawn and monitor zinit-server
- Reap orphaned zombies
- Forward signals to zinit-server
- Handle system shutdown/reboot
- **Never exit** on VM/bare-metal (the kernel panics if PID 1 exits, whatever the exit code)

## Cargo.toml

```toml
[package]
name = "zinit-pid1"
version.workspace = true
edition.workspace = true

[dependencies]
zinit-common = { path = "../zinit-common" }

nix.workspace = true
libc.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
```

## Core Loop

```rust
use nix::sys::signal::{self, Signal, SigHandler, SigSet, SigAction, SaFlags};
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
use nix::unistd::{fork, ForkResult, Pid, execv, getpid};
use nix::sys::reboot::{reboot, RebootMode};
use std::ffi::CString;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use std::thread;

use zinit_common::ZinitClient;

// Signal flags
//
// Each flag is set to `true` by `signal_handler` when the matching signal is
// delivered, and read-and-cleared by the main loop with `swap(false, ...)`.
// The handler does nothing else, so all real work happens in the main loop.

// SIGTERM: the main loop leaves with `ShutdownMode::Poweroff`.
static SIGTERM_RECEIVED: AtomicBool = AtomicBool::new(false);
// SIGINT: the main loop leaves with `ShutdownMode::Reboot`.
static SIGINT_RECEIVED: AtomicBool = AtomicBool::new(false);
// SIGUSR1: the main loop calls `soft_restart_server`.
static SIGUSR1_RECEIVED: AtomicBool = AtomicBool::new(false);
// SIGUSR2: the main loop calls `handle_self_update`.
static SIGUSR2_RECEIVED: AtomicBool = AtomicBool::new(false);
// SIGCHLD: the main loop calls `reap_zombies`.
static SIGCHLD_RECEIVED: AtomicBool = AtomicBool::new(false);

/// What the system should do once the main loop has been left.
///
/// `Debug` is derived because the shutdown path logs the mode with `{:?}`;
/// without it that log statement does not compile.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ShutdownMode {
    /// No shutdown was requested (initial state).
    None,
    /// Restart the machine; selected when SIGINT is received.
    Reboot,
    /// Power the machine off; selected when SIGTERM is received.
    Poweroff,
}

/// Entry point: installs the signal handlers, starts zinit-server and then
/// polls the signal flags until a poweroff (SIGTERM) or reboot (SIGINT) is
/// requested, at which point the shutdown sequence runs.
fn main() {
    let running_as_init = getpid().as_raw() == 1;
    if !running_as_init {
        eprintln!("Warning: not running as PID 1");
    }

    setup_signals();

    let mut server_pid = spawn_server();

    // The loop only ends through a `break` carrying the requested mode.
    let shutdown_mode = loop {
        // Reap first so `server_pid` is up to date for the checks below.
        if SIGCHLD_RECEIVED.swap(false, Ordering::SeqCst) {
            server_pid = reap_zombies(server_pid);
        }

        if SIGTERM_RECEIVED.swap(false, Ordering::SeqCst) {
            break ShutdownMode::Poweroff;
        }

        if SIGINT_RECEIVED.swap(false, Ordering::SeqCst) {
            break ShutdownMode::Reboot;
        }

        if SIGUSR1_RECEIVED.swap(false, Ordering::SeqCst) {
            soft_restart_server(&mut server_pid);
        }

        if SIGUSR2_RECEIVED.swap(false, Ordering::SeqCst) {
            handle_self_update();
        }

        // No server process is being tracked: back off for a second, then
        // try to start a new one.
        if server_pid.is_none() {
            eprintln!("zinit-server died, respawning...");
            thread::sleep(Duration::from_secs(1));
            server_pid = spawn_server();
        }

        // Poll interval for the signal flags.
        thread::sleep(Duration::from_millis(100));
    };

    do_shutdown(server_pid, shutdown_mode);
}
```

## Signal Setup

```rust
/// Signal handler shared by every signal pid1 listens to.
///
/// It only records that the signal arrived by setting the matching atomic
/// flag; signals without a flag are ignored.
extern "C" fn signal_handler(sig: i32) {
    let flag = match sig {
        libc::SIGTERM => &SIGTERM_RECEIVED,
        libc::SIGINT => &SIGINT_RECEIVED,
        libc::SIGUSR1 => &SIGUSR1_RECEIVED,
        libc::SIGUSR2 => &SIGUSR2_RECEIVED,
        libc::SIGCHLD => &SIGCHLD_RECEIVED,
        _ => return,
    };
    flag.store(true, Ordering::SeqCst);
}

fn setup_signals() {
    let handler = SigHandler::Handler(signal_handler);
    let flags = SaFlags::SA_RESTART;
    let action = SigAction::new(handler, flags, SigSet::empty());

    unsafe {
        signal::sigaction(Signal::SIGTERM, &action).unwrap();
        signal::sigaction(Signal::SIGINT, &action).unwrap();
        signal::sigaction(Signal::SIGUSR1, &action).unwrap();
        signal::sigaction(Signal::SIGUSR2, &action).unwrap();
        signal::sigaction(Signal::SIGCHLD, &action).unwrap();
    }
}
```

## Zombie Reaping

```rust
/// Collects every child that has already terminated, without blocking.
///
/// Returns `server_pid` unchanged when the server was not among the reaped
/// children, and `None` when it was.
fn reap_zombies(server_pid: Option<Pid>) -> Option<Pid> {
    let any_child = Pid::from_raw(-1);
    let mut still_running = server_pid;

    loop {
        let reaped = match waitpid(any_child, Some(WaitPidFlag::WNOHANG)) {
            Ok(WaitStatus::Exited(pid, code)) => {
                eprintln!("Process {} exited with code {}", pid, code);
                pid
            }
            Ok(WaitStatus::Signaled(pid, sig, _)) => {
                eprintln!("Process {} killed by signal {:?}", pid, sig);
                pid
            }
            // Nothing left to reap, no children at all, or any other
            // status/error: stop collecting.
            _ => break,
        };

        if server_pid == Some(reaped) {
            still_running = None;
        }
    }

    still_running
}
```

## Server Management

```rust
fn spawn_server() -> Option<Pid> {
    match unsafe { fork() } {
        Ok(ForkResult::Child) => {
            let prog = CString::new("/usr/bin/zinit-server").unwrap();
            let args = [prog.clone()];
            execv(&prog, &args).expect("Failed to exec zinit-server");
            unreachable!()
        }
        Ok(ForkResult::Parent { child }) => {
            eprintln!("Spawned zinit-server with PID {}", child);
            Some(child)
        }
        Err(e) => {
            eprintln!("Failed to fork: {}", e);
            None
        }
    }
}

/// Replaces the running zinit-server with a freshly spawned one.
///
/// When a server is tracked, it is sent SIGUSR1 and polled every 100 ms, up
/// to 50 times, for termination; if it is still running afterwards it is
/// killed with SIGKILL and waited for. In every case `server_pid` ends up
/// holding the result of `spawn_server()`.
fn soft_restart_server(server_pid: &mut Option<Pid>) {
    if let Some(old) = server_pid.take() {
        let _ = signal::kill(old, Signal::SIGUSR1);

        let mut exited = false;
        for _ in 0..50 {
            thread::sleep(Duration::from_millis(100));
            let status = waitpid(old, Some(WaitPidFlag::WNOHANG));
            if matches!(
                status,
                Ok(WaitStatus::Exited(_, _)) | Ok(WaitStatus::Signaled(_, _, _))
            ) {
                exited = true;
                break;
            }
        }

        // Grace period elapsed without the server terminating.
        if !exited {
            let _ = signal::kill(old, Signal::SIGKILL);
            let _ = waitpid(old, None);
        }
    }

    *server_pid = spawn_server();
}
```

## Container Detection

```rust
/// Detect if we're running in a container (PID namespace).
///
/// Reads `/proc/1/sched` and compares the PID reported in its header line
/// with 1. Returns `false` when the file cannot be read or parsed, so the
/// caller falls back to the bare-metal/VM path.
///
/// NOTE(review): this heuristic depends on the kernel reporting the PID as
/// seen from outside the namespace; verify that on the target kernels.
fn is_container() -> bool {
    std::fs::read_to_string("/proc/1/sched")
        .ok()
        .and_then(|contents| parse_sched_host_pid(&contents))
        .map(|real_pid| real_pid != 1)
        .unwrap_or(false)
}

/// Extracts the PID from the header line of a `/proc/<pid>/sched` file.
///
/// Header format: `"zinit-pid1 (12345, #threads: 1)"`. The PID is the number
/// between the last `(` of the first line and the following `,`, so process
/// names containing `(` or `,` are handled. Returns `None` instead of
/// panicking when the line does not have that shape.
fn parse_sched_host_pid(contents: &str) -> Option<u32> {
    let header = contents.lines().next()?;
    let (_, after_paren) = header.rsplit_once('(')?;
    let (pid, _) = after_paren.split_once(',')?;
    pid.trim().parse().ok()
}
```

- **Container**: `real_pid` != 1 -> `exit(0)` is fine
- **VM/bare-metal**: `real_pid` == 1 -> must use `reboot()` syscall

## Shutdown Sequence

```rust
fn do_shutdown(server_pid: Option<Pid>, mode: ShutdownMode) {
    eprintln!("Initiating shutdown (mode: {:?})", mode);

    // 1. Tell zinit-server to stop all services
    if let Ok(mut client) = ZinitClient::connect_default() {
        let _ = client.shutdown();
    }

    // 2. Wait for server to exit (30s timeout)
    if let Some(pid) = server_pid {
        for _ in 0..300 {
            thread::sleep(Duration::from_millis(100));
            match waitpid(pid, Some(WaitPidFlag::WNOHANG)) {
                Ok(WaitStatus::Exited(_, _)) | Ok(WaitStatus::Signaled(_, _, _)) => {
                    break;
                }
                _ => {}
            }
        }
        
        let _ = signal::kill(pid, Signal::SIGKILL);
        let _ = waitpid(pid, None);
    }

    // 3. Reap remaining zombies
    loop {
        match waitpid(Pid::from_raw(-1), Some(WaitPidFlag::WNOHANG)) {
            Err(nix::errno::Errno::ECHILD) => break,
            Ok(WaitStatus::StillAlive) => break,
            _ => continue,
        }
    }

    // 4. Sync filesystems
    unsafe { libc::sync(); }

    // 5. Container vs bare-metal/VM
    if is_container() {
        eprintln!("Container detected, exiting...");
        match mode {
            ShutdownMode::Poweroff | ShutdownMode::Reboot => std::process::exit(0),
            ShutdownMode::None => std::process::exit(1),
        }
    }

    // 6. Bare-metal/VM: reboot syscall - NEVER exit!
    match mode {
        ShutdownMode::Reboot => {
            eprintln!("Rebooting...");
            let _ = reboot(RebootMode::RB_AUTOBOOT);
        }
        ShutdownMode::Poweroff => {
            eprintln!("Powering off...");
            let _ = reboot(RebootMode::RB_POWER_OFF);
        }
        ShutdownMode::None => {
            eprintln!("Unexpected shutdown state, rebooting...");
            let _ = reboot(RebootMode::RB_AUTOBOOT);
        }
    }

    // If reboot syscall failed, loop forever
    eprintln!("Reboot syscall failed, halting...");
    loop {
        thread::sleep(Duration::from_secs(3600));
    }
}
```

## Auto-Update (Placeholder)

```rust
/// Placeholder for the self-update triggered by SIGUSR2; currently it only
/// logs that the feature is missing.
fn handle_self_update() {
    // TODO: Implement self-update
    // 1. Check for new zinit-pid1 binary
    // 2. Verify signature/checksum
    // 3. Replace binary
    // 4. Re-exec self: execv("/usr/bin/zinit-pid1", argv)
    //    This replaces process image but keeps PID 1
    const NOT_IMPLEMENTED: &str = "Self-update not yet implemented";

    eprintln!("{}", NOT_IMPLEMENTED);
}
```

Server update flow:
1. Server checks for updates
2. Server downloads new binary
3. Server signals pid1 with SIGUSR1
4. pid1 does `soft_restart_server()` which respawns with new binary

## Signal Protocol

| Signal (to pid1) | Source | Action |
|--------|---------|--------|
| `SIGTERM` | External | Poweroff system |
| `SIGINT` | External | Reboot system |
| `SIGUSR1` | External | Soft restart zinit-server |
| `SIGUSR2` | External | Check for self-update |
| `SIGCHLD` | Kernel | Reap zombies |

| Signal (to server) | Source | Action |
|--------|-----------|--------|
| `SIGTERM` | From pid1 | Full shutdown (stop services, exit) |
| `SIGUSR1` | From pid1 | Soft exit (exit without stopping services) |

## Project Structure

```
zinit-pid1/
├── Cargo.toml
└── src/
    ├── main.rs        # Entry point, main loop
    ├── signals.rs     # Signal setup and handlers
    ├── reaper.rs      # Zombie reaping
    ├── server.rs      # Server spawn/restart
    ├── shutdown.rs    # Shutdown sequence
    └── update.rs      # Self-update (placeholder)
```