// zag-orch 0.13.0
//
// Orchestration library for zag — multi-session coordination for AI coding agents
// Documentation
use anyhow::{Result, bail};
use zag_agent::process_store::{ProcessEntry, ProcessStore};

/// Translate a user-supplied process id into a concrete one.
///
/// Any id other than the literal `"self"` is returned as an owned copy.
/// `"self"` is looked up in the `ZAG_PROCESS_ID` environment variable.
///
/// # Errors
///
/// Returns an error when `"self"` is requested but `ZAG_PROCESS_ID` cannot be
/// read from the environment.
fn resolve_process_id(id: &str) -> Result<String> {
    if id != "self" {
        return Ok(id.to_string());
    }
    match std::env::var("ZAG_PROCESS_ID") {
        Ok(process_id) => Ok(process_id),
        Err(_) => Err(anyhow::anyhow!(
            "Cannot resolve \"self\": ZAG_PROCESS_ID is not set. \
             Are you running inside a zag session?"
        )),
    }
}

/// Work out the status to report for `entry`.
///
/// * A stored status of `"running"` is verified against the operating system
///   via `check_process_alive`, which yields `"running"` or `"dead"`.
/// * `"exited"` and `"killed"` are reported as stored.
/// * Every other stored status is reported as `"unknown"`.
pub fn resolve_live_status(entry: &ProcessEntry) -> &'static str {
    match entry.status.as_str() {
        "running" => check_process_alive(entry.pid),
        "exited" => "exited",
        "killed" => "killed",
        _ => "unknown",
    }
}

/// Probe whether the process `pid` still exists, using `kill` with no signal.
///
/// Returns `"running"` when the probe succeeds or fails with `EPERM` (the
/// process exists but belongs to someone we may not signal), and `"dead"` for
/// every other outcome.
///
/// A `pid` of 0, or one that does not fit in a positive `i32`, is reported as
/// `"dead"` without probing: passed to `kill`, such values would address a
/// process group or every process rather than a single process.
#[cfg(unix)]
fn check_process_alive(pid: u32) -> &'static str {
    use nix::errno::Errno;
    use nix::sys::signal::kill;
    use nix::unistd::Pid;

    let raw = match i32::try_from(pid) {
        Ok(raw) if raw > 0 => raw,
        _ => return "dead",
    };
    match kill(Pid::from_raw(raw), None) {
        // EPERM still proves the process exists; only the permission is missing.
        Ok(()) | Err(Errno::EPERM) => "running",
        Err(_) => "dead",
    }
}

/// Liveness probe used on non-Unix targets.
///
/// Performs no check at all: `_pid` is ignored and `"running"` is always
/// returned.
#[cfg(not(unix))]
fn check_process_alive(_pid: u32) -> &'static str {
    // On Windows, we cannot cheaply check liveness with signals.
    // Return "running" and let callers handle stale entries.
    "running"
}

#[cfg(unix)]
fn send_signal(pid: u32, signal: nix::sys::signal::Signal) -> Result<()> {
    use nix::sys::signal::kill;
    use nix::unistd::Pid;
    let pid = Pid::from_raw(pid as i32);
    kill(pid, signal).map_err(|e| anyhow::anyhow!("Failed to signal process: {e}"))
}

/// Signal delivery stand-in for non-Unix targets.
///
/// Never sends anything: it always returns an error that names `pid` and
/// points the user at `taskkill`. `_signal_name` is ignored.
#[cfg(not(unix))]
fn send_signal(pid: u32, _signal_name: &str) -> Result<()> {
    bail!(
        "Process signaling is not supported on Windows. Use taskkill /PID {} instead.",
        pid
    );
}

/// A process entry with resolved live status.
///
/// Serializes as one flat map: the fields of `entry` (when it is a JSON
/// object) followed by a single `live_status` key taken from the
/// `live_status` field.
#[derive(Debug)]
pub struct ProcessInfo {
    /// The stored process entry as a JSON value. It may itself carry a
    /// `live_status` key; that copy is skipped when serializing.
    pub entry: serde_json::Value,
    /// The status reported for the process (for example `"running"`).
    pub live_status: String,
}

impl serde::Serialize for ProcessInfo {
    /// Flatten `entry` and `live_status` into one map.
    ///
    /// Written by hand rather than with `#[serde(flatten)]` because the
    /// flattened form emits `live_status` twice whenever `entry` already
    /// contains that key, producing JSON with a duplicate key.
    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeMap;

        let mut map = serializer.serialize_map(None)?;
        if let serde_json::Value::Object(fields) = &self.entry {
            for (key, value) in fields {
                if key.as_str() != "live_status" {
                    map.serialize_entry(key, value)?;
                }
            }
        }
        map.serialize_entry("live_status", &self.live_status)?;
        map.end()
    }
}

/// List processes with resolved live status.
///
/// * `running` — keep only entries whose live status is `"running"`.
/// * `limit` — forwarded to `ProcessStore::list_recent`; the filters below run
///   on whatever that call returns.
/// * `provider` — keep only entries whose provider equals this value.
///
/// Each returned `ProcessInfo` carries the live status both in its
/// `live_status` field and as a `live_status` key inside `entry`.
///
/// # Errors
///
/// Fails when the process store cannot be loaded or an entry cannot be
/// converted to JSON.
pub fn list_processes(
    running: bool,
    limit: Option<usize>,
    provider: Option<&str>,
) -> Result<Vec<ProcessInfo>> {
    let store = ProcessStore::load()?;
    store
        .list_recent(limit)
        .into_iter()
        // Cheap field comparison first, so the OS is only probed for entries
        // that can still end up in the result.
        .filter(|e| match provider {
            Some(p) => e.provider == p,
            None => true,
        })
        // Probe each entry exactly once so the filter decision and the
        // reported status cannot disagree.
        .map(|e| (e, resolve_live_status(e)))
        .filter(|(_, live)| !running || *live == "running")
        .map(|(e, live)| -> Result<ProcessInfo> {
            let mut entry = serde_json::to_value(e)?;
            if let serde_json::Value::Object(fields) = &mut entry {
                fields.insert(
                    "live_status".to_string(),
                    serde_json::Value::String(live.to_string()),
                );
            }
            Ok(ProcessInfo {
                entry,
                live_status: live.to_string(),
            })
        })
        .collect()
}

/// Get a single process by ID with live status.
pub fn get_process(id: &str) -> Result<ProcessInfo> {
    let id = resolve_process_id(id)?;
    let store = ProcessStore::load()?;
    match store.find(&id) {
        Some(e) => {
            let live = resolve_live_status(e).to_string();
            let mut v = serde_json::to_value(e)?;
            if let serde_json::Value::Object(ref mut m) = v {
                m.insert(
                    "live_status".to_string(),
                    serde_json::Value::String(live.clone()),
                );
            }
            Ok(ProcessInfo {
                entry: v,
                live_status: live,
            })
        }
        None => bail!("Process not found: {id}"),
    }
}

/// Request a stop of the process with the given id (`SIGHUP` on Unix).
///
/// `id` may be the literal `"self"`.
///
/// # Errors
///
/// Fails when the id cannot be resolved, the store cannot be loaded, the
/// process is unknown, its live status is not `"running"`, or the signal
/// cannot be sent.
pub fn request_stop(id: &str) -> Result<()> {
    let id = resolve_process_id(id)?;
    // The store is a temporary here: only a copy of the entry outlives this statement.
    let target = ProcessStore::load()?
        .find(&id)
        .cloned()
        .ok_or_else(|| anyhow::anyhow!("Process not found: {id}"))?;

    match resolve_live_status(&target) {
        "running" => stop_process(target.pid),
        live => bail!("Process {id} is not running (status: {live})"),
    }
}

/// Kill the process with the given id (`SIGTERM` on Unix) and record it as
/// `"killed"` in the process store.
///
/// `id` may be the literal `"self"`.
///
/// # Errors
///
/// Fails when the id cannot be resolved, the store cannot be loaded or saved,
/// the process is unknown, its live status is not `"running"`, or the signal
/// cannot be sent.
pub fn request_kill(id: &str) -> Result<()> {
    let id = resolve_process_id(id)?;
    let mut store = ProcessStore::load()?;
    let target = store
        .find(&id)
        .cloned()
        .ok_or_else(|| anyhow::anyhow!("Process not found: {id}"))?;

    match resolve_live_status(&target) {
        "running" => {}
        live => bail!("Process {id} is not running (status: {live})"),
    }

    // Persist the new status before signalling: when the target is this very
    // process, nothing after the signal is guaranteed to run.
    store.update_status(&id, "killed", None);
    store.save()?;
    kill_process(target.pid)
}

/// Execute a `ps` subcommand and print its result to stdout.
///
/// With `json` set, `List` and `Show` print compact JSON in which every entry
/// is extended with a `live_status` key; otherwise they print human-readable
/// text. `Stop` and `Kill` print the same progress lines either way.
///
/// # Errors
///
/// Fails when `"self"` cannot be resolved, the process store cannot be loaded
/// or saved, an entry cannot be serialized, the requested process is unknown
/// or not running, or a signal cannot be sent.
pub fn run_ps(command: PsCommand, json: bool) -> Result<()> {
    match command {
        PsCommand::List {
            running,
            limit,
            provider,
            children,
        } => run_ps_list(
            running,
            limit,
            provider.as_deref(),
            children.as_deref(),
            json,
        ),
        PsCommand::Show { id } => run_ps_show(&id, json),
        PsCommand::Stop { id } => run_ps_stop(&id),
        PsCommand::Kill { id } => run_ps_kill(&id),
    }
}

/// Serialize `entry` to JSON and add `live` under the `live_status` key.
fn entry_json_with_live_status(entry: &ProcessEntry, live: &str) -> Result<serde_json::Value> {
    let mut value = serde_json::to_value(entry)?;
    if let serde_json::Value::Object(fields) = &mut value {
        fields.insert(
            "live_status".to_string(),
            serde_json::Value::String(live.to_string()),
        );
    }
    Ok(value)
}

/// `ps list`: print the filtered entries as a JSON array or as a text table.
fn run_ps_list(
    running: bool,
    limit: Option<usize>,
    provider: Option<&str>,
    children: Option<&str>,
    json: bool,
) -> Result<()> {
    let store = ProcessStore::load()?;
    // Field-based filters run first; liveness is then probed once per entry so
    // the `running` filter and the printed status always agree.
    let rows: Vec<(&ProcessEntry, &'static str)> = store
        .list_recent(limit)
        .into_iter()
        .filter(|e| match provider {
            Some(p) => e.provider == p,
            None => true,
        })
        .filter(|e| match children {
            Some(parent_id) => {
                e.parent_session_id.as_deref() == Some(parent_id)
                    || e.parent_process_id.as_deref() == Some(parent_id)
            }
            None => true,
        })
        .map(|e| (e, resolve_live_status(e)))
        .filter(|(_, live)| !running || *live == "running")
        .collect();

    if json {
        let with_live = rows
            .iter()
            .map(|&(e, live)| entry_json_with_live_status(e, live))
            .collect::<Result<Vec<_>>>()?;
        println!("{}", serde_json::to_string(&with_live)?);
        return Ok(());
    }
    if rows.is_empty() {
        println!("No processes found.");
        return Ok(());
    }
    println!(
        "{:<38} {:<7} {:<8} {:<10} {:<10} {:<7} {:<22} PROMPT",
        "ID", "PID", "STATUS", "PROVIDER", "MODEL", "CMD", "STARTED"
    );
    println!("{}", "-".repeat(130));
    for &(e, live) in &rows {
        // Only the first 40 characters of the prompt are shown in the table.
        let prompt_display = e
            .prompt
            .as_deref()
            .unwrap_or("")
            .chars()
            .take(40)
            .collect::<String>();
        println!(
            "{:<38} {:<7} {:<8} {:<10} {:<10} {:<7} {:<22} {}",
            e.id,
            e.pid,
            live,
            e.provider,
            e.model,
            e.command,
            e.started_at.chars().take(20).collect::<String>(),
            prompt_display
        );
    }
    Ok(())
}

/// `ps show`: print one entry as JSON or as labelled text lines.
fn run_ps_show(id: &str, json: bool) -> Result<()> {
    let id = resolve_process_id(id)?;
    let store = ProcessStore::load()?;
    let e = store
        .find(&id)
        .ok_or_else(|| anyhow::anyhow!("Process not found: {id}"))?;
    let live = resolve_live_status(e);
    if json {
        let v = entry_json_with_live_status(e, live)?;
        println!("{}", serde_json::to_string(&v)?);
        return Ok(());
    }
    println!("Process ID:  {}", e.id);
    println!("PID:         {}", e.pid);
    println!("Status:      {live}");
    println!("Provider:    {}", e.provider);
    println!("Model:       {}", e.model);
    println!("Command:     {}", e.command);
    println!("Started:     {}", e.started_at);
    if let Some(ref exited) = e.exited_at {
        println!("Exited:      {exited}");
    }
    if let Some(code) = e.exit_code {
        println!("Exit code:   {code}");
    }
    if let Some(ref sid) = e.session_id {
        println!("Session ID:  {sid}");
    }
    if let Some(ref root) = e.root {
        println!("Root:        {root}");
    }
    if let Some(ref prompt) = e.prompt {
        println!("Prompt:      {prompt}");
    }
    Ok(())
}

/// `ps stop`: send the stop signal to a running process and report progress.
fn run_ps_stop(id: &str) -> Result<()> {
    let id = resolve_process_id(id)?;
    let entry = ProcessStore::load()?
        .find(&id)
        .ok_or_else(|| anyhow::anyhow!("Process not found: {id}"))?
        .clone();
    let live = resolve_live_status(&entry);
    if live != "running" {
        bail!("Process {id} is not running (status: {live})");
    }
    println!(
        "\x1b[33m>\x1b[0m Sending stop signal to process {} ({})",
        entry.pid, entry.id
    );
    stop_process(entry.pid)?;
    println!("\x1b[32m✓\x1b[0m Stop signal sent");
    Ok(())
}

/// `ps kill`: mark a running process as killed, then send the kill signal.
fn run_ps_kill(id: &str) -> Result<()> {
    let id = resolve_process_id(id)?;
    let mut store = ProcessStore::load()?;
    let entry = store
        .find(&id)
        .ok_or_else(|| anyhow::anyhow!("Process not found: {id}"))?
        .clone();
    let live = resolve_live_status(&entry);
    if live != "running" {
        bail!("Process {id} is not running (status: {live})");
    }
    // Update store before sending the signal — if this is a self-kill,
    // the SIGTERM will terminate this process and post-signal code may
    // not execute.
    store.update_status(&id, "killed", None);
    store.save()?;
    println!(
        "\x1b[33m>\x1b[0m Sending kill signal to process {} ({})",
        entry.pid, entry.id
    );
    kill_process(entry.pid)?;
    println!("\x1b[32m✓\x1b[0m Process killed");
    Ok(())
}

/// Ask the process `pid` to stop by sending it `SIGHUP` (Unix targets).
///
/// Any failure from `send_signal` is returned unchanged.
#[cfg(unix)]
fn stop_process(pid: u32) -> Result<()> {
    use nix::sys::signal::Signal;

    send_signal(pid, Signal::SIGHUP)
}

/// Non-Unix counterpart of the stop request.
///
/// Forwards `pid` and the label `"stop"` to `send_signal` and returns its
/// result unchanged.
#[cfg(not(unix))]
fn stop_process(pid: u32) -> Result<()> {
    send_signal(pid, "stop")
}

/// Terminate the process `pid` by sending it `SIGTERM` (Unix targets).
///
/// Any failure from `send_signal` is returned unchanged.
#[cfg(unix)]
fn kill_process(pid: u32) -> Result<()> {
    use nix::sys::signal::Signal;

    send_signal(pid, Signal::SIGTERM)
}

/// Non-Unix counterpart of the kill request.
///
/// Forwards `pid` and the label `"kill"` to `send_signal` and returns its
/// result unchanged.
#[cfg(not(unix))]
fn kill_process(pid: u32) -> Result<()> {
    send_signal(pid, "kill")
}

// Subcommands dispatched by `run_ps`.
//
// The `///` comments below are consumed by the `clap::Subcommand` derive as
// help text, so rewording them changes what the CLI prints.
//
// NOTE(review): `Kill` is described as "forceful termination", but on Unix
// `kill_process` sends SIGTERM rather than SIGKILL — confirm the wording is
// intended.
#[derive(clap::Subcommand)]
pub enum PsCommand {
    /// List processes (default)
    List {
        /// Show only running processes
        #[arg(long)]
        running: bool,
        /// Show only the N most recent processes
        #[arg(short = 'n', long)]
        limit: Option<usize>,
        /// Filter by provider
        #[arg(short = 'p', long)]
        provider: Option<String>,
        /// Show only child processes of this session or process ID
        #[arg(long)]
        children: Option<String>,
    },
    /// Show details of a specific process
    Show {
        /// Process ID (or "self" to use current process)
        id: String,
    },
    /// Send stop signal to a running process (graceful stop request)
    Stop {
        /// Process ID (or "self" to use current process)
        id: String,
    },
    /// Send kill signal to a running process (forceful termination)
    Kill {
        /// Process ID (or "self" to use current process)
        id: String,
    },
}

#[cfg(test)]
#[path = "ps_tests.rs"]
mod tests;