mecha10-cli 0.1.47

Mecha10 CLI tool
Documentation
//! Mecha10 Remote Runtime
//!
//! This binary runs inside the mecha10-remote Docker container and supervises
//! all remote nodes (AI/ML nodes that run off-robot).
//!
//! It reads the mecha10.json config (mounted into the container) and spawns
//! each remote node as a separate OS process using `mecha10 node <name>`.
//!
//! # Usage
//!
//! ```bash
//! # Inside the Docker container:
//! mecha10-remote
//! ```
//!
//! The config file is expected at `/app/mecha10.json` by default,
//! or can be specified via the `MECHA10_CONFIG` environment variable.

use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use tokio::signal;
use tracing::{error, info, warn};

/// Subset of `mecha10.json` that the remote runtime reads.
///
/// Only the fields declared here are deserialized.
#[derive(Debug, Deserialize)]
struct ProjectConfig {
    /// Project name; logged at startup.
    name: String,
    /// Optional `targets` section; `None` when the key is absent.
    #[serde(default)]
    targets: Option<TargetsConfig>,
}

/// The `targets` section of `mecha10.json`.
#[derive(Debug, Deserialize, Default)]
struct TargetsConfig {
    /// Node identifiers (e.g. `@mecha10/object-detector`) to run in the remote
    /// runtime; defaults to an empty list when the key is missing.
    #[serde(default)]
    remote: Vec<String>,
}

/// Resolve a `@mecha10/*` package identifier to the node name that is passed
/// to `mecha10 node <name>`.
///
/// Returns `None` for any identifier that is not listed in the table below.
/// The comparison is exact and case-sensitive.
fn node_identifier_to_name(identifier: &str) -> Option<&'static str> {
    // (package identifier, bundled node name). The node name usually mirrors
    // the package suffix; `@mecha10/diagnostics` is the one exception.
    const BUNDLED_NODES: &[(&str, &str)] = &[
        ("@mecha10/object-detector", "object-detector"),
        ("@mecha10/image-classifier", "image-classifier"),
        ("@mecha10/llm-command", "llm-command"),
        ("@mecha10/behavior-executor", "behavior-executor"),
        ("@mecha10/diagnostics", "diagnostics-node"),
        ("@mecha10/imu", "imu"),
        ("@mecha10/listener", "listener"),
        ("@mecha10/motor", "motor"),
        ("@mecha10/simulation-bridge", "simulation-bridge"),
        ("@mecha10/speaker", "speaker"),
        ("@mecha10/teleop", "teleop"),
        ("@mecha10/websocket-bridge", "websocket-bridge"),
    ];

    BUNDLED_NODES
        .iter()
        .find(|(package, _)| *package == identifier)
        .map(|&(_, node)| node)
}

/// Launch the bundled node `name` as a child OS process.
///
/// Runs `mecha10 node <name>` with stdout and stderr inherited from this
/// process, so the node's output is interleaved with the supervisor's own.
///
/// # Errors
///
/// Returns an error, with the node name attached as context, if the process
/// cannot be spawned.
fn spawn_node(name: &str) -> Result<Child> {
    let mut command = Command::new("mecha10");
    command
        .arg("node")
        .arg(name)
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit());

    command
        .spawn()
        .with_context(|| format!("Failed to spawn node: {}", name))
}

/// Process supervisor that manages child processes
struct ProcessSupervisor {
    /// Tracked children, keyed by the node identifier they were spawned for.
    processes: HashMap<String, Child>,
}

impl ProcessSupervisor {
    /// Create a supervisor that tracks no processes.
    fn new() -> Self {
        Self {
            processes: HashMap::new(),
        }
    }

    /// Spawn the node `name` and track it under `identifier`.
    ///
    /// If a child is already tracked under `identifier` and has not been
    /// observed to exit, nothing is spawned and `Ok(())` is returned.
    /// Inserting a second child under the same key would drop the first
    /// handle and leave that process running but unsupervised: never
    /// monitored and never stopped on shutdown.
    ///
    /// # Errors
    ///
    /// Propagates the error from `spawn_node` when the process cannot be
    /// started.
    fn spawn(&mut self, identifier: &str, name: &str) -> Result<()> {
        if let Some(existing) = self.processes.get_mut(identifier) {
            // Anything other than a confirmed exit is treated as "still running".
            if !matches!(existing.try_wait(), Ok(Some(_))) {
                warn!(
                    "Node {} is already running (pid: {}), not spawning a duplicate",
                    identifier,
                    existing.id()
                );
                return Ok(());
            }
        }

        let child = spawn_node(name)?;
        info!("Started node {} (pid: {}) for {}", name, child.id(), identifier);
        self.processes.insert(identifier.to_string(), child);
        Ok(())
    }

    /// Poll every tracked child once and stop tracking those that have exited.
    ///
    /// A clean exit is logged at info level and a non-zero exit at error
    /// level. A child whose status cannot be queried is logged and kept, so
    /// it is polled again on the next call.
    fn check_processes(&mut self) {
        self.processes
            .retain(|identifier, child| match child.try_wait() {
                Ok(Some(status)) => {
                    if status.success() {
                        info!("Node {} exited normally", identifier);
                    } else {
                        error!("Node {} exited with status: {}", identifier, status);
                    }
                    false
                }
                // Still running.
                Ok(None) => true,
                Err(e) => {
                    error!("Error checking node {} status: {}", identifier, e);
                    true
                }
            });
    }

    /// Stop all tracked processes, gracefully where possible.
    ///
    /// On Unix each child is sent SIGTERM; elsewhere it is killed outright.
    /// The children then get up to three seconds to exit, after which any
    /// that remain are force-killed and reaped. The tracking map is empty
    /// when this returns.
    ///
    /// This blocks the calling thread for at most the grace period and
    /// returns as soon as every child has exited. It does nothing when no
    /// process is tracked.
    fn stop_all(&mut self) {
        use std::time::{Duration, Instant};

        const GRACE_PERIOD: Duration = Duration::from_secs(3);
        const POLL_INTERVAL: Duration = Duration::from_millis(100);

        if self.processes.is_empty() {
            return;
        }

        info!("Stopping {} processes...", self.processes.len());

        // First try SIGTERM
        for (identifier, child) in self.processes.iter_mut() {
            info!("Sending SIGTERM to {} (pid: {})", identifier, child.id());
            #[cfg(unix)]
            {
                use nix::sys::signal::{kill, Signal};
                use nix::unistd::Pid;
                match i32::try_from(child.id()) {
                    Ok(raw_pid) => {
                        if let Err(e) = kill(Pid::from_raw(raw_pid), Signal::SIGTERM) {
                            warn!("Failed to send SIGTERM to {}: {}", identifier, e);
                        }
                    }
                    Err(_) => {
                        // A pid that wraps to a negative value would address a
                        // process group, so fall back to killing this child only.
                        let _ = child.kill();
                    }
                }
            }
            #[cfg(not(unix))]
            {
                let _ = child.kill();
            }
        }

        // Give processes time to exit gracefully, but do not wait longer than
        // needed: stop polling as soon as every child has been reaped.
        let deadline = Instant::now() + GRACE_PERIOD;
        loop {
            let all_exited = self
                .processes
                .values_mut()
                .all(|child| matches!(child.try_wait(), Ok(Some(_))));
            if all_exited || Instant::now() >= deadline {
                break;
            }
            std::thread::sleep(POLL_INTERVAL);
        }

        // Force kill any remaining. A status query error is treated the same
        // as "still running". Kill and wait are best-effort here.
        for (identifier, child) in self.processes.iter_mut() {
            if matches!(child.try_wait(), Ok(Some(_))) {
                // Already exited
                continue;
            }
            warn!("Force killing {} (pid: {})", identifier, child.id());
            let _ = child.kill();
            let _ = child.wait();
        }

        self.processes.clear();
    }

    /// Report whether any process is still tracked.
    ///
    /// This reflects the tracking map only. Entries for exited children are
    /// removed by `check_processes` and `stop_all`, so a child that exited
    /// since the last poll still counts here.
    fn has_running(&self) -> bool {
        !self.processes.is_empty()
    }
}

/// Stops any still-tracked children when the supervisor is dropped.
impl Drop for ProcessSupervisor {
    fn drop(&mut self) {
        // Nothing tracked means there is nothing to stop.
        if !self.has_running() {
            return;
        }
        self.stop_all();
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    // Initialize logging
    tracing_subscriber::fmt()
        .with_env_filter(std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string()))
        .init();

    info!("Mecha10 Remote Runtime starting...");

    // Get config path from environment or use default
    let config_path = std::env::var("MECHA10_CONFIG")
        .map(PathBuf::from)
        .unwrap_or_else(|_| PathBuf::from("/app/mecha10.json"));

    info!("Loading config from: {}", config_path.display());

    // Load config
    let config_content = tokio::fs::read_to_string(&config_path)
        .await
        .context("Failed to read mecha10.json")?;

    let config: ProjectConfig = serde_json::from_str(&config_content).context("Failed to parse mecha10.json")?;

    info!("Project: {}", config.name);

    // Get remote nodes
    let remote_nodes = config.targets.as_ref().map(|t| t.remote.clone()).unwrap_or_default();

    if remote_nodes.is_empty() {
        warn!("No remote nodes configured in targets.remote");
        info!("Remote runtime has nothing to do, exiting.");
        return Ok(());
    }

    info!("Remote nodes to start: {:?}", remote_nodes);

    // Create process supervisor
    let mut supervisor = ProcessSupervisor::new();

    // Spawn each remote node
    let mut failed_nodes = Vec::new();
    for identifier in &remote_nodes {
        match node_identifier_to_name(identifier) {
            Some(name) => {
                if let Err(e) = supervisor.spawn(identifier, name) {
                    error!("Failed to spawn {}: {}", identifier, e);
                    failed_nodes.push(identifier.clone());
                }
            }
            None => {
                if identifier.starts_with("@local/") {
                    warn!(
                        "Custom node {} is not bundled, skipping. Build locally for custom nodes.",
                        identifier
                    );
                } else {
                    warn!("Unknown node identifier: {}", identifier);
                }
                failed_nodes.push(identifier.clone());
            }
        }
    }

    if !failed_nodes.is_empty() {
        error!(
            "Failed to start {} node(s): {}",
            failed_nodes.len(),
            failed_nodes.join(", ")
        );
        error!("Check the logs above for specific errors.");
        error!("Common issues:");
        error!("  - Missing config: ensure configs/@mecha10/<node>/config.json exists");
        error!("  - Missing model: ensure models/ directory has required model files");
        error!("  - Missing API key: run `mecha10 auth login` to authenticate");
    }

    if !supervisor.has_running() {
        warn!("No valid remote nodes running");
        warn!("Container will stay up to allow log inspection. Press Ctrl+C to stop.");
    } else {
        info!("Remote nodes started. Press Ctrl+C to stop.");
    }

    // Main loop: monitor processes and wait for shutdown signal
    // Note: We don't exit when nodes fail - this keeps the container up for debugging
    loop {
        tokio::select! {
            _ = signal::ctrl_c() => {
                info!("Shutdown signal received");
                break;
            }
            _ = tokio::time::sleep(tokio::time::Duration::from_secs(5)) => {
                supervisor.check_processes();
                // Don't exit when nodes fail - keep container running for debugging
                // The user can check logs and fix configuration
            }
        }
    }

    supervisor.stop_all();
    info!("Mecha10 Remote Runtime stopped.");
    Ok(())
}