//! swarm-engine-ui 0.1.6
//!
//! CLI and Desktop UI for SwarmEngine
//! Llama Server Management commands

use std::path::PathBuf;

use clap::Subcommand;
use swarm_engine_core::config::PathResolver;

// Subcommands for the `llama` CLI verb. NOTE: the `///` doc comments on
// variants and fields are rendered by clap as `--help` text, so they are
// user-visible strings — do not edit them casually.
#[derive(Subcommand)]
pub enum LlamaAction {
    /// Start llama-server in background
    Start {
        /// Path to GGUF model file
        #[arg(short, long)]
        model: PathBuf,

        /// Path to LoRA adapter GGUF file
        #[arg(long)]
        lora: Option<PathBuf>,

        /// Host to bind (default: 127.0.0.1)
        #[arg(long, default_value = "127.0.0.1")]
        host: String,

        /// Port to listen on (default: 8080)
        #[arg(short, long, default_value = "8080")]
        port: u16,

        /// Number of GPU layers (default: 99 = all layers on GPU)
        #[arg(long, default_value = "99")]
        n_gpu_layers: u32,

        /// Context size (default: auto = 1024 * parallel)
        #[arg(long)]
        ctx_size: Option<u32>,

        /// Number of parallel slots for batch processing (default: 4)
        #[arg(long, default_value = "4")]
        parallel: u32,
    },
    /// Stop running llama-server
    Stop,
    /// Show llama-server logs (tail -f)
    Logs {
        /// Number of lines to show (default: 50)
        #[arg(short = 'n', long, default_value = "50")]
        lines: usize,

        /// Follow log output (like tail -f)
        #[arg(short, long)]
        follow: bool,
    },
    /// Check llama-server status
    Status,
}

/// PID file path for llama-server
/// Path of the PID file used to track the background llama-server process.
fn llama_pid_file() -> PathBuf {
    let mut path = PathResolver::user_data_dir();
    path.push("llama-server.pid");
    path
}

/// Log file path for llama-server
/// Path of the log file that captures llama-server stdout/stderr.
fn llama_log_file() -> PathBuf {
    let mut path = PathResolver::user_data_dir();
    path.push("llama-server.log");
    path
}

/// Manage llama-server
pub fn cmd_llama(action: LlamaAction) {
    match action {
        LlamaAction::Start {
            model,
            lora,
            host,
            port,
            n_gpu_layers,
            ctx_size,
            parallel,
        } => cmd_llama_start(model, lora, host, port, n_gpu_layers, ctx_size, parallel),
        LlamaAction::Stop => cmd_llama_stop(),
        LlamaAction::Logs { lines, follow } => cmd_llama_logs(lines, follow),
        LlamaAction::Status => cmd_llama_status(),
    }
}

/// Start llama-server in background
/// Start llama-server in background.
///
/// Validates the model/LoRA paths first, refuses to start if a live server
/// is already recorded in the PID file, then spawns `llama-server` with
/// stdout/stderr redirected to a log file and records its PID.
fn cmd_llama_start(
    model: PathBuf,
    lora: Option<PathBuf>,
    host: String,
    port: u16,
    n_gpu_layers: u32,
    ctx_size: Option<u32>,
    parallel: u32,
) {
    // Validate all input paths up front, before printing the startup banner
    // or touching the PID/log files. (Previously the LoRA path was only
    // checked after the banner had already been printed.)
    if !model.exists() {
        eprintln!("Model file not found: {}", model.display());
        std::process::exit(1);
    }
    if let Some(ref lora_path) = lora {
        if !lora_path.exists() {
            eprintln!("LoRA adapter not found: {}", lora_path.display());
            std::process::exit(1);
        }
    }

    // Auto-calculate ctx_size if not specified: 1024 tokens per slot.
    // Remember whether it was auto-derived so the banner can say so next to
    // the "Context size:" line instead of printing out of order at the top.
    let auto_ctx = ctx_size.is_none();
    let ctx_size = ctx_size.unwrap_or(1024 * parallel);

    // Check if a server is already running. `kill -0` probes process
    // liveness without sending a signal (Unix-only, like the rest of this
    // module's process management).
    let pid_file = llama_pid_file();
    if pid_file.exists() {
        if let Ok(pid_str) = std::fs::read_to_string(&pid_file) {
            if let Ok(pid) = pid_str.trim().parse::<u32>() {
                let check = std::process::Command::new("kill")
                    .args(["-0", &pid.to_string()])
                    .status();
                if check.map(|s| s.success()).unwrap_or(false) {
                    eprintln!("llama-server is already running (PID: {})", pid);
                    eprintln!("Use 'swarm-engine llama stop' to stop it first");
                    std::process::exit(1);
                }
                // Stale PID file: fall through and overwrite it on spawn.
            }
        }
    }

    // Ensure the data directory exists before creating PID/log files in it.
    let data_dir = PathResolver::user_data_dir();
    if !data_dir.exists() {
        std::fs::create_dir_all(&data_dir).expect("Failed to create data directory");
    }

    let log_file = llama_log_file();

    println!("Starting llama-server...");
    println!("  Model: {}", model.display());
    if let Some(ref lora_path) = lora {
        println!("  LoRA: {}", lora_path.display());
    }
    println!("  Host: {}:{}", host, port);
    println!("  GPU layers: {}", n_gpu_layers);
    println!("  Context size: {}", ctx_size);
    if auto_ctx {
        println!("  (auto-calculated: 1024 * {} slots)", parallel);
    }
    println!("  Parallel slots: {}", parallel);
    println!("  Continuous batching: enabled");
    println!("  Log file: {}", log_file.display());

    // Build the command. Paths are passed as OsStr via `.arg(&path)` rather
    // than `path.to_str().unwrap()`, which would panic on non-UTF-8 paths.
    let mut cmd = std::process::Command::new("llama-server");
    cmd.arg("-m").arg(&model);
    cmd.args([
        "--host",
        &host,
        "--port",
        &port.to_string(),
        "-ngl",
        &n_gpu_layers.to_string(),
        "-c",
        &ctx_size.to_string(),
        "-np",
        &parallel.to_string(),
        "--cont-batching",
    ]);

    // Add LoRA adapter if specified (existence was validated above).
    if let Some(ref lora_path) = lora {
        cmd.arg("--lora").arg(lora_path);
    }

    // Redirect stdout and stderr to the same log file (two handles to one
    // underlying file so both streams interleave in it).
    let log = std::fs::File::create(&log_file).expect("Failed to create log file");
    let log_err = log.try_clone().expect("Failed to clone log file");

    cmd.stdout(std::process::Stdio::from(log));
    cmd.stderr(std::process::Stdio::from(log_err));

    // Spawn detached: the child keeps running after this CLI process exits.
    match cmd.spawn() {
        Ok(child) => {
            let pid = child.id();
            std::fs::write(&pid_file, pid.to_string()).expect("Failed to write PID file");
            println!("\nllama-server started (PID: {})", pid);
            println!("Endpoint: http://{}:{}", host, port);
            println!("\nUse 'swarm-engine llama logs -f' to follow the logs");
            println!("Use 'swarm-engine llama status' to check status");
        }
        Err(e) => {
            eprintln!("Failed to start llama-server: {}", e);
            eprintln!("\nMake sure llama-server is installed and in your PATH.");
            eprintln!("Install: brew install llama.cpp (macOS) or build from source");
            std::process::exit(1);
        }
    }
}

/// Stop llama-server
/// Stop a running llama-server by sending SIGTERM to the PID on record.
fn cmd_llama_stop() {
    let pid_file = llama_pid_file();

    // No PID file means nothing was started (or it was already cleaned up).
    if !pid_file.exists() {
        println!("llama-server is not running (no PID file found)");
        return;
    }

    let pid_str = std::fs::read_to_string(&pid_file).unwrap_or_else(|e| {
        eprintln!("Failed to read PID file: {}", e);
        std::process::exit(1);
    });

    // A corrupt PID file is removed so the next invocation starts clean.
    let Ok(pid) = pid_str.trim().parse::<u32>() else {
        eprintln!("Invalid PID in file: {}", pid_str);
        let _ = std::fs::remove_file(&pid_file);
        std::process::exit(1);
    };

    println!("Stopping llama-server (PID: {})...", pid);

    // Default `kill` signal is SIGTERM — graceful shutdown.
    match std::process::Command::new("kill").arg(pid.to_string()).status() {
        Ok(s) if s.success() => {
            let _ = std::fs::remove_file(&pid_file);
            println!("llama-server stopped");
        }
        Ok(_) => {
            // kill failed non-fatally: the process most likely no longer exists.
            let _ = std::fs::remove_file(&pid_file);
            println!("llama-server was not running");
        }
        Err(e) => {
            eprintln!("Failed to stop llama-server: {}", e);
            std::process::exit(1);
        }
    }
}

/// Show llama-server logs
/// Show llama-server logs, optionally following them like `tail -f`.
fn cmd_llama_logs(lines: usize, follow: bool) {
    let log_file = llama_log_file();

    if !log_file.exists() {
        println!("No log file found. Start llama-server first.");
        return;
    }

    let line_count = lines.to_string();

    if follow {
        // Hand the terminal to `tail -f`; it blocks until interrupted.
        let result = std::process::Command::new("tail")
            .args(["-f", "-n", &line_count])
            .arg(&log_file)
            .status();

        if let Err(e) = result {
            eprintln!("Failed to tail log file: {}", e);
        }
    } else {
        // One-shot: capture the last N lines and print them.
        let result = std::process::Command::new("tail")
            .args(["-n", &line_count])
            .arg(&log_file)
            .output();

        match result {
            Ok(o) => print!("{}", String::from_utf8_lossy(&o.stdout)),
            Err(e) => eprintln!("Failed to read log file: {}", e),
        }
    }
}

/// Check llama-server status
/// Check llama-server status: process liveness via the PID file, then an
/// HTTP health probe against the server's /health endpoint.
fn cmd_llama_status() {
    let pid_file = llama_pid_file();

    // No PID file → never started (or stopped cleanly).
    if !pid_file.exists() {
        println!("llama-server: stopped (no PID file)");
        return;
    }

    let pid_str = match std::fs::read_to_string(&pid_file) {
        Ok(s) => s,
        Err(_) => {
            println!("llama-server: unknown (failed to read PID file)");
            return;
        }
    };

    let pid: u32 = match pid_str.trim().parse() {
        Ok(p) => p,
        Err(_) => {
            println!("llama-server: unknown (invalid PID)");
            return;
        }
    };

    // `kill -0` probes process existence without sending a signal (Unix-only).
    let check = std::process::Command::new("kill")
        .args(["-0", &pid.to_string()])
        .status();

    if check.map(|s| s.success()).unwrap_or(false) {
        println!("llama-server: running (PID: {})", pid);

        // Try to get health status from API. A throwaway Tokio runtime is
        // built just for this one blocking HTTP call.
        //
        // NOTE(review): the URL hardcodes localhost:8080, but `llama start`
        // accepts arbitrary --host/--port. A server started on another port
        // will show "not responding" here even when healthy — fixing this
        // would require persisting the host/port alongside the PID.
        let rt = tokio::runtime::Runtime::new().expect("Failed to create runtime");
        let healthy = rt.block_on(async {
            let client = reqwest::Client::new();
            match client
                .get("http://localhost:8080/health")
                .timeout(std::time::Duration::from_secs(2))
                .send()
                .await
            {
                Ok(resp) => resp.status().is_success(),
                Err(_) => false,
            }
        });

        if healthy {
            println!("Health: OK (http://localhost:8080)");
        } else {
            // The model can take a while to load; treat this as soft failure.
            println!("Health: not responding (might still be loading model)");
        }

        // Show log file location
        let log_file = llama_log_file();
        if log_file.exists() {
            println!("Log file: {}", log_file.display());
        }
    } else {
        println!("llama-server: stopped (process not found)");
        // Clean up stale PID file so the next status/start runs clean.
        let _ = std::fs::remove_file(&pid_file);
    }
}