mi6-otel-server 0.2.3

OpenTelemetry HTTP server for mi6
Documentation
//! Process lifecycle management for the OTel server.
//!
//! This module provides functions to start, stop, and check the status
//! of the mi6 OTel server process.

use std::net::TcpStream;
use std::process::{Command, Stdio};
use std::time::Duration;

use anyhow::{Context, Result};
use mi6_core::OtelMode;

/// Default port for the OTel server.
///
/// Returned by [`default_port`]; callers pass a port explicitly to the other
/// functions in this module.
// NOTE(review): 4318 matches the port conventionally used for OTLP over HTTP —
// confirm this is the intended reason for the value.
pub const DEFAULT_PORT: u16 = 4318;

/// Report whether anything accepts TCP connections on `127.0.0.1:<port>`.
///
/// This is a pure connectivity probe: it attempts a loopback connection with a
/// 100 ms timeout and returns `true` when the connection is established. It
/// does not identify *what* is listening.
pub fn is_server_running(port: u16) -> bool {
    let loopback = std::net::SocketAddr::from(([127, 0, 0, 1], port));
    let probe_timeout = Duration::from_millis(100);

    // The stream is dropped immediately; only the connect outcome matters.
    matches!(TcpStream::connect_timeout(&loopback, probe_timeout), Ok(_))
}

/// Check if the server on this port is a mi6 OTel server.
///
/// Connects to `127.0.0.1:<port>`, sends `GET /health`, and looks for the
/// `mi6-otel` marker anywhere in the response.
///
/// The response is read incrementally until the marker is seen, the peer
/// closes the connection, a read fails or times out (500 ms per read), 8 KiB
/// have been received, or an overall 2 s deadline passes. A single `read`
/// call is not enough: TCP may deliver the response in several pieces and the
/// marker may sit behind more header bytes than one small buffer holds.
///
/// Returns `true` if it's our server, `false` if it's something else or not running.
pub fn is_mi6_server(port: u16) -> bool {
    use std::io::{Read, Write};

    const MARKER: &[u8] = b"mi6-otel";
    const MAX_RESPONSE_BYTES: usize = 8 * 1024;
    const IO_TIMEOUT: Duration = Duration::from_millis(500);
    const OVERALL_DEADLINE: Duration = Duration::from_secs(2);

    let Ok(mut stream) = TcpStream::connect_timeout(
        &std::net::SocketAddr::from(([127, 0, 0, 1], port)),
        IO_TIMEOUT,
    ) else {
        return false;
    };

    // Best effort: if the timeout cannot be set, the overall deadline and the
    // size cap below still bound the loop between reads.
    let _ = stream.set_read_timeout(Some(IO_TIMEOUT));

    // Send HTTP GET /health request
    let request = "GET /health HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n";
    if stream.write_all(request.as_bytes()).is_err() {
        return false;
    }

    let deadline = std::time::Instant::now() + OVERALL_DEADLINE;
    let mut response: Vec<u8> = Vec::with_capacity(512);
    let mut chunk = [0u8; 512];

    while response.len() < MAX_RESPONSE_BYTES && std::time::Instant::now() < deadline {
        match stream.read(&mut chunk) {
            // Peer closed the connection: nothing more will arrive.
            Ok(0) => break,
            Ok(n) => {
                // Re-scan only the new bytes plus enough of the old tail to
                // catch a marker that straddles two reads.
                let scan_from = response.len().saturating_sub(MARKER.len() - 1);
                response.extend_from_slice(&chunk[..n]);
                if response[scan_from..]
                    .windows(MARKER.len())
                    .any(|window| window == MARKER)
                {
                    return true;
                }
            }
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            // Timeout or connection error: give up with what we have.
            Err(_) => break,
        }
    }

    false
}

/// Kill the mi6 OTel server running on the given port.
///
/// Uses `lsof` to find the process(es) *listening* on TCP `port` and sends each
/// of them `SIGTERM` via the `kill` command, then sleeps 500 ms to give the
/// process time to exit.
///
/// The lookup is restricted to sockets in the `LISTEN` state. Without that
/// filter `lsof` also reports every process that merely has a connection to
/// the port (for example a client exporting telemetry to the server), and
/// those unrelated processes would be terminated as well.
///
/// # Returns
/// `Ok(true)` if at least one process was signalled successfully, `Ok(false)`
/// if no listener was found or no signal could be delivered.
///
/// # Errors
/// Returns an error if `lsof` itself cannot be executed.
#[cfg(unix)]
fn kill_mi6_server(port: u16) -> Result<bool> {
    // -t: print bare PIDs; -iTCP:<port> + -sTCP:LISTEN: listening sockets only.
    let output = Command::new("lsof")
        .args(["-t", &format!("-iTCP:{}", port), "-sTCP:LISTEN"])
        .output()
        .context("failed to run lsof")?;

    // lsof exits non-zero when nothing matched.
    if !output.status.success() {
        return Ok(false);
    }

    let pids = String::from_utf8_lossy(&output.stdout);
    let mut killed = false;
    for pid in pids
        .lines()
        .map(str::trim)
        // Only pass plain numeric PIDs on to `kill`.
        .filter(|pid| !pid.is_empty() && pid.bytes().all(|b| b.is_ascii_digit()))
    {
        // Use kill command to send SIGTERM; `output()` keeps its stderr off
        // the terminal. Only count signals that were actually delivered.
        let delivered = matches!(
            Command::new("kill").args(["-TERM", pid]).output(),
            Ok(result) if result.status.success()
        );
        killed |= delivered;
    }

    if killed {
        // Wait a moment for the process to terminate
        std::thread::sleep(Duration::from_millis(500));
    }
    Ok(killed)
}

/// Fallback for platforms without the unix `lsof`/`kill` tooling.
///
/// Always fails: stopping the server is only implemented for unix targets.
#[cfg(not(unix))]
fn kill_mi6_server(_port: u16) -> Result<bool> {
    Err(anyhow::anyhow!("stop is not supported on this platform"))
}

/// Stop the mi6 OTel server running on the given port.
///
/// Prints a note to stderr and returns `Ok(())` when nothing is listening on
/// `port`. Refuses to touch the port when the listener does not identify
/// itself as a mi6 OTel server.
///
/// # Errors
/// Returns an error if
/// * the port is occupied by a service that is not a mi6 OTel server,
/// * the platform-specific kill step fails, or
/// * a mi6 OTel server was detected but no process could be signalled
///   (previously this case returned `Ok(())` without any output, leaving the
///   server running while appearing to succeed).
pub fn stop_server(port: u16) -> Result<()> {
    if !is_server_running(port) {
        eprintln!("No server running on port {}", port);
        return Ok(());
    }

    if !is_mi6_server(port) {
        anyhow::bail!(
            "port {} is in use by another service, not a mi6 OTel server",
            port
        );
    }

    if !kill_mi6_server(port)? {
        // A mi6 server answered the health check above, so failing to signal
        // it is a real failure and must not be reported as success.
        anyhow::bail!(
            "mi6 OTel server on port {} is running but could not be stopped",
            port
        );
    }

    eprintln!("Stopped OTel server on port {}", port);
    Ok(())
}

/// Status information for the OTel server.
///
/// A snapshot assembled by [`OtelServerStatus::get`]; it combines a live probe
/// of the port with information about the local `mi6` CLI and the configured
/// processing mode.
#[derive(Debug)]
pub struct OtelServerStatus {
    /// Whether a server is running on the port
    /// (a TCP connection to `127.0.0.1:<port>` succeeded).
    pub running: bool,
    /// Whether the running server is a mi6 server
    /// (always `false` when `running` is `false`).
    pub is_mi6: bool,
    /// Port number that was probed.
    pub port: u16,
    /// Path to the mi6 CLI binary (used for relay mode);
    /// `None` when `mi6` is not found on `PATH`.
    pub cli_binary: Option<String>,
    /// Version of the CLI binary as reported by `mi6 --version`,
    /// with the leading `mi6 ` stripped; `None` if it could not be determined.
    pub cli_version: Option<String>,
    /// Processing mode from config (note: running server may use different mode)
    pub mode: OtelMode,
}

impl OtelServerStatus {
    /// Collect a status snapshot for the OTel server on `port`.
    ///
    /// Probes the port, identifies the listener (only if something is
    /// listening), loads the configuration (falling back to defaults when it
    /// cannot be loaded), and inspects the `mi6` CLI found on `PATH`.
    pub fn get(port: u16) -> Self {
        let running = is_server_running(port);
        // The identity check is skipped entirely when nothing is listening.
        let is_mi6 = running && is_mi6_server(port);

        let settings = mi6_core::Config::load().unwrap_or_default();

        // Relay mode invokes the "mi6" CLI, so report where PATH resolves it...
        let cli_binary = which::which("mi6")
            .ok()
            .map(|location| location.display().to_string());
        // ...and which version `mi6 --version` reports.
        let cli_version = get_cli_version();

        Self {
            running,
            is_mi6,
            port,
            cli_binary,
            cli_version,
            mode: settings.otel.mode,
        }
    }
}

/// Ask the `mi6` CLI on `PATH` for its version.
///
/// Runs `mi6 --version` and returns whatever follows the `mi6 ` prefix in the
/// trimmed output (e.g. `"mi6 0.1.0"` yields `"0.1.0"`). Returns `None` when
/// the command cannot be run, exits unsuccessfully, or prints something that
/// does not start with `mi6 `.
fn get_cli_version() -> Option<String> {
    let result = Command::new("mi6").arg("--version").output().ok()?;
    if !result.status.success() {
        return None;
    }

    let text = String::from_utf8_lossy(&result.stdout);
    text.trim().strip_prefix("mi6 ").map(str::to_owned)
}

/// Print the status of the OTel server on `port` to stderr.
///
/// Three outcomes are reported: nothing is listening, the port is held by a
/// foreign service, or a mi6 OTel server is running (in which case port, the
/// configured mode and the CLI binary details are listed).
///
/// Always returns `Ok(())`; the `Result` is part of the signature only.
pub fn get_status(port: u16) -> Result<()> {
    let status = OtelServerStatus::get(port);

    match (status.running, status.is_mi6) {
        (false, _) => {
            eprintln!("OTel server is not running on port {}", port);
        }
        (true, false) => {
            eprintln!(
                "Port {} is in use by another service (not a mi6 OTel server)",
                port
            );
        }
        (true, true) => {
            eprintln!("OTel Server Status");
            eprintln!("  Running:     yes");
            eprintln!("  Port:        {}", status.port);
            eprintln!("  Mode:        {} (from config)", status.mode);

            // The version line is only shown when the binary itself was found.
            match (&status.cli_binary, &status.cli_version) {
                (Some(path), version) => {
                    eprintln!("  CLI Binary:  {}", path);
                    if let Some(v) = version {
                        eprintln!("  CLI Version: {}", v);
                    }
                }
                (None, _) => eprintln!("  CLI Binary:  not found on PATH"),
            }
        }
    }

    Ok(())
}

/// Ensure the OTel server is running, starting it if necessary.
///
/// If a mi6 OTel server already listens on `port` it is reused, or killed and
/// relaunched when `restart` is set. Otherwise the current executable is
/// spawned in the background as `otel run --port <port> [--mode <mode>]` with
/// all standard streams detached, and the port is polled for up to ~500 ms.
///
/// # Arguments
/// * `port` - The port to run the server on
/// * `restart` - If true, restart the server even if it's already running
/// * `mode` - Processing mode (None uses config default)
///
/// # Returns
/// `Ok(true)` if the server is confirmed running, `Ok(false)` if it may still be starting.
///
/// # Errors
/// Fails if the port is held by a different service, if killing the old server
/// fails, if the current executable cannot be determined, or if the child
/// process cannot be spawned.
pub fn ensure_running(port: u16, restart: bool, mode: Option<OtelMode>) -> Result<bool> {
    if is_server_running(port) {
        // Something owns the port; only proceed if it is our own server.
        if !is_mi6_server(port) {
            anyhow::bail!(
                "port {} is in use by another service; \
                 cannot start mi6 OTel server; \
                 consider changing the port in your settings",
                port
            );
        }
        if !restart {
            return Ok(true); // Our server is already running
        }
        kill_mi6_server(port)?;
    }

    let binary_path = std::env::current_exe().context("failed to determine binary path")?;
    let port_str = port.to_string();
    let mode_str = mode.map(|m| m.to_string());

    // On unix the server is launched through nohup so it ignores SIGHUP;
    // elsewhere the binary is executed directly.
    #[cfg(unix)]
    let mut cmd = {
        let mut launcher = Command::new("nohup");
        launcher.arg(&binary_path);
        launcher
    };
    #[cfg(not(unix))]
    let mut cmd = Command::new(&binary_path);

    cmd.args(["otel", "run", "--port", &port_str]);
    if let Some(m) = mode_str.as_deref() {
        cmd.args(["--mode", m]);
    }

    cmd.stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    // Put the child in its own process group.
    #[cfg(unix)]
    {
        use std::os::unix::process::CommandExt;
        cmd.process_group(0);
    }

    cmd.spawn().context("failed to spawn otel server")?;

    // Poll up to 10 times, 50 ms apart, for the port to start accepting connections.
    let came_up = (0..10).any(|_| {
        std::thread::sleep(Duration::from_millis(50));
        is_server_running(port)
    });
    if came_up {
        return Ok(true);
    }

    // Server may still be starting - warn user but don't fail
    eprintln!(
        "Warning: OTel server not yet responding on port {}; it may still be starting",
        port
    );
    Ok(false)
}

/// Get the default OTel port.
///
/// Function form of [`DEFAULT_PORT`]; returns the same constant value.
pub fn default_port() -> u16 {
    DEFAULT_PORT
}

/// Find an available port starting from the given port.
///
/// Scans up to 100 ports from `start_port`, returning the first port that is
/// either not in use or already running a mi6 OTel server. The scan stops at
/// `u16::MAX`, so fewer than 100 ports are examined when `start_port` is
/// within 100 of the top of the port range (the upper bound is computed with
/// saturating arithmetic instead of overflowing).
///
/// Falls back to `start_port` itself when no scanned port is usable.
pub fn find_available_port(start_port: u16) -> u16 {
    /// Maximum number of consecutive ports examined.
    const SCAN_LEN: u16 = 100;

    // Inclusive upper bound; saturating so start ports above 65435 cannot overflow.
    let last_port = start_port.saturating_add(SCAN_LEN - 1);

    (start_port..=last_port)
        // Port is usable if nothing is running or if our server is already there
        .find(|&port| !is_server_running(port) || is_mi6_server(port))
        // Fallback to original port
        .unwrap_or(start_port)
}