use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::RwLock;
use std::time::Duration;
use async_trait::async_trait;
use tokio::process::Command;
use super::trainer::{LoraModelId, TrainedModel};
/// Errors returned by `ModelApplicator` implementations.
#[derive(Debug)]
pub enum ApplicatorError {
    /// No model with the given id exists in the rollback history.
    ModelNotFound(LoraModelId),
    /// The LoRA adapter file was missing at the given path.
    AdapterNotFound(PathBuf),
    /// llama-server failed to spawn or did not become ready in time.
    ServerStartFailed(String),
    /// The running server could not be stopped (e.g. unreadable PID file).
    ServerStopFailed(String),
    /// Underlying filesystem or process I/O failure.
    Io(std::io::Error),
    /// Rollback was requested but no history exists.
    /// NOTE(review): not constructed anywhere in this file — possibly used by
    /// other applicator implementations; confirm before removing.
    NoHistory,
    /// Catch-all for miscellaneous failures (e.g. missing base model).
    Other(String),
}
impl std::fmt::Display for ApplicatorError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::ModelNotFound(id) => write!(f, "Model not found: {}", id),
Self::AdapterNotFound(p) => write!(f, "Adapter not found: {}", p.display()),
Self::ServerStartFailed(msg) => write!(f, "Server start failed: {}", msg),
Self::ServerStopFailed(msg) => write!(f, "Server stop failed: {}", msg),
Self::Io(e) => write!(f, "IO error: {}", e),
Self::NoHistory => write!(f, "No model history for rollback"),
Self::Other(msg) => write!(f, "{}", msg),
}
}
}
impl std::error::Error for ApplicatorError {
    /// Expose the wrapped I/O error as the cause; every other variant is a
    /// leaf with no underlying source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::Io(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
impl From<std::io::Error> for ApplicatorError {
fn from(e: std::io::Error) -> Self {
Self::Io(e)
}
}
/// Strategy for making a trained LoRA model the live serving model.
#[async_trait]
pub trait ModelApplicator: Send + Sync {
    /// Make `model` the active model, archiving the previously active one.
    async fn apply(&self, model: &TrainedModel) -> Result<(), ApplicatorError>;
    /// Re-activate a previously applied model identified by `to`.
    async fn rollback(&self, to: &LoraModelId) -> Result<(), ApplicatorError>;
    /// The currently applied model, if any.
    fn current(&self) -> Option<TrainedModel>;
    /// Id of the most recently superseded model (top of the history stack).
    fn previous_model_id(&self) -> Option<LoraModelId>;
}
/// Configuration for launching a local llama-server process.
#[derive(Debug, Clone)]
pub struct LlamaServerConfig {
    /// Path to the base GGUF model file (must exist before starting).
    pub base_model_path: PathBuf,
    /// Interface the server binds to.
    pub host: String,
    /// TCP port the server listens on.
    pub port: u16,
    /// Number of layers to offload to the GPU (`-ngl`).
    pub n_gpu_layers: u32,
    /// Context window size in tokens (`-c`).
    pub ctx_size: u32,
    /// Number of parallel request slots (`-np`).
    pub parallel: u32,
    /// File where the spawned server's PID is recorded for later shutdown.
    pub pid_file: PathBuf,
    /// File receiving the server's stdout/stderr.
    pub log_file: PathBuf,
    /// Executable name or path of the llama-server binary.
    pub server_path: String,
}
impl Default for LlamaServerConfig {
    /// Local-serving defaults: loopback on port 8080, full GPU offload, and
    /// PID/log files under the platform data directory (falling back to the
    /// current directory when none is available).
    fn default() -> Self {
        let state_dir = dirs::data_dir()
            .unwrap_or_else(|| PathBuf::from("."))
            .join("swarm-engine");
        let pid_file = state_dir.join("llama-server.pid");
        let log_file = state_dir.join("llama-server.log");
        Self {
            base_model_path: PathBuf::new(),
            host: String::from("127.0.0.1"),
            port: 8080,
            n_gpu_layers: 99,
            ctx_size: 4096,
            parallel: 4,
            pid_file,
            log_file,
            server_path: String::from("llama-server"),
        }
    }
}
impl LlamaServerConfig {
    /// Set the path of the base GGUF model.
    pub fn base_model(self, path: impl Into<PathBuf>) -> Self {
        Self { base_model_path: path.into(), ..self }
    }
    /// Set the interface the server binds to.
    pub fn host(self, host: impl Into<String>) -> Self {
        Self { host: host.into(), ..self }
    }
    /// Set the listening port.
    pub fn port(self, port: u16) -> Self {
        Self { port, ..self }
    }
    /// Set the number of GPU-offloaded layers.
    pub fn n_gpu_layers(self, n: u32) -> Self {
        Self { n_gpu_layers: n, ..self }
    }
    /// Set the number of parallel request slots.
    pub fn parallel(self, n: u32) -> Self {
        Self { parallel: n, ..self }
    }
    /// Set the context window size in tokens.
    pub fn ctx_size(self, size: u32) -> Self {
        Self { ctx_size: size, ..self }
    }
}
/// Applies LoRA adapters by restarting a managed llama-server process with
/// the new adapter, keeping an in-memory history of prior models for rollback.
pub struct LlamaServerApplicator {
    // Launch parameters for the managed server process.
    config: LlamaServerConfig,
    // The model currently being served, if any.
    current_model: RwLock<Option<TrainedModel>>,
    // Stack of previously served models (most recent last).
    history: RwLock<Vec<TrainedModel>>,
}
impl LlamaServerApplicator {
    /// Build an applicator around `config` with no current model and an
    /// empty rollback history.
    pub fn new(config: LlamaServerConfig) -> Self {
        Self {
            config,
            current_model: RwLock::new(None),
            history: RwLock::new(Vec::new()),
        }
    }

    /// Stop a previously started llama-server, if its PID file exists.
    ///
    /// Best-effort: a failed `kill` (e.g. the process already exited) is only
    /// logged, and the PID file is removed regardless.
    async fn stop_server(&self) -> Result<(), ApplicatorError> {
        if !self.config.pid_file.exists() {
            return Ok(());
        }
        let pid_str = tokio::fs::read_to_string(&self.config.pid_file).await?;
        let pid: u32 = pid_str
            .trim()
            .parse()
            .map_err(|_| ApplicatorError::ServerStopFailed("Invalid PID".to_string()))?;
        if pid == 0 {
            // Defensive: `kill 0` signals the caller's entire process group,
            // which would take down this process too. Never allow it.
            tracing::warn!("PID file contained 0; removing stale file without killing");
            let _ = tokio::fs::remove_file(&self.config.pid_file).await;
            return Ok(());
        }
        let status = Command::new("kill").arg(pid.to_string()).status().await?;
        if !status.success() {
            tracing::debug!(pid, "Process already stopped or kill failed");
        }
        // Give the server a moment to exit and release its port.
        tokio::time::sleep(Duration::from_millis(500)).await;
        let _ = tokio::fs::remove_file(&self.config.pid_file).await;
        Ok(())
    }

    /// Launch llama-server with the configured base model and an optional
    /// LoRA adapter, record its PID, and wait until it accepts connections.
    ///
    /// # Errors
    /// Fails when the base model or adapter is missing, the process cannot
    /// be spawned, or the server never becomes ready.
    async fn start_server(&self, lora_path: Option<&Path>) -> Result<(), ApplicatorError> {
        if !self.config.base_model_path.exists() {
            return Err(ApplicatorError::Other(format!(
                "Base model not found: {}",
                self.config.base_model_path.display()
            )));
        }
        if let Some(lora) = lora_path {
            if !lora.exists() {
                return Err(ApplicatorError::AdapterNotFound(lora.to_path_buf()));
            }
        }
        if let Some(parent) = self.config.pid_file.parent() {
            tokio::fs::create_dir_all(parent).await?;
        }
        let mut cmd = Command::new(&self.config.server_path);
        // Paths are passed as `OsStr` via `arg` so non-UTF-8 paths work;
        // the previous `to_str().unwrap()` would panic on them.
        cmd.arg("-m").arg(&self.config.base_model_path);
        cmd.args([
            "--host",
            &self.config.host,
            "--port",
            &self.config.port.to_string(),
            "-ngl",
            &self.config.n_gpu_layers.to_string(),
            "-c",
            &self.config.ctx_size.to_string(),
            "-np",
            &self.config.parallel.to_string(),
            "--cont-batching",
        ]);
        if let Some(lora) = lora_path {
            cmd.arg("--lora").arg(lora);
        }
        // Both stdout and stderr go to the same log file.
        let log = std::fs::File::create(&self.config.log_file)?;
        let log_err = log.try_clone()?;
        cmd.stdout(Stdio::from(log));
        cmd.stderr(Stdio::from(log_err));
        let child = cmd
            .spawn()
            .map_err(|e| ApplicatorError::ServerStartFailed(e.to_string()))?;
        // `id()` is None only when the child has already been reaped. The old
        // fallback of writing 0 would later make `stop_server` run `kill 0`
        // (signalling our whole process group), so treat it as a failure.
        let pid = child.id().ok_or_else(|| {
            ApplicatorError::ServerStartFailed("spawned server exited immediately".to_string())
        })?;
        tokio::fs::write(&self.config.pid_file, pid.to_string()).await?;
        tracing::info!(
            pid,
            endpoint = format!("http://{}:{}", self.config.host, self.config.port),
            lora = ?lora_path,
            "llama-server started"
        );
        self.wait_for_ready().await?;
        Ok(())
    }

    /// Poll the server's TCP port (up to ~15 s) until a connection succeeds.
    async fn wait_for_ready(&self) -> Result<(), ApplicatorError> {
        let max_attempts = 30;
        let delay = Duration::from_millis(500);
        let addr = format!("{}:{}", self.config.host, self.config.port);
        for attempt in 1..=max_attempts {
            tokio::time::sleep(delay).await;
            match tokio::net::TcpStream::connect(&addr).await {
                Ok(_) => {
                    tracing::debug!(attempt, "llama-server is ready");
                    return Ok(());
                }
                Err(_) => {
                    tracing::trace!(attempt, "Waiting for llama-server...");
                }
            }
        }
        Err(ApplicatorError::ServerStartFailed(
            "Timeout waiting for server to be ready".to_string(),
        ))
    }

    /// Look up a model by id in the rollback history (first match wins).
    fn find_in_history(&self, id: &LoraModelId) -> Option<TrainedModel> {
        let history = self.history.read().unwrap();
        history.iter().find(|m| &m.id == id).cloned()
    }
}
#[async_trait]
impl ModelApplicator for LlamaServerApplicator {
    /// Restart the server with `model`'s adapter, archiving the previously
    /// served model for rollback. If the new adapter fails to start, a
    /// best-effort restore of the previous model is attempted and the
    /// original error is returned.
    async fn apply(&self, model: &TrainedModel) -> Result<(), ApplicatorError> {
        tracing::info!(
            model_id = %model.id,
            adapter = %model.adapter_path.display(),
            "Applying model"
        );
        // Snapshot the current model before tearing the server down so it
        // can be restored if the new adapter fails to start.
        let previous_model = self.current();
        self.stop_server().await?;
        if let Err(e) = self.start_server(Some(&model.adapter_path)).await {
            if let Some(ref prev) = previous_model {
                tracing::warn!(
                    model_id = %prev.id,
                    "Apply failed, attempting to restore previous model"
                );
                // Best-effort: the restore result is deliberately ignored so
                // the caller sees the original apply error.
                let _ = self.start_server(Some(&prev.adapter_path)).await;
            }
            return Err(e);
        }
        // Only archive the previous model and switch `current_model` once
        // the new server is confirmed up.
        if let Some(prev) = previous_model {
            self.history.write().unwrap().push(prev);
        }
        *self.current_model.write().unwrap() = Some(model.clone());
        tracing::info!(model_id = %model.id, "Model applied successfully");
        Ok(())
    }
    /// Restart the server with a model taken from history.
    ///
    /// NOTE(review): the model that was current before the rollback is not
    /// pushed onto history, and the rollback target remains in history —
    /// confirm this is the intended semantics.
    async fn rollback(&self, to: &LoraModelId) -> Result<(), ApplicatorError> {
        tracing::info!(target_id = %to, "Rolling back model");
        let model = self
            .find_in_history(to)
            .ok_or_else(|| ApplicatorError::ModelNotFound(to.clone()))?;
        self.stop_server().await?;
        self.start_server(Some(&model.adapter_path)).await?;
        *self.current_model.write().unwrap() = Some(model);
        tracing::info!(target_id = %to, "Rollback completed");
        Ok(())
    }
    /// Clone of the currently served model, if any.
    fn current(&self) -> Option<TrainedModel> {
        self.current_model.read().unwrap().clone()
    }
    /// Id of the most recently archived model (top of the history stack).
    fn previous_model_id(&self) -> Option<LoraModelId> {
        let history = self.history.read().unwrap();
        history.last().map(|m| m.id.clone())
    }
}
/// In-memory applicator that tracks model state without managing any server
/// process; useful for tests and dry runs.
pub struct NoOpApplicator {
    // The model most recently "applied", if any.
    current_model: RwLock<Option<TrainedModel>>,
    // Previously applied models (most recent last).
    history: RwLock<Vec<TrainedModel>>,
}
impl NoOpApplicator {
pub fn new() -> Self {
Self {
current_model: RwLock::new(None),
history: RwLock::new(Vec::new()),
}
}
}
impl Default for NoOpApplicator {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl ModelApplicator for NoOpApplicator {
    /// Record `model` as current, archiving whichever model it displaces.
    async fn apply(&self, model: &TrainedModel) -> Result<(), ApplicatorError> {
        let displaced = self.current_model.write().unwrap().replace(model.clone());
        if let Some(prev) = displaced {
            self.history.write().unwrap().push(prev);
        }
        Ok(())
    }
    /// Make a model from history current again, without touching history.
    async fn rollback(&self, to: &LoraModelId) -> Result<(), ApplicatorError> {
        // Scope the read lock so it is released before taking the write lock.
        let target = {
            let history = self.history.read().unwrap();
            history.iter().find(|m| &m.id == to).cloned()
        };
        match target {
            Some(model) => {
                *self.current_model.write().unwrap() = Some(model);
                Ok(())
            }
            None => Err(ApplicatorError::ModelNotFound(to.clone())),
        }
    }
    /// Clone of the currently recorded model, if any.
    fn current(&self) -> Option<TrainedModel> {
        self.current_model.read().unwrap().as_ref().cloned()
    }
    /// Id of the most recently archived model (top of the history stack).
    fn previous_model_id(&self) -> Option<LoraModelId> {
        self.history.read().unwrap().last().map(|m| m.id.clone())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal `TrainedModel` fixture for the tests below.
    fn sample_model(id: &str, adapter: &str) -> TrainedModel {
        TrainedModel {
            id: LoraModelId::parse(id),
            base_model: "test-base".to_string(),
            adapter_path: PathBuf::from(adapter),
            learn_model_name: "test-learn".to_string(),
            episode_ids: vec![],
            sample_count: 10,
            created_at: 0,
            metrics: None,
        }
    }

    #[tokio::test]
    async fn test_noop_applicator_apply() {
        let app = NoOpApplicator::new();
        assert!(app.current().is_none());
        app.apply(&sample_model("model-1", "/path/to/adapter1"))
            .await
            .unwrap();
        assert_eq!(app.current().unwrap().id.as_str(), "model-1");
    }

    #[tokio::test]
    async fn test_noop_applicator_history() {
        let app = NoOpApplicator::new();
        app.apply(&sample_model("model-1", "/path/to/adapter1"))
            .await
            .unwrap();
        // First apply displaces nothing, so history stays empty.
        assert!(app.previous_model_id().is_none());
        app.apply(&sample_model("model-2", "/path/to/adapter2"))
            .await
            .unwrap();
        assert_eq!(app.previous_model_id().unwrap().as_str(), "model-1");
        assert_eq!(app.current().unwrap().id.as_str(), "model-2");
    }

    #[tokio::test]
    async fn test_noop_applicator_rollback() {
        let app = NoOpApplicator::new();
        for (id, adapter) in [("model-1", "/path/to/adapter1"), ("model-2", "/path/to/adapter2")] {
            app.apply(&sample_model(id, adapter)).await.unwrap();
        }
        app.rollback(&LoraModelId::parse("model-1")).await.unwrap();
        assert_eq!(app.current().unwrap().id.as_str(), "model-1");
    }

    #[tokio::test]
    async fn test_rollback_not_found() {
        let app = NoOpApplicator::new();
        app.apply(&sample_model("model-1", "/path/to/adapter"))
            .await
            .unwrap();
        let outcome = app.rollback(&LoraModelId::parse("nonexistent")).await;
        assert!(matches!(outcome, Err(ApplicatorError::ModelNotFound(_))));
    }

    #[test]
    fn test_llama_server_config_builder() {
        let config = LlamaServerConfig::default()
            .base_model("/path/to/model.gguf")
            .host("0.0.0.0")
            .port(8081)
            .n_gpu_layers(50)
            .parallel(8);
        assert_eq!(config.base_model_path, PathBuf::from("/path/to/model.gguf"));
        assert_eq!(config.host, "0.0.0.0");
        assert_eq!(config.port, 8081);
        assert_eq!(config.n_gpu_layers, 50);
        assert_eq!(config.parallel, 8);
    }
}