swarm-engine-llm 0.1.6

//! LLM Decider - Action 選択のための LLM 抽象
//!
//! 軽量LLM（Qwen2.5-Coder 1.5B等）による高速な Action 選択
//!
//! # 概念
//!
//! - [`LlmDecider`]: LLM への問い合わせ抽象（非同期、バッチ対応）
//!
//! # 型の統一
//!
//! LLM層はCore層の型を直接使用:
//! - `WorkerDecisionRequest` - リクエスト
//! - `DecisionResponse` - レスポンス

use std::future::Future;
use std::pin::Pin;

/// Return type of a batch decision: a boxed, pinned, `Send` future that
/// resolves to a vector of `Result<DecisionResponse, LlmError>` values.
///
/// The alias exists to keep signatures short (works around
/// `clippy::type_complexity`).
pub type BatchDecisionFuture<'a> =
    Pin<Box<dyn Future<Output = Vec<Result<DecisionResponse, LlmError>>> + Send + 'a>>;

// Core の型を再エクスポート
pub use swarm_engine_core::agent::{
    ActionCandidate, ActionParam, DecisionResponse, ResolvedContext, WorkerDecisionRequest,
};
pub use swarm_engine_core::types::LoraConfig;

/// Error produced by the LLM layer.
///
/// Each variant carries a free-form message; the variant itself records
/// whether the failure is retryable.
#[derive(Debug, Clone, thiserror::Error)]
pub enum LlmError {
    /// Transient error (retryable).
    #[error("LLM error (transient): {0}")]
    Transient(String),

    /// Permanent error (not retryable).
    #[error("LLM error: {0}")]
    Permanent(String),
}

impl LlmError {
    /// Creates a [`LlmError::Transient`] carrying `message`.
    pub fn transient(message: impl Into<String>) -> Self {
        let text: String = message.into();
        Self::Transient(text)
    }

    /// Creates a [`LlmError::Permanent`] carrying `message`.
    pub fn permanent(message: impl Into<String>) -> Self {
        let text: String = message.into();
        Self::Permanent(text)
    }

    /// Returns `true` only for the [`LlmError::Transient`] variant.
    pub fn is_transient(&self) -> bool {
        match self {
            Self::Transient(_) => true,
            Self::Permanent(_) => false,
        }
    }

    /// Returns the message stored in the error, whichever variant it is.
    pub fn message(&self) -> &str {
        match self {
            // Both variants wrap a single `String`, so one arm covers them.
            Self::Transient(text) | Self::Permanent(text) => text.as_str(),
        }
    }
}

impl From<swarm_engine_core::error::SwarmError> for LlmError {
    /// Converts a core `SwarmError` into an [`LlmError`].
    ///
    /// The result of `is_transient()` on the source error selects the variant,
    /// and the value returned by `message()` becomes its payload.
    fn from(err: swarm_engine_core::error::SwarmError) -> Self {
        let retryable = err.is_transient();
        let message = err.message();

        if !retryable {
            return Self::Permanent(message);
        }
        Self::Transient(message)
    }
}

impl From<LlmError> for swarm_engine_core::error::SwarmError {
    fn from(err: LlmError) -> Self {
        match err {
            LlmError::Transient(message) => {
                swarm_engine_core::error::SwarmError::LlmTransient { message }
            }
            LlmError::Permanent(message) => {
                swarm_engine_core::error::SwarmError::LlmPermanent { message }
            }
        }
    }
}

/// LLM decider abstraction.
///
/// Takes the core `WorkerDecisionRequest` and yields a `DecisionResponse`.
pub trait LlmDecider: Send + Sync {
    /// Makes a single decision for `request`.
    fn decide(
        &self,
        request: WorkerDecisionRequest,
    ) -> Pin<Box<dyn Future<Output = Result<DecisionResponse, LlmError>> + Send + '_>>;

    /// Sends a raw prompt and returns the raw response.
    ///
    /// Meant for uses other than action selection, such as dependency-graph
    /// generation. The default implementation does not support the call and
    /// resolves to a permanent error.
    ///
    /// # Arguments
    /// * `prompt` - prompt to send
    /// * `lora` - LoRA configuration (`None` means base model only)
    fn call_raw(
        &self,
        _prompt: &str,
        _lora: Option<&LoraConfig>,
    ) -> Pin<Box<dyn Future<Output = Result<String, LlmError>> + Send + '_>> {
        Box::pin(async {
            let unsupported = LlmError::permanent("call_raw not implemented");
            Err(unsupported)
        })
    }

    /// Makes decisions for a whole batch of requests (100+ agents).
    ///
    /// The default implementation awaits [`LlmDecider::decide`] for each
    /// request one after another and collects the results in input order.
    fn decide_batch(&self, requests: Vec<WorkerDecisionRequest>) -> BatchDecisionFuture<'_> {
        let run_in_order = async move {
            let mut outcomes = Vec::with_capacity(requests.len());
            for request in requests {
                let outcome = self.decide(request).await;
                outcomes.push(outcome);
            }
            outcomes
        };
        Box::pin(run_in_order)
    }

    /// Name of the model.
    fn model_name(&self) -> &str;

    /// Endpoint of the backing service; the default is the literal `"unknown"`.
    fn endpoint(&self) -> &str {
        const FALLBACK_ENDPOINT: &str = "unknown";
        FALLBACK_ENDPOINT
    }

    /// Health check.
    fn is_healthy(&self) -> Pin<Box<dyn Future<Output = bool> + Send + '_>>;

    /// Returns the maximum number of concurrent executions (for example the
    /// server's slot count).
    ///
    /// The default resolves to `None` (unlimited). Implementations may query
    /// the server and report its slot count instead.
    fn max_concurrency(&self) -> Pin<Box<dyn Future<Output = Option<usize>> + Send + '_>> {
        Box::pin(async { None::<usize> })
    }
}

/// Decider configuration.
#[derive(Debug, Clone)]
pub struct LlmDeciderConfig {
    /// Model name.
    pub model: String,
    /// Endpoint.
    pub endpoint: String,
    /// Timeout in milliseconds.
    pub timeout_ms: u64,
    /// Maximum batch size.
    pub max_batch_size: usize,
    /// Temperature.
    pub temperature: f32,
    /// Custom system prompt (template variables: `{query}`, `{candidates}`, `{world_state}`).
    pub system_prompt: Option<String>,
}

impl Default for LlmDeciderConfig {
    /// Builds the default configuration: model `qwen2.5-coder:1.5b` at
    /// `http://localhost:11434`, a 5000 ms timeout, batches of up to 100,
    /// temperature 0.1 and no custom system prompt.
    fn default() -> Self {
        let model = String::from("qwen2.5-coder:1.5b");
        let endpoint = String::from("http://localhost:11434");

        Self {
            model,
            endpoint,
            timeout_ms: 5_000,
            max_batch_size: 100,
            temperature: 0.1,
            system_prompt: None,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A transient error reports itself as retryable, exposes its message and
    /// renders with the "(transient)" marker.
    #[test]
    fn test_llm_error_transient() {
        let transient = LlmError::transient("connection timeout");
        let is_retryable = transient.is_transient();
        assert!(is_retryable);

        let text = transient.message();
        assert_eq!(text, "connection timeout");

        let rendered = transient.to_string();
        assert_eq!(rendered, "LLM error (transient): connection timeout");
    }

    /// A permanent error is not retryable and exposes its message.
    #[test]
    fn test_llm_error_permanent() {
        let permanent = LlmError::permanent("invalid model");
        let is_retryable = permanent.is_transient();
        assert!(!is_retryable);

        let text = permanent.message();
        assert_eq!(text, "invalid model");
    }

    /// The default configuration carries the documented defaults.
    #[test]
    fn test_llm_decider_config_default() {
        let LlmDeciderConfig {
            model,
            endpoint,
            timeout_ms,
            max_batch_size,
            temperature,
            ..
        } = LlmDeciderConfig::default();

        assert_eq!(model, "qwen2.5-coder:1.5b");
        assert_eq!(endpoint, "http://localhost:11434");
        assert_eq!(timeout_ms, 5000);
        assert_eq!(max_batch_size, 100);
        // Floats are compared with a tolerance rather than exact equality.
        let drift = (temperature - 0.1).abs();
        assert!(drift < 0.001);
    }
}