// swarm-engine-core 0.1.6

//! LearnedComponent - 学習結果の型安全な抽象化
//!
//! ## 設計思想
//!
//! ComponentLearner（学習プロセス）と LearnedComponent（学習結果）をペアで定義。
//! 各学習対象ごとに専用の型を持ち、Map/Any を使わず型安全性を確保する。
//!
//! ## 背景
//!
//! - 従来の ML: 口調、好み程度 → Map でも許容
//! - Swarm Learning: Control の知識そのものが Domain → Typed 必須
//!
//! ## 使用例
//!
//! ```ignore
//! // ComponentLearner と LearnedComponent はペアで定義
//! struct DepGraphLearner;
//!
//! impl ComponentLearner for DepGraphLearner {
//!     type Output = LearnedDepGraph;
//!
//!     // name() and objective() are also required; omitted here for brevity.
//!
//!     fn learn(&self, episodes: &[Episode]) -> Result<Self::Output, LearnError> {
//!         // Build a LearnedDepGraph from the episodes
//!     }
//! }
//!
//! impl LearnedComponent for LearnedDepGraph {
//!     fn component_id() -> &'static str { "dep_graph" }
//!     // ...
//! }
//! ```
//!
//! ## LearnModel vs ComponentLearner
//!
//! - `LearnModel`: Episode → TrainingData (LoRA fine-tuning 用)
//! - `ComponentLearner`: Episodes → LearnedComponent (ScenarioProfile 用)

use serde::{de::DeserializeOwned, Serialize};

use super::episode::Episode;
use super::learn_model::LearnError;

// ============================================================================
// ComponentLearner Trait
// ============================================================================

/// Learning process for a ScenarioProfile component.
///
/// Produces a [`LearnedComponent`] from a collection of [`Episode`]s. Unlike
/// `LearnModel` (used for LoRA), this learns each ScenarioProfile component
/// in a type-safe way.
pub trait ComponentLearner: Send + Sync {
    /// Type of the learning result.
    type Output: LearnedComponent;

    /// Name of the learner.
    fn name(&self) -> &str;

    /// Description of the learner's objective.
    fn objective(&self) -> &str;

    /// Produces a learning result from the given episodes.
    ///
    /// # Errors
    ///
    /// Returns a [`LearnError`] when the implementation cannot learn from
    /// `episodes`.
    fn learn(&self, episodes: &[Episode]) -> Result<Self::Output, LearnError>;

    /// Updates an existing component (incremental learning).
    ///
    /// The default implementation learns a fresh component from
    /// `new_episodes` alone and then merges `existing` into it via
    /// [`LearnedComponent::merge`].
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`ComponentLearner::learn`].
    fn update(
        &self,
        existing: &Self::Output,
        new_episodes: &[Episode],
    ) -> Result<Self::Output, LearnError> {
        self.learn(new_episodes).map(|mut refreshed| {
            refreshed.merge(existing);
            refreshed
        })
    }
}

// ============================================================================
// LearnedComponent Trait
// ============================================================================

/// Common trait for learned-result components.
///
/// Implemented by the learning result of each learning target (DepGraph,
/// Strategy, Exploration, ...). Keeping the results typed provides type
/// safety and IDE support.
pub trait LearnedComponent: Send + Sync + Serialize + DeserializeOwned + Clone {
    /// Component identifier (used for file names and the like).
    fn component_id() -> &'static str
    where
        Self: Sized;

    /// Confidence score (0.0 - 1.0).
    ///
    /// Reflects the amount and quality of the training data. When it is low,
    /// consider running additional Bootstrap sessions.
    fn confidence(&self) -> f64;

    /// Number of sessions used for learning.
    fn session_count(&self) -> usize;

    /// Last-update timestamp (Unix epoch seconds).
    fn updated_at(&self) -> u64;

    /// Merges another component into this one (for incremental learning).
    ///
    /// Default implementation: `self` is replaced by a clone of `other` only
    /// when `other` reports a strictly higher confidence; otherwise `self` is
    /// left untouched.
    fn merge(&mut self, other: &Self)
    where
        Self: Sized,
    {
        let other_is_more_confident = other.confidence() > self.confidence();
        if other_is_more_confident {
            self.clone_from(other);
        }
    }

    /// Version number (for compatibility checks). Defaults to `1`.
    fn version() -> u32
    where
        Self: Sized,
    {
        1
    }
}

// ============================================================================
// LearnedDepGraph - 学習済み依存グラフ
// ============================================================================

use crate::exploration::DependencyGraph;

// Re-use existing RecommendedPath from offline module
pub use super::offline::RecommendedPath;

/// Learned dependency graph.
///
/// Per the original design notes: learned from the ground-truth graph during
/// the Bootstrap phase, and used in the Release phase to decide the action
/// order immediately, without an LLM.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedDepGraph {
    /// The dependency graph itself.
    pub graph: DependencyGraph,

    /// Learned action order (topologically sorted).
    /// Deprecated: use `discover_order` + `not_discover_order` instead.
    pub action_order: Vec<String>,

    /// Order of Discover actions (NodeExpand family).
    /// Falls back to an empty list when absent from the serialized data.
    #[serde(default)]
    pub discover_order: Vec<String>,

    /// Order of Not-Discover actions (NodeStateChange family).
    /// Falls back to an empty list when absent from the serialized data.
    #[serde(default)]
    pub not_discover_order: Vec<String>,

    /// Recommended paths (ordered by success rate).
    /// Falls back to an empty list when absent from the serialized data.
    #[serde(default)]
    pub recommended_paths: Vec<RecommendedPath>,

    /// Confidence score (0.0 - 1.0).
    pub confidence: f64,

    /// IDs of the sessions used for learning.
    pub learned_from: Vec<String>,

    /// Last-update timestamp (the constructors store Unix epoch seconds).
    pub updated_at: u64,
}

impl LearnedDepGraph {
    /// Creates a learned graph from `graph` and a flat `action_order`.
    ///
    /// The discover / not-discover orders, recommended paths and session list
    /// start empty, confidence starts at `0.0`, and `updated_at` is set to the
    /// current time in seconds since the Unix epoch.
    pub fn new(graph: DependencyGraph, action_order: Vec<String>) -> Self {
        // If the system clock reads earlier than the Unix epoch, fall back to 0.
        let created_at = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_secs());

        Self {
            graph,
            action_order,
            discover_order: Vec::new(),
            not_discover_order: Vec::new(),
            recommended_paths: Vec::new(),
            confidence: 0.0,
            learned_from: Vec::new(),
            updated_at: created_at,
        }
    }

    /// Creates a learned graph with separate discover / not-discover orders.
    ///
    /// `action_order` is filled with the discover actions followed by the
    /// not-discover actions; all remaining fields are initialised as in
    /// [`LearnedDepGraph::new`].
    pub fn with_orders(
        graph: DependencyGraph,
        discover_order: Vec<String>,
        not_discover_order: Vec<String>,
    ) -> Self {
        let combined: Vec<String> = discover_order
            .iter()
            .chain(not_discover_order.iter())
            .cloned()
            .collect();

        Self {
            discover_order,
            not_discover_order,
            ..Self::new(graph, combined)
        }
    }

    /// Sets the confidence score and returns the updated value.
    pub fn with_confidence(self, confidence: f64) -> Self {
        Self { confidence, ..self }
    }

    /// Sets the source session IDs, replacing any previously stored list.
    pub fn with_sessions(self, session_ids: Vec<String>) -> Self {
        Self {
            learned_from: session_ids,
            ..self
        }
    }

    /// Sets the recommended paths, replacing any previously stored list.
    pub fn with_recommended_paths(self, paths: Vec<RecommendedPath>) -> Self {
        Self {
            recommended_paths: paths,
            ..self
        }
    }
}

impl LearnedComponent for LearnedDepGraph {
    /// Identifier for this component: `"dep_graph"`.
    fn component_id() -> &'static str {
        "dep_graph"
    }

    /// Returns the stored confidence score.
    fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Returns the number of session IDs this graph was learned from.
    fn session_count(&self) -> usize {
        self.learned_from.len()
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        self.updated_at
    }

    /// Merges `other` into `self`, taking session count and confidence into
    /// account.
    ///
    /// * If `other` was learned from more sessions, or has a strictly higher
    ///   confidence, its graph, all three action orders (`action_order`,
    ///   `discover_order`, `not_discover_order`) and its confidence replace
    ///   the ones in `self`. The orders are adopted together with the graph
    ///   so that they never describe a different graph than the one stored.
    /// * Session IDs from `other` that are not yet present are appended.
    /// * Recommended paths from `other` whose `actions` are not yet present
    ///   are appended.
    /// * `updated_at` becomes the later of the two timestamps.
    fn merge(&mut self, other: &Self) {
        // Decide before touching `learned_from`, so the comparison sees the
        // session counts as they were prior to the merge.
        let other_is_preferred = other.learned_from.len() > self.learned_from.len()
            || other.confidence > self.confidence;
        if other_is_preferred {
            self.graph.clone_from(&other.graph);
            self.action_order.clone_from(&other.action_order);
            // `action_order` is deprecated in favour of these two fields, so
            // they must follow the adopted graph as well.
            self.discover_order.clone_from(&other.discover_order);
            self.not_discover_order
                .clone_from(&other.not_discover_order);
            self.confidence = other.confidence;
        }

        // Union of session IDs, preserving first-seen order.
        for id in &other.learned_from {
            if !self.learned_from.contains(id) {
                self.learned_from.push(id.clone());
            }
        }

        // Union of recommended paths, keyed by their action sequence.
        for path in &other.recommended_paths {
            let already_known = self
                .recommended_paths
                .iter()
                .any(|known| known.actions == path.actions);
            if !already_known {
                self.recommended_paths.push(path.clone());
            }
        }

        self.updated_at = self.updated_at.max(other.updated_at);
    }
}

// ============================================================================
// LearnedExploration - 学習済み探索パラメータ
// ============================================================================

/// Learned exploration parameters.
///
/// Parameters of the exploration strategy, such as the UCB1 exploration
/// coefficient and the learning weights.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedExploration {
    /// UCB1 exploration coefficient.
    pub ucb1_c: f64,

    /// Learning weight (0.0 - 1.0).
    pub learning_weight: f64,

    /// N-gram weight.
    pub ngram_weight: f64,

    /// Confidence score.
    pub confidence: f64,

    /// Number of learning sessions.
    pub session_count: usize,

    /// Last-update timestamp.
    pub updated_at: u64,
}

impl Default for LearnedExploration {
    /// Returns the baseline exploration parameters: no confidence, no
    /// sessions, and an `updated_at` of `0`.
    fn default() -> Self {
        // NOTE(review): 1.414 presumably approximates sqrt(2) — confirm.
        const UCB1_C: f64 = 1.414;
        const LEARNING_WEIGHT: f64 = 0.3;
        const NGRAM_WEIGHT: f64 = 1.0;

        Self {
            ucb1_c: UCB1_C,
            learning_weight: LEARNING_WEIGHT,
            ngram_weight: NGRAM_WEIGHT,
            confidence: 0.0,
            session_count: 0,
            updated_at: 0,
        }
    }
}

impl LearnedExploration {
    /// Creates exploration parameters from the three given weights.
    ///
    /// Confidence starts at `0.0`, the session count at `0`, and `updated_at`
    /// is set to the current time in seconds since the Unix epoch.
    pub fn new(ucb1_c: f64, learning_weight: f64, ngram_weight: f64) -> Self {
        // If the system clock reads earlier than the Unix epoch, fall back to 0.
        let created_at = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_secs());

        Self {
            ucb1_c,
            learning_weight,
            ngram_weight,
            confidence: 0.0,
            session_count: 0,
            updated_at: created_at,
        }
    }
}

/// `LearnedComponent` accessors for `LearnedExploration`.
///
/// `merge` and `version` are not overridden here, so the trait's default
/// implementations apply.
impl LearnedComponent for LearnedExploration {
    /// Identifier for this component: `"exploration"`.
    fn component_id() -> &'static str {
        "exploration"
    }

    /// Returns the stored confidence score.
    fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Returns the stored session count.
    fn session_count(&self) -> usize {
        self.session_count
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        self.updated_at
    }
}

// ============================================================================
// LearnedStrategy - 学習済み戦略設定
// ============================================================================

/// Learned strategy settings.
///
/// Covers the choice of initial strategy, the thresholds for switching
/// strategies, and similar settings.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedStrategy {
    /// Strategy selected initially.
    pub initial_strategy: String,

    /// Maturity threshold (number of runs after which a strategy switch is
    /// considered).
    pub maturity_threshold: usize,

    /// Error-rate threshold (exceeding it triggers a strategy switch).
    pub error_rate_threshold: f64,

    /// Confidence score.
    pub confidence: f64,

    /// Number of learning sessions.
    pub session_count: usize,

    /// Last-update timestamp.
    pub updated_at: u64,
}

impl Default for LearnedStrategy {
    /// Returns the baseline strategy settings: `"ucb1"` as the initial
    /// strategy, no confidence, no sessions, and an `updated_at` of `0`.
    fn default() -> Self {
        const INITIAL_STRATEGY: &str = "ucb1";
        const MATURITY_THRESHOLD: usize = 5;
        const ERROR_RATE_THRESHOLD: f64 = 0.45;

        Self {
            initial_strategy: String::from(INITIAL_STRATEGY),
            maturity_threshold: MATURITY_THRESHOLD,
            error_rate_threshold: ERROR_RATE_THRESHOLD,
            confidence: 0.0,
            session_count: 0,
            updated_at: 0,
        }
    }
}

/// `LearnedComponent` accessors for `LearnedStrategy`.
///
/// `merge` and `version` are not overridden here, so the trait's default
/// implementations apply.
impl LearnedComponent for LearnedStrategy {
    /// Identifier for this component: `"strategy"`.
    fn component_id() -> &'static str {
        "strategy"
    }

    /// Returns the stored confidence score.
    fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Returns the stored session count.
    fn session_count(&self) -> usize {
        self.session_count
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        self.updated_at
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::exploration::DependencyGraph;

    /// Turns a slice of string literals into an owned `Vec<String>`.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Builder methods populate the fields read by the trait accessors.
    #[test]
    fn test_learned_dep_graph_creation() {
        let dep_graph = LearnedDepGraph::new(DependencyGraph::new(), strings(&["A", "B"]))
            .with_confidence(0.8)
            .with_sessions(strings(&["s1", "s2"]));

        assert_eq!(dep_graph.confidence(), 0.8);
        assert_eq!(dep_graph.session_count(), 2);
        assert_eq!(LearnedDepGraph::component_id(), "dep_graph");
    }

    /// Merging adopts the preferred side's order and unions the session IDs.
    #[test]
    fn test_learned_dep_graph_merge() {
        let base_graph = DependencyGraph::new();

        let mut target = LearnedDepGraph::new(base_graph.clone(), strings(&["A"]))
            .with_confidence(0.5)
            .with_sessions(strings(&["s1"]));
        let incoming = LearnedDepGraph::new(base_graph, strings(&["A", "B"]))
            .with_confidence(0.8)
            .with_sessions(strings(&["s2", "s3"]));

        target.merge(&incoming);

        // Higher confidence wins for graph/order
        assert_eq!(target.confidence, 0.8);
        assert_eq!(target.action_order.len(), 2);
        // Sessions are combined
        assert_eq!(target.learned_from.len(), 3);
    }

    /// Default exploration parameters and component identifier.
    #[test]
    fn test_learned_exploration_default() {
        let defaults = LearnedExploration::default();

        assert_eq!(defaults.ucb1_c, 1.414);
        assert_eq!(LearnedExploration::component_id(), "exploration");
    }

    /// Default strategy settings and component identifier.
    #[test]
    fn test_learned_strategy_default() {
        let defaults = LearnedStrategy::default();

        assert_eq!(defaults.initial_strategy, "ucb1");
        assert_eq!(LearnedStrategy::component_id(), "strategy");
    }

    /// A JSON round trip preserves the exploration coefficient.
    #[test]
    fn test_serialization() {
        let original = LearnedExploration::new(2.0, 0.5, 1.5);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LearnedExploration = serde_json::from_str(&encoded).unwrap();

        assert_eq!(decoded.ucb1_c, 2.0);
    }
}