swarm-engine-core 0.1.6

//! Offline Learning - セッション間学習の分析・最適化
//!
//! 複数セッションの統計データを分析し、最適なパラメータや方針を導出する。
//!
//! # アーキテクチャ
//!
//! ```text
//! LearningStore (sessions/*.json)
//!      ↓
//! OfflineAnalyzer
//!  ├── analyze_parameters() → OptimalParameters
//!  ├── extract_paths() → Vec<RecommendedPath>
//!  └── analyze_strategy() → StrategyConfig
//!      ↓
//! OfflineModel (保存)
//!      ↓
//! 次回セッション開始時に読み込み → Orchestrator/Provider に反映
//! ```
//!
//! # 使用例
//!
//! ```ignore
//! use swarm_engine_core::learn::{LearningStore, OfflineAnalyzer, OfflineModel};
//!
//! // 履歴データを分析
//! let store = LearningStore::new("./learning")?;
//! let snapshots = store.query_latest("my-scenario", 10)?;
//! let analyzer = OfflineAnalyzer::new(&snapshots);
//!
//! // 最適パラメータを算出
//! let model = analyzer.analyze();
//!
//! // 保存
//! store.save_offline_model("my-scenario", &model)?;
//!
//! // 次回セッションで読み込み
//! let model = store.load_offline_model("my-scenario")?;
//! builder.with_offline_model(model)
//! ```

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use super::snapshot::LearningSnapshot;

/// Offline learning model.
///
/// Holds the results of analyzing multiple sessions so that they can be
/// applied to the next session.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OfflineModel {
    /// Model version.
    pub version: u32,
    /// Optimized parameters.
    pub parameters: OptimalParameters,
    /// Recommended action paths (ordered by success rate).
    pub recommended_paths: Vec<RecommendedPath>,
    /// Selection strategy configuration.
    pub strategy_config: StrategyConfig,
    /// Number of sessions used for the analysis.
    pub analyzed_sessions: usize,
    /// Last-updated timestamp (`OfflineAnalyzer::analyze` writes seconds since the Unix epoch).
    pub updated_at: u64,
    /// Learned action order (used as a DependencyGraph cache).
    ///
    /// Falls back to `None` when the field is absent from the serialized data.
    #[serde(default)]
    pub action_order: Option<LearnedActionOrder>,
}

/// Learned action order.
///
/// A cache for building a DependencyGraph immediately, without an LLM.
/// When the action set is the same, the graph can be generated without calling the LLM.
///
/// # Fields
///
/// - `discover` / `not_discover`: action execution order
/// - `action_set_hash`: identifier of the action set (used for matching)
/// - `lora`: LoRA configuration associated with this entry
/// - `validated_accuracy`: validated accuracy (optional)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LearnedActionOrder {
    /// Order of the Discover (NodeExpand) actions.
    pub discover: Vec<String>,
    /// Order of the NotDiscover (NodeStateChange) actions.
    pub not_discover: Vec<String>,
    /// Hash of the action set (used to decide cache hits).
    ///
    /// Two action sets are treated as identical when their hashes are equal.
    pub action_set_hash: u64,
    /// Where this order came from (for debugging).
    ///
    /// Falls back to the enum default when absent from the serialized data.
    #[serde(default)]
    pub source: ActionOrderSource,
    /// Associated LoRA configuration (optional).
    #[serde(default)]
    pub lora: Option<crate::types::LoraConfig>,
    /// Validated accuracy (optional).
    #[serde(default)]
    pub validated_accuracy: Option<f64>,
}

/// Origin of an action order.
///
/// `Llm` is the default variant.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub enum ActionOrderSource {
    /// Generated by an LLM.
    #[default]
    Llm,
    /// Static pattern.
    Static,
    /// User-defined.
    Manual,
}

impl LearnedActionOrder {
    /// Creates a new `LearnedActionOrder`.
    ///
    /// `discover` and `not_discover` are stored as given. `actions` is only used to
    /// compute `action_set_hash` (see [`Self::compute_hash`]). The source is set to
    /// [`ActionOrderSource::Llm`], with no LoRA configuration and no validated accuracy.
    pub fn new(discover: Vec<String>, not_discover: Vec<String>, actions: &[String]) -> Self {
        Self {
            discover,
            not_discover,
            action_set_hash: Self::compute_hash(actions),
            source: ActionOrderSource::Llm,
            lora: None,
            validated_accuracy: None,
        }
    }

    /// Attaches a LoRA configuration (builder style).
    pub fn with_lora(mut self, lora: crate::types::LoraConfig) -> Self {
        self.lora = Some(lora);
        self
    }

    /// Sets the validated accuracy (builder style).
    pub fn with_accuracy(mut self, accuracy: f64) -> Self {
        self.validated_accuracy = Some(accuracy);
        self
    }

    /// Sets the origin of this order (builder style).
    pub fn with_source(mut self, source: ActionOrderSource) -> Self {
        self.source = source;
        self
    }

    /// Computes the hash of an action set.
    ///
    /// The names are sorted and de-duplicated before hashing, so the result depends
    /// only on the *set* of names: neither the order nor repeated entries matter.
    /// For inputs without duplicates the value is the same as hashing the sorted list.
    ///
    /// NOTE(review): this uses `DefaultHasher`, whose algorithm is not guaranteed to
    /// stay the same across Rust releases. Because the hash is persisted, stored
    /// values may stop matching after a toolchain upgrade.
    pub fn compute_hash(actions: &[String]) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut names: Vec<&str> = actions.iter().map(String::as_str).collect();
        names.sort_unstable();
        // Collapse repeated names so the hash reflects set membership only.
        names.dedup();

        let mut hasher = DefaultHasher::new();
        for name in names {
            name.hash(&mut hasher);
        }
        hasher.finish()
    }

    /// Returns `true` when `actions` is the same action set as the one this order
    /// was built for.
    ///
    /// The comparison is done on hashes only, so a hash collision would also
    /// report a match.
    pub fn is_exact_match(&self, actions: &[String]) -> bool {
        self.action_set_hash == Self::compute_hash(actions)
    }

    /// Alias of [`Self::is_exact_match`], kept for backward compatibility.
    #[inline]
    pub fn matches_actions(&self, actions: &[String]) -> bool {
        self.is_exact_match(actions)
    }

    /// Computes the match rate (Jaccard index) between this order's actions and `actions`.
    ///
    /// This order's action set is the union of `discover` and `not_discover`.
    /// Returns `1.0` when both sides are empty, `0.0` when exactly one side is empty,
    /// and `|intersection| / |union|` otherwise.
    pub fn match_rate(&self, actions: &[String]) -> f64 {
        use std::collections::HashSet;

        // Borrow the names instead of cloning the vectors.
        let own: HashSet<&str> = self
            .discover
            .iter()
            .chain(&self.not_discover)
            .map(String::as_str)
            .collect();
        let other: HashSet<&str> = actions.iter().map(String::as_str).collect();

        match (own.is_empty(), other.is_empty()) {
            (true, true) => return 1.0,
            (true, false) | (false, true) => return 0.0,
            (false, false) => {}
        }

        let shared = own.intersection(&other).count();
        let combined = own.union(&other).count();

        shared as f64 / combined as f64
    }
}

/// Optimized parameters.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimalParameters {
    /// Exploration coefficient for UCB1.
    pub ucb1_c: f64,
    /// Learning bonus coefficient.
    pub learning_weight: f64,
    /// N-gram bonus coefficient (weight of trigrams).
    pub ngram_weight: f64,
}

impl Default for OptimalParameters {
    fn default() -> Self {
        Self {
            ucb1_c: std::f64::consts::SQRT_2,
            learning_weight: 0.3,
            ngram_weight: 1.0,
        }
    }
}

/// Recommended action path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecommendedPath {
    /// Action sequence.
    pub actions: Vec<String>,
    /// Success rate.
    pub success_rate: f64,
    /// Number of observations.
    pub observations: u32,
}

/// Selection strategy configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrategyConfig {
    /// Maturity threshold (mature once visited at least this many times).
    pub maturity_threshold: u32,
    /// Error-rate threshold (Thompson is used at or above this rate).
    pub error_rate_threshold: f64,
    /// Recommended initial strategy.
    pub initial_strategy: String,
}

impl Default for StrategyConfig {
    fn default() -> Self {
        Self {
            maturity_threshold: 10,
            error_rate_threshold: 0.3,
            initial_strategy: "ucb1".to_string(),
        }
    }
}

impl Default for OfflineModel {
    fn default() -> Self {
        Self {
            version: 1,
            parameters: OptimalParameters::default(),
            recommended_paths: Vec::new(),
            strategy_config: StrategyConfig::default(),
            analyzed_sessions: 0,
            updated_at: 0,
            action_order: None,
        }
    }
}

/// Offline analyzer.
///
/// Analyzes multiple `LearningSnapshot`s and derives optimal parameters and policies.
/// The analyzer borrows the snapshot slice for the lifetime `'a`.
pub struct OfflineAnalyzer<'a> {
    // Snapshots to analyze (borrowed, never modified).
    snapshots: &'a [LearningSnapshot],
}

impl<'a> OfflineAnalyzer<'a> {
    /// Creates a new analyzer over `snapshots`.
    pub fn new(snapshots: &'a [LearningSnapshot]) -> Self {
        Self { snapshots }
    }

    /// Runs every analysis and assembles the results into an [`OfflineModel`].
    ///
    /// `updated_at` is the current time in seconds since the Unix epoch, or `0` when
    /// the system clock reports a time before the epoch. `action_order` is always
    /// `None` here; it is set separately.
    pub fn analyze(&self) -> OfflineModel {
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|d| d.as_secs())
            .unwrap_or(0);

        OfflineModel {
            version: 1,
            parameters: self.analyze_parameters(),
            recommended_paths: self.extract_paths(),
            strategy_config: self.analyze_strategy(),
            analyzed_sessions: self.snapshots.len(),
            updated_at: now,
            action_order: None, // set separately
        }
    }

    /// Sums the success and failure episode counts over all snapshots.
    ///
    /// The totals are accumulated in `u64` so that many sessions with large `u32`
    /// counters cannot overflow.
    fn episode_totals(&self) -> (u64, u64) {
        self.snapshots.iter().fold((0u64, 0u64), |(ok, ng), s| {
            (
                ok + u64::from(s.episode_transitions.success_episodes),
                ng + u64::from(s.episode_transitions.failure_episodes),
            )
        })
    }

    /// Parameter optimization.
    ///
    /// Derives the UCB1 `c`, `learning_weight` and `ngram_weight` from the history
    /// using simple statistics-based heuristics. Returns the defaults when there
    /// are no snapshots.
    pub fn analyze_parameters(&self) -> OptimalParameters {
        if self.snapshots.is_empty() {
            return OptimalParameters::default();
        }

        // Overall success rate; 0.5 (neutral) when no episodes were recorded.
        let (successes, failures) = self.episode_totals();
        let episodes = successes + failures;
        let success_rate = if episodes > 0 {
            successes as f64 / episodes as f64
        } else {
            0.5
        };

        // Tune UCB1 c from the success rate:
        // - high success rate (> 0.8): favor exploitation -> lower c
        // - low success rate (< 0.5): favor exploration -> raise c
        let ucb1_c = if success_rate > 0.8 {
            1.0
        } else if success_rate < 0.5 {
            2.0
        } else {
            std::f64::consts::SQRT_2
        };

        // Weight N-grams more when they discriminate well, less when they do not.
        let ngram_effectiveness = self.evaluate_ngram_effectiveness();
        let ngram_weight = if ngram_effectiveness > 0.7 {
            1.5
        } else if ngram_effectiveness < 0.3 {
            0.5
        } else {
            1.0
        };

        OptimalParameters {
            ucb1_c,
            learning_weight: 0.3, // fixed for now
            ngram_weight,
        }
    }

    /// Evaluates how useful N-grams are for selection.
    ///
    /// The larger the variance of trigram success rates, the more the trigrams
    /// discriminate between good and bad choices. Only trigrams observed at least
    /// 3 times are considered. Returns a value in `[0, 1]`, or `0.5` when there is
    /// not enough data.
    fn evaluate_ngram_effectiveness(&self) -> f64 {
        let mut all_rates: Vec<f64> = Vec::new();

        for snapshot in self.snapshots {
            for &(success, failure) in snapshot.ngram_stats.trigrams.values() {
                // Widen before adding so the sum cannot overflow u32.
                let total = u64::from(success) + u64::from(failure);
                if total >= 3 {
                    all_rates.push(success as f64 / total as f64);
                }
            }
        }

        if all_rates.is_empty() {
            return 0.5; // not enough data
        }

        // Population variance of the success rates.
        let count = all_rates.len() as f64;
        let mean = all_rates.iter().sum::<f64>() / count;
        let variance = all_rates.iter().map(|r| (r - mean).powi(2)).sum::<f64>() / count;

        // Scale to [0, 1]: 0.25 is the largest possible variance for rates in [0, 1].
        (variance / 0.25).min(1.0)
    }

    /// Extracts recommended paths.
    ///
    /// Trigram statistics are merged across snapshots; trigrams with fewer than
    /// 5 observations in total are dropped. The result holds at most 10 paths,
    /// ordered by success rate (descending). Ties are broken by observation count
    /// (descending) and then by the action names (ascending), so the output does
    /// not depend on hash-map iteration order.
    ///
    /// `observations` saturates at `u32::MAX` if the merged count exceeds it.
    pub fn extract_paths(&self) -> Vec<RecommendedPath> {
        // Merge per-trigram (success, failure) counts; u64 avoids overflow while summing.
        let mut path_stats: HashMap<Vec<String>, (u64, u64)> = HashMap::new();

        for snapshot in self.snapshots {
            for (key, &(success, failure)) in &snapshot.ngram_stats.trigrams {
                let path = vec![key.0.clone(), key.1.clone(), key.2.clone()];
                let entry = path_stats.entry(path).or_insert((0, 0));
                entry.0 += u64::from(success);
                entry.1 += u64::from(failure);
            }
        }

        let mut paths: Vec<RecommendedPath> = path_stats
            .into_iter()
            .filter(|(_, (s, f))| s + f >= 5) // at least 5 observations
            .map(|(actions, (success, failure))| {
                let total = success + failure;
                RecommendedPath {
                    actions,
                    success_rate: success as f64 / total as f64,
                    observations: total.min(u64::from(u32::MAX)) as u32,
                }
            })
            .collect();

        // Best success rate first, with a total order so ties are reproducible.
        paths.sort_by(|a, b| {
            b.success_rate
                .total_cmp(&a.success_rate)
                .then_with(|| b.observations.cmp(&a.observations))
                .then_with(|| a.actions.cmp(&b.actions))
        });

        paths.truncate(10); // top 10 paths
        paths
    }

    /// Analyzes the strategy configuration.
    ///
    /// Derives the settings from the history. Returns the defaults when there are
    /// no snapshots.
    ///
    /// - `maturity_threshold`: 10% of the average `total_actions` per snapshot,
    ///   clamped to `[5, 50]`.
    /// - `initial_strategy`: `"thompson"` when the error rate is above 0.4,
    ///   `"greedy"` when below 0.1, `"ucb1"` otherwise.
    /// - `error_rate_threshold`: 1.5 times the average error rate, capped at 0.5.
    ///
    /// NOTE(review): with zero recorded failures the threshold becomes `0.0`;
    /// confirm consumers handle that as intended.
    pub fn analyze_strategy(&self) -> StrategyConfig {
        if self.snapshots.is_empty() {
            return StrategyConfig::default();
        }

        // Overall error rate; 0.3 when no episodes were recorded.
        let (successes, failures) = self.episode_totals();
        let episodes = successes + failures;
        let avg_error_rate = if episodes > 0 {
            failures as f64 / episodes as f64
        } else {
            0.3
        };

        // Estimate the maturity threshold from the total number of actions.
        let total_actions: u64 = self
            .snapshots
            .iter()
            .map(|s| s.metadata.total_actions as u64)
            .sum();
        // The slice is non-empty here because of the early return above.
        let avg_actions = total_actions as f64 / self.snapshots.len() as f64;

        // 10% of the average action count; the float-to-int cast saturates.
        let maturity_threshold = ((avg_actions * 0.1) as u32).clamp(5, 50);

        let initial_strategy = if avg_error_rate > 0.4 {
            "thompson" // high error rate -> favor exploration
        } else if avg_error_rate < 0.1 {
            "greedy" // low error rate -> favor exploitation
        } else {
            "ucb1" // balanced
        };

        StrategyConfig {
            maturity_threshold,
            error_rate_threshold: (avg_error_rate * 1.5).min(0.5),
            initial_strategy: initial_strategy.to_string(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a snapshot with the given episode counts and five actions per episode.
    fn create_test_snapshot(success: u32, failure: u32) -> LearningSnapshot {
        let episodes = success + failure;
        let mut snap = LearningSnapshot::empty();
        snap.metadata.total_actions = episodes * 5;
        snap.episode_transitions.failure_episodes = failure;
        snap.episode_transitions.success_episodes = success;
        snap
    }

    /// Turns `(success, failure)` pairs into one snapshot each.
    fn snapshots_from(counts: &[(u32, u32)]) -> Vec<LearningSnapshot> {
        counts
            .iter()
            .map(|&(ok, ng)| create_test_snapshot(ok, ng))
            .collect()
    }

    #[test]
    fn test_analyzer_empty_snapshots() {
        let no_history: Vec<LearningSnapshot> = Vec::new();
        let model = OfflineAnalyzer::new(&no_history).analyze();

        // With no history the analyzer falls back to the default parameters.
        assert_eq!(model.analyzed_sessions, 0);
        let delta = model.parameters.ucb1_c - std::f64::consts::SQRT_2;
        assert!(delta.abs() < 0.01);
    }

    #[test]
    fn test_analyzer_high_success_rate() {
        let history = snapshots_from(&[(9, 1), (8, 2), (10, 0)]);
        let tuned = OfflineAnalyzer::new(&history).analyze_parameters();

        // High success rate -> lower ucb1_c (favor exploitation).
        assert!(tuned.ucb1_c < std::f64::consts::SQRT_2);
    }

    #[test]
    fn test_analyzer_low_success_rate() {
        let history = snapshots_from(&[(3, 7), (4, 6), (2, 8)]);
        let tuned = OfflineAnalyzer::new(&history).analyze_parameters();

        // Low success rate -> higher ucb1_c (favor exploration).
        assert!(tuned.ucb1_c > std::f64::consts::SQRT_2);
    }

    #[test]
    fn test_strategy_config_high_error() {
        let history = snapshots_from(&[(3, 7), (4, 6)]);
        let strategy = OfflineAnalyzer::new(&history).analyze_strategy();

        assert_eq!(strategy.initial_strategy, "thompson");
    }

    #[test]
    fn test_strategy_config_low_error() {
        let history = snapshots_from(&[(19, 1), (18, 2)]);
        let strategy = OfflineAnalyzer::new(&history).analyze_strategy();

        assert_eq!(strategy.initial_strategy, "greedy");
    }
}