swarm-engine-core 0.1.6

Core types and orchestration for SwarmEngine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
//! LearnedComponent - 学習結果の型安全な抽象化
//!
//! ## 設計思想
//!
//! ComponentLearner(学習プロセス)と LearnedComponent(学習結果)をペアで定義。
//! 各学習対象ごとに専用の型を持ち、Map/Any を使わず型安全性を確保する。
//!
//! ## 背景
//!
//! - 従来の ML: 口調、好み程度 → Map でも許容
//! - Swarm Learning: Control の知識そのものが Domain → Typed 必須
//!
//! ## 使用例
//!
//! ```ignore
//! // ComponentLearner と LearnedComponent はペアで定義
//! struct DepGraphLearner;
//!
//! impl ComponentLearner for DepGraphLearner {
//!     type Output = LearnedDepGraph;
//!
//!     fn learn(&self, episodes: &[Episode]) -> Result<Self::Output, LearnError> {
//!         // Episodes から LearnedDepGraph を生成
//!     }
//! }
//!
//! impl LearnedComponent for LearnedDepGraph {
//!     fn component_id() -> &'static str { "dep_graph" }
//!     // ...
//! }
//! ```
//!
//! ## LearnModel vs ComponentLearner
//!
//! - `LearnModel`: Episode → TrainingData (LoRA fine-tuning 用)
//! - `ComponentLearner`: Episodes → LearnedComponent (ScenarioProfile 用)

use serde::{de::DeserializeOwned, Serialize};

use super::episode::Episode;
use super::learn_model::LearnError;

// ============================================================================
// ComponentLearner Trait
// ============================================================================

/// Learning process for a single ScenarioProfile component.
///
/// Turns a collection of [`Episode`]s into a typed [`LearnedComponent`].
/// Unlike `LearnModel` (described in the module docs as the LoRA-oriented
/// path), each implementor produces one dedicated, strongly typed output.
pub trait ComponentLearner: Send + Sync {
    /// Concrete type of the learned result produced by this learner.
    type Output: LearnedComponent;

    /// Name of this learner.
    fn name(&self) -> &str;

    /// Description of what this learner is trying to achieve.
    fn objective(&self) -> &str;

    /// Builds a learned component from the given episodes.
    ///
    /// # Errors
    ///
    /// Returns a [`LearnError`] when the implementor cannot learn from
    /// `episodes`.
    fn learn(&self, episodes: &[Episode]) -> Result<Self::Output, LearnError>;

    /// Incrementally updates an existing component with new episodes.
    ///
    /// The default implementation learns a fresh component from
    /// `new_episodes` only and then merges `existing` into it through
    /// [`LearnedComponent::merge`]; how the two are combined is decided by
    /// the `merge` implementation of `Self::Output`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`ComponentLearner::learn`].
    fn update(
        &self,
        existing: &Self::Output,
        new_episodes: &[Episode],
    ) -> Result<Self::Output, LearnError> {
        self.learn(new_episodes).map(|mut fresh| {
            fresh.merge(existing);
            fresh
        })
    }
}

// ============================================================================
// LearnedComponent Trait
// ============================================================================

/// Common trait for learned-result components.
///
/// Implemented by the result type of each learning target (dependency graph,
/// strategy, exploration, ...). Keeping every result in its own type gives
/// type safety and IDE support instead of an untyped map.
pub trait LearnedComponent: Send + Sync + Serialize + DeserializeOwned + Clone {
    /// Component identifier (used for file names and similar).
    fn component_id() -> &'static str
    where
        Self: Sized;

    /// Confidence score, documented as lying in `0.0..=1.0`.
    ///
    /// Reflects the amount and quality of the training data. When it is low,
    /// consider running additional Bootstrap sessions.
    fn confidence(&self) -> f64;

    /// Number of sessions used for learning.
    fn session_count(&self) -> usize;

    /// Timestamp of the last update (Unix epoch seconds).
    fn updated_at(&self) -> u64;

    /// Merges another component into this one (for incremental learning).
    ///
    /// Default implementation: if `other` reports a strictly higher
    /// confidence, `self` is replaced by a clone of `other`; otherwise `self`
    /// is left untouched.
    fn merge(&mut self, other: &Self)
    where
        Self: Sized,
    {
        let theirs = other.confidence();
        let ours = self.confidence();
        if theirs > ours {
            *self = other.clone();
        }
    }

    /// Version number (for compatibility checks). Defaults to `1`.
    fn version() -> u32
    where
        Self: Sized,
    {
        1
    }
}

// ============================================================================
// LearnedDepGraph - 学習済み依存グラフ
// ============================================================================

use crate::exploration::DependencyGraph;

// Re-use existing RecommendedPath from offline module
pub use super::offline::RecommendedPath;

/// Learned dependency graph.
///
/// Per the original design notes: learned from the ground-truth graph during
/// the Bootstrap phase, and used in the Release phase to decide the action
/// order immediately without calling an LLM.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedDepGraph {
    /// The dependency graph itself.
    pub graph: DependencyGraph,

    /// Learned action order (topologically sorted).
    /// Deprecated: use `discover_order` + `not_discover_order` instead.
    pub action_order: Vec<String>,

    /// Order of Discover actions (NodeExpand family).
    /// Falls back to an empty list when missing from serialized data.
    #[serde(default)]
    pub discover_order: Vec<String>,

    /// Order of Not-Discover actions (NodeStateChange family).
    /// Falls back to an empty list when missing from serialized data.
    #[serde(default)]
    pub not_discover_order: Vec<String>,

    /// Recommended paths (described as ordered by success rate; nothing in
    /// this struct enforces that ordering).
    /// Falls back to an empty list when missing from serialized data.
    #[serde(default)]
    pub recommended_paths: Vec<RecommendedPath>,

    /// Confidence (0.0 - 1.0).
    pub confidence: f64,

    /// IDs of the sessions this graph was learned from.
    pub learned_from: Vec<String>,

    /// Timestamp of the last update.
    pub updated_at: u64,
}

impl LearnedDepGraph {
    /// Current wall-clock time as Unix epoch seconds.
    ///
    /// Falls back to `0` when the system clock reports a time before the
    /// Unix epoch.
    fn now_epoch_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_secs())
    }

    /// Creates a learned graph from a dependency graph and a combined action
    /// order.
    ///
    /// `discover_order`, `not_discover_order`, `recommended_paths` and
    /// `learned_from` start empty, `confidence` starts at `0.0`, and
    /// `updated_at` is set to the current time.
    pub fn new(graph: DependencyGraph, action_order: Vec<String>) -> Self {
        Self {
            graph,
            action_order,
            discover_order: Vec::new(),
            not_discover_order: Vec::new(),
            recommended_paths: Vec::new(),
            confidence: 0.0,
            learned_from: Vec::new(),
            updated_at: Self::now_epoch_secs(),
        }
    }

    /// Creates a learned graph with separate discover / not-discover orders.
    ///
    /// The deprecated `action_order` field is filled with `discover_order`
    /// followed by `not_discover_order`. All other fields are initialised as
    /// in [`LearnedDepGraph::new`].
    pub fn with_orders(
        graph: DependencyGraph,
        discover_order: Vec<String>,
        not_discover_order: Vec<String>,
    ) -> Self {
        // Build the combined order in a single pass; this avoids cloning the
        // whole `not_discover_order` vector just to append it.
        let action_order: Vec<String> = discover_order
            .iter()
            .chain(not_discover_order.iter())
            .cloned()
            .collect();
        Self {
            graph,
            action_order,
            discover_order,
            not_discover_order,
            recommended_paths: Vec::new(),
            confidence: 0.0,
            learned_from: Vec::new(),
            updated_at: Self::now_epoch_secs(),
        }
    }

    /// Sets the confidence and returns the updated value.
    ///
    /// The value is stored as given; no range check is performed.
    pub fn with_confidence(mut self, confidence: f64) -> Self {
        self.confidence = confidence;
        self
    }

    /// Sets the source session IDs and returns the updated value.
    ///
    /// Replaces (does not append to) any previously stored session IDs.
    pub fn with_sessions(mut self, session_ids: Vec<String>) -> Self {
        self.learned_from = session_ids;
        self
    }

    /// Sets the recommended paths and returns the updated value.
    ///
    /// Replaces (does not append to) any previously stored paths.
    pub fn with_recommended_paths(mut self, paths: Vec<RecommendedPath>) -> Self {
        self.recommended_paths = paths;
        self
    }
}

impl LearnedComponent for LearnedDepGraph {
    /// Identifier of this component: `"dep_graph"`.
    fn component_id() -> &'static str {
        "dep_graph"
    }

    /// Returns the stored confidence.
    fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Number of session IDs recorded in `learned_from`.
    fn session_count(&self) -> usize {
        self.learned_from.len()
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        self.updated_at
    }

    /// Merges `other` into `self`.
    ///
    /// - If `other` was learned from more sessions or has a strictly higher
    ///   confidence, its graph, all three action orderings and its confidence
    ///   replace the ones in `self`.
    /// - Session IDs from `other` that are not yet present are appended.
    /// - Recommended paths from `other` whose `actions` are not yet present
    ///   are appended.
    /// - `updated_at` becomes the later of the two timestamps.
    fn merge(&mut self, other: &Self) {
        // Take both session count and confidence into account when deciding
        // whose graph wins.
        let other_wins = other.learned_from.len() > self.learned_from.len()
            || other.confidence > self.confidence;
        if other_wins {
            self.graph.clone_from(&other.graph);
            self.action_order.clone_from(&other.action_order);
            // The discover / not-discover orders describe the same graph as
            // `action_order`, so they must move together with it; otherwise
            // the merged value pairs the new graph with stale orderings.
            self.discover_order.clone_from(&other.discover_order);
            self.not_discover_order
                .clone_from(&other.not_discover_order);
            self.confidence = other.confidence;
        }
        // Union of session IDs, keeping the existing order.
        for id in &other.learned_from {
            if !self.learned_from.contains(id) {
                self.learned_from.push(id.clone());
            }
        }
        // Union of recommended paths, keyed by their action sequence.
        for path in &other.recommended_paths {
            if !self
                .recommended_paths
                .iter()
                .any(|p| p.actions == path.actions)
            {
                self.recommended_paths.push(path.clone());
            }
        }
        self.updated_at = other.updated_at.max(self.updated_at);
    }
}

// ============================================================================
// LearnedExploration - 学習済み探索パラメータ
// ============================================================================

/// Learned exploration parameters.
///
/// Parameters of the exploration strategy, such as the UCB1 exploration
/// coefficient and the learning weight.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedExploration {
    /// UCB1 exploration coefficient.
    pub ucb1_c: f64,

    /// Learning weight (0.0 - 1.0).
    pub learning_weight: f64,

    /// N-gram weight.
    pub ngram_weight: f64,

    /// Confidence.
    pub confidence: f64,

    /// Number of learning sessions.
    pub session_count: usize,

    /// Timestamp of the last update.
    pub updated_at: u64,
}

impl Default for LearnedExploration {
    /// Baseline exploration parameters for a component that has not learned
    /// anything yet: zero confidence, zero sessions and a zero timestamp.
    fn default() -> Self {
        let (ucb1_c, learning_weight, ngram_weight) = (1.414, 0.3, 1.0);
        Self {
            updated_at: 0,
            session_count: 0,
            confidence: 0.0,
            ngram_weight,
            learning_weight,
            ucb1_c,
        }
    }
}

impl LearnedExploration {
    /// Creates exploration parameters from explicit weights.
    ///
    /// `confidence` and `session_count` start at zero. `updated_at` is set
    /// to the current time in Unix epoch seconds, or `0` when the system
    /// clock reports a time before the epoch.
    pub fn new(ucb1_c: f64, learning_weight: f64, ngram_weight: f64) -> Self {
        let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
        let updated_at = match since_epoch {
            Ok(elapsed) => elapsed.as_secs(),
            Err(_) => 0,
        };

        Self {
            ucb1_c,
            learning_weight,
            ngram_weight,
            confidence: 0.0,
            session_count: 0,
            updated_at,
        }
    }
}

impl LearnedComponent for LearnedExploration {
    /// Identifier of this component: `"exploration"`.
    fn component_id() -> &'static str {
        const ID: &str = "exploration";
        ID
    }

    /// Returns the stored confidence.
    fn confidence(&self) -> f64 {
        let Self { confidence, .. } = self;
        *confidence
    }

    /// Returns the stored number of learning sessions.
    fn session_count(&self) -> usize {
        let Self { session_count, .. } = self;
        *session_count
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        let Self { updated_at, .. } = self;
        *updated_at
    }
}

// ============================================================================
// LearnedStrategy - 学習済み戦略設定
// ============================================================================

/// Learned strategy settings.
///
/// Covers the choice of the initial strategy and the thresholds for
/// switching strategies.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LearnedStrategy {
    /// Strategy selected initially.
    pub initial_strategy: String,

    /// Maturity threshold (after how many executions a strategy switch is
    /// considered).
    pub maturity_threshold: usize,

    /// Error-rate threshold (switch strategy when this is exceeded).
    pub error_rate_threshold: f64,

    /// Confidence.
    pub confidence: f64,

    /// Number of learning sessions.
    pub session_count: usize,

    /// Timestamp of the last update.
    pub updated_at: u64,
}

impl Default for LearnedStrategy {
    /// Baseline strategy settings for a component that has not learned
    /// anything yet: zero confidence, zero sessions and a zero timestamp.
    fn default() -> Self {
        let initial_strategy = String::from("ucb1");
        Self {
            updated_at: 0,
            session_count: 0,
            confidence: 0.0,
            error_rate_threshold: 0.45,
            maturity_threshold: 5,
            initial_strategy,
        }
    }
}

impl LearnedComponent for LearnedStrategy {
    /// Identifier of this component: `"strategy"`.
    fn component_id() -> &'static str {
        const ID: &str = "strategy";
        ID
    }

    /// Returns the stored confidence.
    fn confidence(&self) -> f64 {
        let Self { confidence, .. } = self;
        *confidence
    }

    /// Returns the stored number of learning sessions.
    fn session_count(&self) -> usize {
        let Self { session_count, .. } = self;
        *session_count
    }

    /// Returns the stored last-update timestamp.
    fn updated_at(&self) -> u64 {
        let Self { updated_at, .. } = self;
        *updated_at
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::exploration::DependencyGraph;

    /// Builds an owned `Vec<String>` from string literals for test fixtures.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Builder methods feed the values reported through the trait getters.
    #[test]
    fn test_learned_dep_graph_creation() {
        let empty_graph = DependencyGraph::new();
        let learned = LearnedDepGraph::new(empty_graph, strings(&["A", "B"]))
            .with_confidence(0.8)
            .with_sessions(strings(&["s1", "s2"]));

        assert_eq!(learned.confidence(), 0.8);
        assert_eq!(learned.session_count(), 2);
        assert_eq!(LearnedDepGraph::component_id(), "dep_graph");
    }

    /// Merging a stronger component replaces the order and unions sessions.
    #[test]
    fn test_learned_dep_graph_merge() {
        let shared_graph = DependencyGraph::new();
        let mut target = LearnedDepGraph::new(shared_graph.clone(), strings(&["A"]))
            .with_confidence(0.5)
            .with_sessions(strings(&["s1"]));

        let stronger = LearnedDepGraph::new(shared_graph, strings(&["A", "B"]))
            .with_confidence(0.8)
            .with_sessions(strings(&["s2", "s3"]));

        target.merge(&stronger);

        // Higher confidence wins for graph/order
        assert_eq!(target.confidence, 0.8);
        assert_eq!(target.action_order.len(), 2);
        // Sessions are combined
        assert_eq!(target.learned_from.len(), 3);
    }

    /// Default exploration parameters expose the expected coefficient and id.
    #[test]
    fn test_learned_exploration_default() {
        let defaults = LearnedExploration::default();
        assert_eq!(defaults.ucb1_c, 1.414);
        assert_eq!(LearnedExploration::component_id(), "exploration");
    }

    /// Default strategy settings expose the expected strategy name and id.
    #[test]
    fn test_learned_strategy_default() {
        let defaults = LearnedStrategy::default();
        assert_eq!(defaults.initial_strategy, "ucb1");
        assert_eq!(LearnedStrategy::component_id(), "strategy");
    }

    /// A JSON round trip preserves the exploration coefficient.
    #[test]
    fn test_serialization() {
        let original = LearnedExploration::new(2.0, 0.5, 1.5);
        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LearnedExploration = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.ucb1_c, 2.0);
    }
}