Skip to main content

organism_runtime/
tournament.rs

1//! Formation tournament — run competing formations and learn from the comparison.
2//!
3//! Runs N `Formation`s on the same intent seed, scores each result using only
4//! `ConvergeResult` fields (convergence, cycle efficiency, criteria coverage),
5//! picks a winner, and produces `PriorCalibration` updates ready to feed into
6//! the next `PlanningPriorAgent` seed.
7//!
8//! No new Converge types. No wrapper layers. Uses only public `converge-kernel`
9//! fields and `organism-learning`'s adapter.
10
11use converge_kernel::CriterionResult;
12use uuid::Uuid;
13
14use crate::formation::{Formation, FormationError, FormationResult};
15use organism_learning::PriorCalibration;
16use organism_learning::adapter::calibrate_priors;
17use organism_learning::{ErrorDimension, LearningEpisode, PredictionError};
18
19// ── Score ─────────────────────────────────────────────────────────────────────
20
/// Outcome summary for a single formation run.
///
/// Every field is computed from the run's `ConvergeResult`; nothing else
/// feeds into the score.
#[derive(Debug, Clone)]
pub struct FormationScore {
    /// Label of the formation this score belongs to.
    pub label: String,
    /// Weighted composite in `[0, 1]`; larger values rank higher.
    pub score: f64,
    /// Whether the run reported convergence.
    pub converged: bool,
    /// Number of cycles the run reported.
    pub cycles: u32,
    /// How many application-supplied criteria ended as `CriterionResult::Met`.
    pub criteria_met: usize,
    /// How many criteria were evaluated in total.
    pub criteria_total: usize,
}
34
35impl FormationScore {
36    fn from_result(result: &FormationResult) -> Self {
37        let cr = &result.converge_result;
38
39        let criteria_total = cr.criteria_outcomes.len();
40        let criteria_met = cr
41            .criteria_outcomes
42            .iter()
43            .filter(|o| matches!(o.result, CriterionResult::Met { .. }))
44            .count();
45
46        // Convergence is the dominant signal.
47        // Efficiency bonus: fewer cycles = higher score (cap at 50 cycles).
48        // Criteria coverage: proportion of supplied criteria met.
49        let convergence_score = if cr.converged { 1.0 } else { 0.0 };
50        let efficiency_score = 1.0 - (f64::from(cr.cycles) / 50.0_f64).min(1.0);
51        let criteria_score = if criteria_total == 0 {
52            0.5 // neutral when no criteria registered
53        } else {
54            f64::from(u32::try_from(criteria_met).unwrap_or(u32::MAX))
55                / f64::from(u32::try_from(criteria_total).unwrap_or(u32::MAX))
56        };
57
58        // Weights: convergence 60%, efficiency 20%, criteria 20%.
59        let score = convergence_score * 0.6 + efficiency_score * 0.2 + criteria_score * 0.2;
60
61        Self {
62            label: result.label.clone(),
63            score,
64            converged: cr.converged,
65            cycles: cr.cycles,
66            criteria_met,
67            criteria_total,
68        }
69    }
70}
71
72// ── Tournament ────────────────────────────────────────────────────────────────
73
74pub struct FormationTournament {
75    formations: Vec<Formation>,
76    intent_id: Uuid,
77    plan_id: Uuid,
78}
79
80#[derive(Debug, Clone)]
81pub struct TournamentResult {
82    pub winner: FormationScore,
83    pub all_scores: Vec<FormationScore>,
84    /// Calibrated priors ready to seed the next `PlanningPriorAgent` run.
85    pub priors: Vec<PriorCalibration>,
86}
87
88#[derive(Debug, thiserror::Error)]
89pub enum TournamentError {
90    #[error("no formations provided")]
91    NoFormations,
92    #[error("all formations failed: {0}")]
93    AllFailed(String),
94    #[error("formation error: {0}")]
95    Formation(#[from] FormationError),
96}
97
98impl FormationTournament {
99    pub fn new(intent_id: Uuid, plan_id: Uuid, formations: Vec<Formation>) -> Self {
100        Self {
101            formations,
102            intent_id,
103            plan_id,
104        }
105    }
106
107    /// Run all formations, score them, pick the winner, and calibrate priors.
108    pub async fn run(self) -> Result<TournamentResult, TournamentError> {
109        if self.formations.is_empty() {
110            return Err(TournamentError::NoFormations);
111        }
112
113        let mut results: Vec<FormationResult> = Vec::new();
114        let mut errors: Vec<String> = Vec::new();
115
116        for formation in self.formations {
117            match formation.run().await {
118                Ok(r) => results.push(r),
119                Err(e) => errors.push(e.to_string()),
120            }
121        }
122
123        if results.is_empty() {
124            return Err(TournamentError::AllFailed(errors.join("; ")));
125        }
126
127        // Score all results.
128        let mut scores: Vec<FormationScore> =
129            results.iter().map(FormationScore::from_result).collect();
130
131        // Sort descending by score — first is winner.
132        scores.sort_by(|a, b| b.score.total_cmp(&a.score));
133
134        let winner = scores[0].clone();
135
136        // Build a learning episode from the winner's score and calibrate priors.
137        // The convergence and efficiency metrics become prediction-error dimensions
138        // so `PlanningPriorAgent` can bias future runs toward configurations that
139        // converged quickly and met their criteria.
140        let priors = calibrate_priors(
141            &episode_from_scores(&scores, self.intent_id, self.plan_id),
142            &[],
143        );
144
145        Ok(TournamentResult {
146            winner,
147            all_scores: scores,
148            priors,
149        })
150    }
151}
152
153/// Construct a minimal `LearningEpisode` from tournament scores so we can
154/// feed it into `calibrate_priors` without duplicating the Bayesian logic.
155fn episode_from_scores(
156    scores: &[FormationScore],
157    intent_id: Uuid,
158    plan_id: Uuid,
159) -> LearningEpisode {
160    let winner = &scores[0];
161
162    // convergence_rate: fraction of formations that converged.
163    let converged_count = scores.iter().filter(|s| s.converged).count();
164    let convergence_rate = f64::from(u32::try_from(converged_count).unwrap_or(u32::MAX))
165        / f64::from(u32::try_from(scores.len()).unwrap_or(u32::MAX));
166
167    // criteria_coverage: winner's criteria coverage.
168    let criteria_coverage = if winner.criteria_total == 0 {
169        1.0
170    } else {
171        f64::from(u32::try_from(winner.criteria_met).unwrap_or(u32::MAX))
172            / f64::from(u32::try_from(winner.criteria_total).unwrap_or(u32::MAX))
173    };
174
175    // cycle_efficiency: normalised inverse cycle count of winner.
176    let cycle_efficiency = 1.0 - (f64::from(winner.cycles) / 50.0_f64).min(1.0);
177
178    LearningEpisode {
179        id: Uuid::new_v4(),
180        intent_id,
181        plan_id,
182        predicted_outcome: format!("winner: {}", winner.label),
183        actual_outcome: Some(format!(
184            "score={:.3} converged={} cycles={}",
185            winner.score, winner.converged, winner.cycles
186        )),
187        run_status: Some(if winner.converged {
188            "converged".into()
189        } else {
190            "did-not-converge".into()
191        }),
192        prediction_error: Some(PredictionError {
193            magnitude: 1.0 - winner.score,
194            dimensions: vec![
195                ErrorDimension {
196                    name: "convergence_rate".into(),
197                    predicted: 1.0,
198                    actual: convergence_rate,
199                },
200                ErrorDimension {
201                    name: "criteria_coverage".into(),
202                    predicted: 1.0,
203                    actual: criteria_coverage,
204                },
205                ErrorDimension {
206                    name: "cycle_efficiency".into(),
207                    predicted: 1.0,
208                    actual: cycle_efficiency,
209                },
210            ],
211        }),
212        adversarial_signals: vec![],
213        lessons: vec![],
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provenance::ORGANISM_RUNTIME_PROVENANCE;
    use converge_kernel::{AgentEffect, Context, ContextKey};
    use converge_pack::{Provenance, ProvenanceSource, Suggestor, TextPayload};

    /// Test agent that proposes one hypothesis derived from the first seed and
    /// then stops accepting once any hypothesis is present.
    struct ConvergingAgent;

    #[async_trait::async_trait]
    impl Suggestor for ConvergingAgent {
        fn name(&self) -> &'static str {
            "converging"
        }

        fn dependencies(&self) -> &[ContextKey] {
            &[ContextKey::Seeds]
        }

        fn provenance(&self) -> Provenance {
            ORGANISM_RUNTIME_PROVENANCE.provenance()
        }

        fn accepts(&self, ctx: &dyn Context) -> bool {
            // Only act while seeds exist and no hypothesis has been produced.
            ctx.has(ContextKey::Seeds) && !ctx.has(ContextKey::Hypotheses)
        }

        async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
            let seeds = ctx.get(ContextKey::Seeds);
            let fact_id = format!("hyp-{}", seeds[0].id());
            let proposal = ORGANISM_RUNTIME_PROVENANCE.proposed_fact(
                ContextKey::Hypotheses,
                fact_id,
                TextPayload::new("converged hypothesis"),
            );
            AgentEffect::builder().proposal(proposal).build()
        }
    }

    /// A formation with one `ConvergingAgent` and a single seed.
    fn make_formation(label: &str) -> Formation {
        Formation::new(label)
            .agent(ConvergingAgent)
            .seed(ContextKey::Seeds, "s1", "test content", "test")
    }

    /// Fixed id so tests do not depend on random UUIDs.
    fn id() -> Uuid {
        Uuid::parse_str("00000000-0000-0000-0000-000000000001").unwrap()
    }

    // ── Scoring ───────────────────────────────────────────────────────────────

    #[test]
    fn score_converged_result_above_zero_point_six() {
        // Converged with no criteria: 0.6 for convergence, up to 0.2 for
        // efficiency, and 0.1 from the neutral criteria term (0.5 × 0.2).
        let runtime = tokio::runtime::Runtime::new().unwrap();
        let result = runtime.block_on(make_formation("f1").run()).unwrap();

        let score = FormationScore::from_result(&result);
        assert!(score.converged);
        assert!(score.score > 0.6, "score was {}", score.score);
    }

    // ── Tournament ────────────────────────────────────────────────────────────

    #[tokio::test]
    async fn tournament_picks_winner_from_two_formations() {
        let formations = vec![make_formation("team-a"), make_formation("team-b")];
        let outcome = FormationTournament::new(id(), id(), formations)
            .run()
            .await
            .unwrap();

        assert!(!outcome.winner.label.is_empty());
        assert_eq!(outcome.all_scores.len(), 2);
        // Nobody may outscore the winner.
        assert!(
            outcome
                .all_scores
                .iter()
                .all(|entry| outcome.winner.score >= entry.score)
        );
    }

    #[tokio::test]
    async fn tournament_produces_priors() {
        let outcome = FormationTournament::new(id(), id(), vec![make_formation("solo")])
            .run()
            .await
            .unwrap();

        assert!(!outcome.priors.is_empty());
        // One calibration is expected per error dimension of the episode.
        for expected in ["convergence_rate", "criteria_coverage", "cycle_efficiency"] {
            assert!(
                outcome
                    .priors
                    .iter()
                    .any(|prior| prior.assumption_type == expected),
                "missing prior for {}",
                expected
            );
        }
    }

    #[tokio::test]
    async fn tournament_error_on_no_formations() {
        let outcome = FormationTournament::new(id(), id(), vec![]).run().await;
        assert!(matches!(outcome, Err(TournamentError::NoFormations)));
    }

    #[tokio::test]
    async fn tournament_scores_sorted_descending() {
        let formations = vec![
            make_formation("a"),
            make_formation("b"),
            make_formation("c"),
        ];
        let outcome = FormationTournament::new(id(), id(), formations)
            .run()
            .await
            .unwrap();

        let ranked: Vec<f64> = outcome.all_scores.iter().map(|entry| entry.score).collect();
        assert!(ranked.windows(2).all(|pair| pair[0] >= pair[1]));
    }

    #[tokio::test]
    async fn tournament_winner_is_first_in_sorted_list() {
        let formations = vec![make_formation("a"), make_formation("b")];
        let outcome = FormationTournament::new(id(), id(), formations)
            .run()
            .await
            .unwrap();

        assert_eq!(outcome.winner.label, outcome.all_scores[0].label);
    }

    // ── Prior calibration tightening ─────────────────────────────────────────

    #[tokio::test]
    async fn repeated_tournaments_tighten_priors() {
        /// Run one tournament, rebuild its episode from the scores, and
        /// calibrate against the priors carried over from the previous round.
        async fn run_round(existing: Vec<PriorCalibration>) -> Vec<PriorCalibration> {
            let tournament = FormationTournament::new(id(), id(), vec![make_formation("f")]);
            let outcome = tournament.run().await.unwrap();
            let episode = episode_from_scores(&outcome.all_scores, id(), id());
            calibrate_priors(&episode, &existing)
        }

        let first = run_round(vec![]).await;
        let second = run_round(first.clone()).await;

        // The evidence count must grow by one per round.
        assert_eq!(first[0].evidence_count, 1);
        assert_eq!(second[0].evidence_count, 2);
    }

    // ── Priors are valid PlanningPriorAgent seeds ─────────────────────────────

    #[tokio::test]
    async fn priors_are_serializable_as_planning_prior_seeds() {
        let outcome = FormationTournament::new(id(), id(), vec![make_formation("f")])
            .run()
            .await
            .unwrap();

        for prior in &outcome.priors {
            let seed_content = serde_json::json!({
                "type": "prior_calibration",
                "calibration": prior,
            });
            // The seed must survive a JSON round trip so that
            // `PlanningPriorAgent` can deserialize it.
            let encoded = seed_content.to_string();
            let decoded: serde_json::Value = serde_json::from_str(&encoded).unwrap();
            assert_eq!(
                decoded["calibration"]["assumption_type"].as_str().unwrap(),
                prior.assumption_type
            );
        }
    }
}