crucible/training/
evaluation.rs

1// Copyright 2024-2026 Reflective Labs
2
3use converge_pack::{AgentEffect, Context, ContextKey, Provenance, ProvenanceSource, Suggestor};
4use std::collections::HashMap;
5
6use crate::provenance::CRUCIBLE_PROVENANCE;
7
8use super::features::apply_feature_spec;
9use super::io::{get_numeric_series, load_dataframe, mean_abs_error, mean_abs_value};
10use super::types::{
11    DeploymentDecision, EvaluationReport, InferenceSample, ModelRegistryRecord, MonitoringReport,
12    diagnostic, has_deployment_decision_for_iteration, has_evaluation_for_iteration,
13    has_inference_for_iteration, has_monitoring_report_for_iteration,
14    has_registry_record_for_iteration, latest_evaluation_report, proposal,
15    read_feature_spec_from_ctx, read_latest_model_meta_from_ctx, read_latest_plan_from_ctx,
16    read_latest_split_from_ctx, read_model_from_ctx, read_model_path_from_ctx,
17};
18
/// Agent that scores the latest model on the validation split and proposes an
/// `EvaluationReport` whose metric is the mean absolute error.
#[derive(Debug, Default)]
pub struct ModelEvaluationAgent;

impl ModelEvaluationAgent {
    /// Creates the agent. The type is a field-less unit struct, so this yields the
    /// same value as `ModelEvaluationAgent::default()`.
    pub fn new() -> Self {
        ModelEvaluationAgent
    }
}
27
28#[async_trait::async_trait]
29impl Suggestor for ModelEvaluationAgent {
30    fn name(&self) -> &'static str {
31        "ModelEvaluationAgent (MAE)"
32    }
33
34    fn dependencies(&self) -> &[ContextKey] {
35        &[ContextKey::Signals, ContextKey::Strategies]
36    }
37
38    fn accepts(&self, ctx: &dyn Context) -> bool {
39        ctx.has(ContextKey::Signals)
40            && ctx.has(ContextKey::Strategies)
41            && match read_latest_split_from_ctx(ctx) {
42                Ok(split) => !has_evaluation_for_iteration(ctx, split.iteration),
43                Err(_) => false,
44            }
45    }
46
47    fn provenance(&self) -> Provenance {
48        Provenance::from(CRUCIBLE_PROVENANCE.as_str())
49    }
50
51    async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
52        let split = match read_latest_split_from_ctx(ctx) {
53            Ok(split) => split,
54            Err(err) => {
55                return AgentEffect::with_proposal(diagnostic(
56                    self.name(),
57                    ContextKey::Diagnostic,
58                    "model-eval-error",
59                    err.to_string(),
60                ));
61            }
62        };
63
64        let model = match read_model_from_ctx(ctx) {
65            Ok(model) => model,
66            Err(err) => {
67                return AgentEffect::with_proposal(diagnostic(
68                    self.name(),
69                    ContextKey::Diagnostic,
70                    "model-eval-error",
71                    err.to_string(),
72                ));
73            }
74        };
75
76        let raw_val_df = match load_dataframe(&split.val_path) {
77            Ok(df) => df,
78            Err(err) => {
79                return AgentEffect::with_proposal(diagnostic(
80                    self.name(),
81                    ContextKey::Diagnostic,
82                    "model-eval-error",
83                    err.to_string(),
84                ));
85            }
86        };
87
88        // Apply FeatureSpec transformation if available
89        let val_df = match read_feature_spec_from_ctx(ctx, split.iteration) {
90            Some(spec) => apply_feature_spec(&raw_val_df, &spec).unwrap_or(raw_val_df),
91            None => raw_val_df,
92        };
93
94        let target = match get_numeric_series(&val_df, &model.target_column) {
95            Ok(series) => series,
96            Err(err) => {
97                return AgentEffect::with_proposal(diagnostic(
98                    self.name(),
99                    ContextKey::Diagnostic,
100                    "model-eval-error",
101                    err.to_string(),
102                ));
103            }
104        };
105
106        let mae = match mean_abs_error(&target, model.mean) {
107            Ok(value) => value,
108            Err(err) => {
109                return AgentEffect::with_proposal(diagnostic(
110                    self.name(),
111                    ContextKey::Diagnostic,
112                    "model-eval-error",
113                    err.to_string(),
114                ));
115            }
116        };
117
118        let mean_abs = match mean_abs_value(&target) {
119            Ok(value) => value,
120            Err(err) => {
121                return AgentEffect::with_proposal(diagnostic(
122                    self.name(),
123                    ContextKey::Diagnostic,
124                    "model-eval-error",
125                    err.to_string(),
126                ));
127            }
128        };
129
130        let success_ratio = if mean_abs > 0.0 {
131            (1.0 - (mae / mean_abs)).clamp(0.0, 1.0)
132        } else {
133            0.0
134        };
135
136        let report = EvaluationReport {
137            model_path: read_model_path_from_ctx(ctx).unwrap_or_default(),
138            metric: "mae".to_string(),
139            value: mae,
140            mean_abs_target: mean_abs,
141            success_ratio,
142            val_rows: split.val_rows,
143            iteration: split.iteration,
144        };
145
146        AgentEffect::with_proposal(proposal(
147            self.name(),
148            ContextKey::Evaluations,
149            format!("model-eval-{}", split.iteration),
150            report,
151        ))
152    }
153}
154
/// Agent that proposes a small sample of baseline predictions alongside the actual
/// target values read from the inference split.
#[derive(Debug)]
pub struct SampleInferenceAgent {
    /// Upper bound on the number of rows included in a proposed sample.
    pub max_rows: usize,
}

impl SampleInferenceAgent {
    /// Creates an agent that samples at most `max_rows` rows.
    pub fn new(max_rows: usize) -> Self {
        SampleInferenceAgent { max_rows }
    }
}
165
166#[async_trait::async_trait]
167impl Suggestor for SampleInferenceAgent {
168    fn name(&self) -> &'static str {
169        "SampleInferenceAgent (Baseline)"
170    }
171
172    fn dependencies(&self) -> &[ContextKey] {
173        &[ContextKey::Signals, ContextKey::Strategies]
174    }
175
176    fn accepts(&self, ctx: &dyn Context) -> bool {
177        ctx.has(ContextKey::Signals)
178            && ctx.has(ContextKey::Strategies)
179            && match read_latest_split_from_ctx(ctx) {
180                Ok(split) => !has_inference_for_iteration(ctx, split.iteration),
181                Err(_) => false,
182            }
183    }
184
185    fn provenance(&self) -> Provenance {
186        Provenance::from(CRUCIBLE_PROVENANCE.as_str())
187    }
188
189    async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
190        let split = match read_latest_split_from_ctx(ctx) {
191            Ok(split) => split,
192            Err(err) => {
193                return AgentEffect::with_proposal(diagnostic(
194                    self.name(),
195                    ContextKey::Diagnostic,
196                    "model-infer-error",
197                    err.to_string(),
198                ));
199            }
200        };
201
202        let model = match read_model_from_ctx(ctx) {
203            Ok(model) => model,
204            Err(err) => {
205                return AgentEffect::with_proposal(diagnostic(
206                    self.name(),
207                    ContextKey::Diagnostic,
208                    "model-infer-error",
209                    err.to_string(),
210                ));
211            }
212        };
213
214        let infer_df = match load_dataframe(&split.infer_path) {
215            Ok(df) => df,
216            Err(err) => {
217                return AgentEffect::with_proposal(diagnostic(
218                    self.name(),
219                    ContextKey::Diagnostic,
220                    "model-infer-error",
221                    err.to_string(),
222                ));
223            }
224        };
225
226        let target = match get_numeric_series(&infer_df, &model.target_column) {
227            Ok(series) => series,
228            Err(err) => {
229                return AgentEffect::with_proposal(diagnostic(
230                    self.name(),
231                    ContextKey::Diagnostic,
232                    "model-infer-error",
233                    err.to_string(),
234                ));
235            }
236        };
237
238        let sample_rows = self.max_rows.min(infer_df.height().max(1));
239        let actuals = match target.f64() {
240            Ok(series) => series
241                .into_no_null_iter()
242                .take(sample_rows)
243                .collect::<Vec<_>>(),
244            Err(err) => {
245                return AgentEffect::with_proposal(diagnostic(
246                    self.name(),
247                    ContextKey::Diagnostic,
248                    "model-infer-error",
249                    err.to_string(),
250                ));
251            }
252        };
253
254        let predictions = vec![model.mean; actuals.len()];
255        let sample = InferenceSample {
256            model_path: read_model_path_from_ctx(ctx).unwrap_or_default(),
257            target_column: model.target_column,
258            rows: actuals.len(),
259            predictions,
260            actuals,
261            iteration: split.iteration,
262        };
263
264        AgentEffect::with_proposal(proposal(
265            self.name(),
266            ContextKey::Hypotheses,
267            format!("inference-sample-{}", split.iteration),
268            sample,
269        ))
270    }
271}
272
/// Agent that proposes a `ModelRegistryRecord` for the latest model, attaching the
/// evaluation metrics when an evaluation report is available.
#[derive(Debug, Default)]
pub struct ModelRegistryAgent;

impl ModelRegistryAgent {
    /// Creates the agent. The type is a field-less unit struct, so this yields the
    /// same value as `ModelRegistryAgent::default()`.
    pub fn new() -> Self {
        ModelRegistryAgent
    }
}
281
282#[async_trait::async_trait]
283impl Suggestor for ModelRegistryAgent {
284    fn name(&self) -> &'static str {
285        "ModelRegistryAgent"
286    }
287
288    fn dependencies(&self) -> &[ContextKey] {
289        &[ContextKey::Strategies, ContextKey::Evaluations]
290    }
291
292    fn accepts(&self, ctx: &dyn Context) -> bool {
293        ctx.has(ContextKey::Strategies)
294            && ctx.has(ContextKey::Evaluations)
295            && match read_latest_model_meta_from_ctx(ctx) {
296                Ok(meta) => !has_registry_record_for_iteration(ctx, meta.iteration),
297                Err(_) => false,
298            }
299    }
300
301    fn provenance(&self) -> Provenance {
302        Provenance::from(CRUCIBLE_PROVENANCE.as_str())
303    }
304
305    async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
306        let meta = match read_latest_model_meta_from_ctx(ctx) {
307            Ok(meta) => meta,
308            Err(err) => {
309                return AgentEffect::with_proposal(diagnostic(
310                    self.name(),
311                    ContextKey::Diagnostic,
312                    "model-registry-error",
313                    err.to_string(),
314                ));
315            }
316        };
317
318        let report = latest_evaluation_report(ctx, meta.iteration);
319        let mut metrics = HashMap::new();
320        if let Some(report) = report {
321            metrics.insert(report.metric, report.value);
322            metrics.insert("success_ratio".to_string(), report.success_ratio);
323        }
324
325        let record = ModelRegistryRecord {
326            kind: "model_registry".to_string(),
327            iteration: meta.iteration,
328            model_path: meta.model_path,
329            metrics,
330            provenance: "training_flow".to_string(),
331        };
332
333        AgentEffect::with_proposal(proposal(
334            self.name(),
335            ContextKey::Strategies,
336            format!("model-registry-{}", record.iteration),
337            record,
338        ))
339    }
340}
341
/// Agent that turns the latest evaluation report into a `MonitoringReport` with a
/// `"healthy"` / `"needs_attention"` status.
#[derive(Debug, Default)]
pub struct MonitoringAgent;

impl MonitoringAgent {
    /// Creates the agent. The type is a field-less unit struct, so this yields the
    /// same value as `MonitoringAgent::default()`.
    pub fn new() -> Self {
        MonitoringAgent
    }
}
350
351#[async_trait::async_trait]
352impl Suggestor for MonitoringAgent {
353    fn name(&self) -> &'static str {
354        "MonitoringAgent"
355    }
356
357    fn dependencies(&self) -> &[ContextKey] {
358        &[ContextKey::Evaluations]
359    }
360
361    fn accepts(&self, ctx: &dyn Context) -> bool {
362        ctx.has(ContextKey::Evaluations)
363            && match latest_evaluation_report(ctx, 0) {
364                Some(report) => !has_monitoring_report_for_iteration(ctx, report.iteration),
365                None => false,
366            }
367    }
368
369    fn provenance(&self) -> Provenance {
370        Provenance::from(CRUCIBLE_PROVENANCE.as_str())
371    }
372
373    async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
374        let report = match latest_evaluation_report(ctx, 0) {
375            Some(report) => report,
376            None => return AgentEffect::empty(),
377        };
378
379        let status = if report.success_ratio >= 0.75 {
380            "healthy"
381        } else {
382            "needs_attention"
383        };
384
385        let monitoring = MonitoringReport {
386            kind: "monitoring".to_string(),
387            iteration: report.iteration,
388            metric: report.metric,
389            value: report.value,
390            baseline: report.mean_abs_target,
391            status: status.to_string(),
392        };
393
394        AgentEffect::with_proposal(proposal(
395            self.name(),
396            ContextKey::Evaluations,
397            format!("monitoring-{}", report.iteration),
398            monitoring,
399        ))
400    }
401}
402
/// Agent that compares the latest evaluation's success ratio with a quality threshold
/// and proposes a deploy-or-hold `DeploymentDecision`.
#[derive(Debug, Default)]
pub struct DeploymentAgent;

impl DeploymentAgent {
    /// Creates the agent. The type is a field-less unit struct, so this yields the
    /// same value as `DeploymentAgent::default()`.
    pub fn new() -> Self {
        DeploymentAgent
    }
}
411
412#[async_trait::async_trait]
413impl Suggestor for DeploymentAgent {
414    fn name(&self) -> &'static str {
415        "DeploymentAgent"
416    }
417
418    fn dependencies(&self) -> &[ContextKey] {
419        &[ContextKey::Evaluations, ContextKey::Strategies]
420    }
421
422    fn accepts(&self, ctx: &dyn Context) -> bool {
423        ctx.has(ContextKey::Evaluations)
424            && ctx.has(ContextKey::Strategies)
425            && match latest_evaluation_report(ctx, 0) {
426                Some(report) => !has_deployment_decision_for_iteration(ctx, report.iteration),
427                None => false,
428            }
429    }
430
431    fn provenance(&self) -> Provenance {
432        Provenance::from(CRUCIBLE_PROVENANCE.as_str())
433    }
434
435    async fn execute(&self, ctx: &dyn Context) -> AgentEffect {
436        let report = match latest_evaluation_report(ctx, 0) {
437            Some(report) => report,
438            None => return AgentEffect::empty(),
439        };
440
441        let quality_threshold =
442            read_latest_plan_from_ctx(ctx).map_or(0.75, |plan| plan.quality_threshold);
443
444        let (action, retrain, reason) = if report.success_ratio >= quality_threshold {
445            ("deploy", false, "meets quality threshold")
446        } else {
447            ("hold", true, "below quality threshold")
448        };
449
450        let decision = DeploymentDecision {
451            kind: "deployment_decision".to_string(),
452            iteration: report.iteration,
453            action: action.to_string(),
454            reason: reason.to_string(),
455            retrain,
456        };
457
458        AgentEffect::with_proposal(proposal(
459            self.name(),
460            ContextKey::Strategies,
461            format!("deployment-{}", report.iteration),
462            decision,
463        ))
464    }
465}
crucible/training/evaluation.rs

crucible/training/
evaluation.rs