1use aios_protocol::event::EventKind;
7use serde::{Deserialize, Serialize};
8use serde_json::json;
9
10use crate::score::{EvalScore, ScoreLabel};
11use crate::taxonomy::EvalLayer;
12
/// Prefix shared by the event type strings of evaluation events.
///
/// `NousEvent::is_eval_event` checks an event type against this prefix.
pub const EVAL_EVENT_PREFIX: &str = "eval.";
15
/// Evaluation events that are transported as `EventKind::Custom` payloads.
///
/// `into_event_kind` turns a value into a custom event and `from_custom`
/// parses one back. When serialized directly with serde, the enum is
/// internally tagged: the variant name is stored under the `"type"` key, and
/// `rename_all = "PascalCase"` applies to those variant names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "PascalCase")]
pub enum NousEvent {
    /// A single inline evaluation result; `from_inline_score` builds this
    /// variant from an `EvalScore`.
    InlineCompleted {
        /// Evaluator identifier (copied from `EvalScore::evaluator`).
        evaluator: String,
        /// Numeric score (copied from `EvalScore::value`).
        score: f64,
        /// Label attached to the score.
        label: ScoreLabel,
        /// Evaluation layer the score belongs to.
        layer: EvalLayer,
        /// Session the score was recorded for.
        session_id: String,
        /// Optional run identifier; `None` when the score has none.
        run_id: Option<String>,
        /// Optional free-form explanation accompanying the score.
        explanation: Option<String>,
    },
    /// Completion record carrying a list of score summaries.
    // NOTE(review): presumably emitted once per asynchronous evaluator run;
    // confirm against the emitter.
    AsyncCompleted {
        /// Evaluator identifier.
        evaluator: String,
        /// Summaries of the scores reported with this event.
        scores: Vec<ScoreSummary>,
        /// Session the evaluation belongs to.
        session_id: String,
        /// Optional run identifier.
        run_id: Option<String>,
        /// Duration in milliseconds, judging by the field name; confirm which
        /// interval is measured.
        duration_ms: u64,
    },
    /// Aggregate quality figures for a session.
    // NOTE(review): the exact meaning and range of `aggregate_quality` and
    // `trend` are not visible here; confirm with the producer.
    QualityChanged {
        /// Session the figures refer to.
        session_id: String,
        /// Aggregate quality value.
        aggregate_quality: f64,
        /// Trend value associated with the aggregate.
        trend: f64,
        /// Count associated with inline evaluations.
        inline_count: u32,
        /// Count associated with async evaluations.
        async_count: u32,
    },
    /// Outcome payload carried as opaque JSON; this module copies `outcome`
    /// without inspecting its structure.
    EgriOutcome {
        /// Session the outcome belongs to.
        session_id: String,
        /// Optional trial identifier.
        trial_id: Option<String>,
        /// Arbitrary JSON payload.
        outcome: serde_json::Value,
    },
}
53
/// Compact view of an `EvalScore`, holding only the evaluator, value, label
/// and layer. Used as the element type of `NousEvent::AsyncCompleted::scores`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoreSummary {
    /// Evaluator identifier.
    pub evaluator: String,
    /// Numeric score value.
    pub value: f64,
    /// Label attached to the score.
    pub label: ScoreLabel,
    /// Evaluation layer the score belongs to.
    pub layer: EvalLayer,
}
62
63impl From<&EvalScore> for ScoreSummary {
64 fn from(score: &EvalScore) -> Self {
65 Self {
66 evaluator: score.evaluator.clone(),
67 value: score.value,
68 label: score.label,
69 layer: score.layer,
70 }
71 }
72}
73
74impl NousEvent {
75 pub fn from_inline_score(score: &EvalScore) -> Self {
77 Self::InlineCompleted {
78 evaluator: score.evaluator.clone(),
79 score: score.value,
80 label: score.label,
81 layer: score.layer,
82 session_id: score.session_id.clone(),
83 run_id: score.run_id.clone(),
84 explanation: score.explanation.clone(),
85 }
86 }
87
88 pub fn into_event_kind(self) -> EventKind {
90 let (event_type, data) = match &self {
91 Self::InlineCompleted {
92 evaluator,
93 score,
94 label,
95 layer,
96 session_id,
97 run_id,
98 explanation,
99 } => (
100 "eval.InlineCompleted",
101 json!({
102 "evaluator": evaluator,
103 "score": score,
104 "label": label,
105 "layer": layer,
106 "session_id": session_id,
107 "run_id": run_id,
108 "explanation": explanation,
109 }),
110 ),
111 Self::AsyncCompleted {
112 evaluator,
113 scores,
114 session_id,
115 run_id,
116 duration_ms,
117 } => (
118 "eval.AsyncCompleted",
119 json!({
120 "evaluator": evaluator,
121 "scores": scores,
122 "session_id": session_id,
123 "run_id": run_id,
124 "duration_ms": duration_ms,
125 }),
126 ),
127 Self::QualityChanged {
128 session_id,
129 aggregate_quality,
130 trend,
131 inline_count,
132 async_count,
133 } => (
134 "eval.QualityChanged",
135 json!({
136 "session_id": session_id,
137 "aggregate_quality": aggregate_quality,
138 "trend": trend,
139 "inline_count": inline_count,
140 "async_count": async_count,
141 }),
142 ),
143 Self::EgriOutcome {
144 session_id,
145 trial_id,
146 outcome,
147 } => (
148 "eval.egri_outcome",
149 json!({
150 "session_id": session_id,
151 "trial_id": trial_id,
152 "outcome": outcome,
153 }),
154 ),
155 };
156 EventKind::Custom {
157 event_type: event_type.to_owned(),
158 data,
159 }
160 }
161
162 pub fn is_eval_event(event_type: &str) -> bool {
164 event_type.starts_with(EVAL_EVENT_PREFIX)
165 }
166
167 pub fn from_custom(event_type: &str, data: &serde_json::Value) -> Option<Self> {
169 if !Self::is_eval_event(event_type) {
170 return None;
171 }
172
173 match event_type {
174 "eval.InlineCompleted" => {
175 let evaluator = data.get("evaluator")?.as_str()?.to_owned();
176 let score = data.get("score")?.as_f64()?;
177 let label: ScoreLabel = serde_json::from_value(data.get("label")?.clone()).ok()?;
178 let layer: EvalLayer = serde_json::from_value(data.get("layer")?.clone()).ok()?;
179 let session_id = data.get("session_id")?.as_str()?.to_owned();
180 let run_id = data
181 .get("run_id")
182 .and_then(|v| v.as_str())
183 .map(str::to_owned);
184 let explanation = data
185 .get("explanation")
186 .and_then(|v| v.as_str())
187 .map(str::to_owned);
188 Some(Self::InlineCompleted {
189 evaluator,
190 score,
191 label,
192 layer,
193 session_id,
194 run_id,
195 explanation,
196 })
197 }
198 "eval.AsyncCompleted" => {
199 let evaluator = data.get("evaluator")?.as_str()?.to_owned();
200 let scores: Vec<ScoreSummary> =
201 serde_json::from_value(data.get("scores")?.clone()).ok()?;
202 let session_id = data.get("session_id")?.as_str()?.to_owned();
203 let run_id = data
204 .get("run_id")
205 .and_then(|v| v.as_str())
206 .map(str::to_owned);
207 let duration_ms = data.get("duration_ms")?.as_u64()?;
208 Some(Self::AsyncCompleted {
209 evaluator,
210 scores,
211 session_id,
212 run_id,
213 duration_ms,
214 })
215 }
216 "eval.QualityChanged" => {
217 let session_id = data.get("session_id")?.as_str()?.to_owned();
218 let aggregate_quality = data.get("aggregate_quality")?.as_f64()?;
219 let trend = data.get("trend")?.as_f64()?;
220 let inline_count = data.get("inline_count")?.as_u64()? as u32;
221 let async_count = data.get("async_count")?.as_u64()? as u32;
222 Some(Self::QualityChanged {
223 session_id,
224 aggregate_quality,
225 trend,
226 inline_count,
227 async_count,
228 })
229 }
230 "eval.egri_outcome" => {
231 let session_id = data.get("session_id")?.as_str()?.to_owned();
232 let trial_id = data
233 .get("trial_id")
234 .and_then(|v| v.as_str())
235 .map(str::to_owned);
236 let outcome = data.get("outcome")?.clone();
237 Some(Self::EgriOutcome {
238 session_id,
239 trial_id,
240 outcome,
241 })
242 }
243 _ => None,
244 }
245 }
246}
247
#[cfg(test)]
mod tests {
    use super::*;
    use crate::taxonomy::EvalTiming;

    /// Splits an `EventKind::Custom` into its event type and payload; any
    /// other variant panics with `msg`.
    fn expect_custom(kind: EventKind, msg: &str) -> (String, serde_json::Value) {
        match kind {
            EventKind::Custom { event_type, data } => (event_type, data),
            _ => panic!("{}", msg),
        }
    }

    /// An inline event maps to the expected custom type and payload fields.
    #[test]
    fn inline_completed_to_event_kind() {
        let kind = NousEvent::InlineCompleted {
            evaluator: "token_efficiency".into(),
            score: 0.85,
            label: ScoreLabel::Good,
            layer: EvalLayer::Execution,
            session_id: "sess-1".into(),
            run_id: Some("run-1".into()),
            explanation: Some("good ratio".into()),
        }
        .into_event_kind();

        let (event_type, data) = expect_custom(kind, "expected Custom variant");
        assert_eq!(event_type, "eval.InlineCompleted");
        assert_eq!(data["evaluator"], "token_efficiency");
        assert_eq!(data["score"], 0.85);
    }

    /// An inline event survives serialization to a JSON string and back.
    #[test]
    fn event_kind_roundtrip_inline() {
        let original = NousEvent::InlineCompleted {
            evaluator: "budget_adherence".into(),
            score: 0.92,
            label: ScoreLabel::Good,
            layer: EvalLayer::Cost,
            session_id: "sess-1".into(),
            run_id: None,
            explanation: None,
        };

        let encoded = serde_json::to_string(&original.into_event_kind()).unwrap();
        let decoded: EventKind = serde_json::from_str(&encoded).unwrap();

        let (event_type, data) = expect_custom(decoded, "expected Custom variant after roundtrip");
        assert_eq!(event_type, "eval.InlineCompleted");
        let parsed = NousEvent::from_custom(&event_type, &data).unwrap();
        assert!(matches!(
            parsed,
            NousEvent::InlineCompleted {
                evaluator,
                ..
            } if evaluator == "budget_adherence"
        ));
    }

    /// A quality-changed event parses back with the same aggregate value.
    #[test]
    fn quality_changed_roundtrip() {
        let kind = NousEvent::QualityChanged {
            session_id: "sess-1".into(),
            aggregate_quality: 0.78,
            trend: 0.02,
            inline_count: 15,
            async_count: 3,
        }
        .into_event_kind();

        let (event_type, data) = expect_custom(kind, "expected Custom");
        let parsed = NousEvent::from_custom(&event_type, &data).unwrap();
        assert!(matches!(
            parsed,
            NousEvent::QualityChanged {
                aggregate_quality,
                ..
            } if (aggregate_quality - 0.78).abs() < f64::EPSILON
        ));
    }

    /// Only strings starting with the `eval.` prefix count as eval events.
    #[test]
    fn is_eval_event_prefix() {
        for accepted in ["eval.InlineCompleted", "eval.Anything"] {
            assert!(NousEvent::is_eval_event(accepted));
        }
        for rejected in ["autonomic.CostCharged", "InlineCompleted"] {
            assert!(!NousEvent::is_eval_event(rejected));
        }
    }

    /// Event types outside the `eval.` namespace are not parsed.
    #[test]
    fn from_custom_returns_none_for_non_eval() {
        let payload = json!({});
        assert!(NousEvent::from_custom("autonomic.CostCharged", &payload).is_none());
    }

    /// `from_inline_score` carries the evaluator name and value across.
    #[test]
    fn from_inline_score_creates_event() {
        let base = EvalScore::new(
            "test_eval",
            0.75,
            EvalLayer::Action,
            EvalTiming::Inline,
            "sess-1",
        )
        .unwrap();
        let score = base.with_explanation("some explanation");

        assert!(matches!(
            NousEvent::from_inline_score(&score),
            NousEvent::InlineCompleted {
                evaluator,
                score: s,
                ..
            } if evaluator == "test_eval" && (s - 0.75).abs() < f64::EPSILON
        ));
    }

    /// `ScoreSummary::from` copies evaluator, value and layer.
    #[test]
    fn score_summary_from_eval_score() {
        let score =
            EvalScore::new("test", 0.8, EvalLayer::Safety, EvalTiming::Inline, "s").unwrap();
        let summary: ScoreSummary = (&score).into();

        assert_eq!(summary.evaluator, "test");
        assert!((summary.value - 0.8).abs() < f64::EPSILON);
        assert_eq!(summary.layer, EvalLayer::Safety);
    }

    /// An outcome event with a trial id round-trips with its payload intact.
    #[test]
    fn egri_outcome_roundtrip() {
        let kind = NousEvent::EgriOutcome {
            session_id: "sess-1".into(),
            trial_id: Some("trial-001".into()),
            outcome: json!({
                "score": {"aggregate": 0.85, "plan_quality": 0.9},
                "constraints_passed": true,
                "constraint_violations": [],
            }),
        }
        .into_event_kind();

        let (event_type, data) = expect_custom(kind, "expected Custom variant");
        assert_eq!(event_type, "eval.egri_outcome");

        if let NousEvent::EgriOutcome {
            session_id,
            trial_id,
            outcome,
        } = NousEvent::from_custom(&event_type, &data).unwrap()
        {
            assert_eq!(session_id, "sess-1");
            assert_eq!(trial_id.as_deref(), Some("trial-001"));
            assert_eq!(outcome["score"]["aggregate"], 0.85);
        } else {
            panic!("expected EgriOutcome variant");
        }
    }

    /// A missing trial id stays `None` after a round trip.
    #[test]
    fn egri_outcome_without_trial_id() {
        let kind = NousEvent::EgriOutcome {
            session_id: "sess-2".into(),
            trial_id: None,
            outcome: json!({"score": {"aggregate": 0.5}}),
        }
        .into_event_kind();

        let (event_type, data) = expect_custom(kind, "expected Custom variant");
        let parsed = NousEvent::from_custom(&event_type, &data).unwrap();
        assert!(matches!(
            parsed,
            NousEvent::EgriOutcome { trial_id: None, .. }
        ));
    }
}