1use crate::error::NousResult;
7use crate::score::EvalScore;
8use crate::taxonomy::{EvalLayer, EvalTiming};
9
/// Input handed to [`NousEvaluator::evaluate`].
///
/// Only `session_id` is mandatory. Every other field is optional and is left
/// unset by [`EvalContext::new`]; callers populate whichever values they have
/// by assigning to the public fields.
///
/// NOTE(review): the per-field descriptions below follow the field names.
/// This file does not show where the values come from, so confirm the exact
/// semantics (e.g. per-call vs. cumulative counts) against the code that
/// builds the context.
#[derive(Debug, Clone)]
pub struct EvalContext {
    /// Session identifier; the only value required by [`EvalContext::new`].
    pub session_id: String,
    /// Run identifier, if available.
    pub run_id: Option<String>,
    /// Iteration number, if available.
    pub iteration: Option<u32>,
    /// Input token count, if available.
    pub input_tokens: Option<u64>,
    /// Output token count, if available.
    pub output_tokens: Option<u64>,
    /// Tokens remaining, if available (the budget it refers to is not defined here).
    pub tokens_remaining: Option<u64>,
    /// Total tokens used, if available.
    pub total_tokens_used: Option<u64>,
    /// Number of tool calls, if available.
    pub tool_call_count: Option<u32>,
    /// Number of tool errors, if available.
    pub tool_error_count: Option<u32>,
    /// Tool name, if available (presumably the tool of the current call — TODO confirm).
    pub tool_name: Option<String>,
    /// Whether the tool errored, if available.
    pub tool_errored: Option<bool>,
    /// Iteration limit, if available.
    pub max_iterations: Option<u32>,
    /// Free-form string key/value pairs; empty when built with [`EvalContext::new`].
    pub metadata: std::collections::HashMap<String, String>,
}

impl EvalContext {
    /// Creates a context for `session_id` with nothing else filled in.
    ///
    /// Every `Option` field is `None` and `metadata` is an empty map.
    /// `session_id` accepts anything convertible into a `String`
    /// (`&str`, `String`, ...).
    #[must_use]
    pub fn new(session_id: impl Into<String>) -> Self {
        Self {
            session_id: session_id.into(),
            run_id: None,
            iteration: None,
            input_tokens: None,
            output_tokens: None,
            tokens_remaining: None,
            total_tokens_used: None,
            tool_call_count: None,
            tool_error_count: None,
            tool_name: None,
            tool_errored: None,
            max_iterations: None,
            metadata: std::collections::HashMap::new(),
        }
    }
}
64
65pub trait NousEvaluator: Send + Sync {
70 fn name(&self) -> &str;
72
73 fn layer(&self) -> EvalLayer;
75
76 fn timing(&self) -> EvalTiming;
78
79 fn evaluate(&self, ctx: &EvalContext) -> NousResult<Vec<EvalScore>>;
83}
84
/// Named hook points.
///
/// NOTE(review): the variant names indicate the moment each hook refers to;
/// how and when hooks are dispatched is not shown in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EvalHook {
    /// Before a model call.
    BeforeModelCall,
    /// After a model call.
    AfterModelCall,
    /// Before a tool call.
    PreToolCall,
    /// After a tool call.
    PostToolCall,
    /// When a run has finished.
    OnRunFinished,
}

impl EvalHook {
    /// Returns the hook's `snake_case` name as a static string.
    ///
    /// The match is exhaustive on purpose: adding a variant without giving
    /// it a name is a compile error. Being a `const fn`, the name can also be
    /// used to initialise constants.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::BeforeModelCall => "before_model_call",
            Self::AfterModelCall => "after_model_call",
            Self::PreToolCall => "pre_tool_call",
            Self::PostToolCall => "post_tool_call",
            Self::OnRunFinished => "on_run_finished",
        }
    }
}
112
#[cfg(test)]
mod tests {
    use super::*;
    use crate::taxonomy::{EvalLayer, EvalTiming};

    /// Minimal evaluator that always emits a single fixed score of `0.9`.
    struct MockEvaluator;

    impl NousEvaluator for MockEvaluator {
        fn name(&self) -> &str {
            "mock"
        }

        fn layer(&self) -> EvalLayer {
            EvalLayer::Execution
        }

        fn timing(&self) -> EvalTiming {
            EvalTiming::Inline
        }

        fn evaluate(&self, ctx: &EvalContext) -> NousResult<Vec<EvalScore>> {
            let score = EvalScore::new(
                self.name(),
                0.9,
                self.layer(),
                self.timing(),
                &ctx.session_id,
            )?;
            Ok(vec![score])
        }
    }

    /// The trait can be implemented and driven end to end.
    #[test]
    fn mock_evaluator_produces_score() {
        let evaluator = MockEvaluator;
        let ctx = EvalContext::new("sess-1");
        let scores = evaluator.evaluate(&ctx).unwrap();
        assert_eq!(scores.len(), 1);
        assert_eq!(scores[0].evaluator, "mock");
        assert!((scores[0].value - 0.9).abs() < f64::EPSILON);
    }

    /// `new` stores the session id and leaves every other field unset.
    #[test]
    fn eval_context_new_minimal() {
        let ctx = EvalContext::new("test");
        assert_eq!(ctx.session_id, "test");
        assert!(ctx.run_id.is_none());
        assert!(ctx.iteration.is_none());
        assert!(ctx.input_tokens.is_none());
        assert!(ctx.output_tokens.is_none());
        assert!(ctx.tokens_remaining.is_none());
        assert!(ctx.total_tokens_used.is_none());
        assert!(ctx.tool_call_count.is_none());
        assert!(ctx.tool_error_count.is_none());
        assert!(ctx.tool_name.is_none());
        assert!(ctx.tool_errored.is_none());
        assert!(ctx.max_iterations.is_none());
        assert!(ctx.metadata.is_empty());
    }

    /// Every hook maps to its `snake_case` name.
    #[test]
    fn eval_hook_as_str() {
        assert_eq!(EvalHook::BeforeModelCall.as_str(), "before_model_call");
        assert_eq!(EvalHook::AfterModelCall.as_str(), "after_model_call");
        assert_eq!(EvalHook::PreToolCall.as_str(), "pre_tool_call");
        assert_eq!(EvalHook::PostToolCall.as_str(), "post_tool_call");
        assert_eq!(EvalHook::OnRunFinished.as_str(), "on_run_finished");
    }
}