use crate::error::NousResult;
use crate::score::EvalScore;
use crate::taxonomy::{EvalLayer, EvalTiming};
/// Data handed to a [`NousEvaluator`] when it is asked to produce scores.
///
/// Only `session_id` is mandatory. Every other scalar field is optional and is
/// left as `None` by [`EvalContext::new`]; callers fill in whatever they know.
#[derive(Debug, Clone)]
pub struct EvalContext {
    /// Session identifier; the only value a caller must supply.
    pub session_id: String,
    /// Run identifier, if one is known.
    pub run_id: Option<String>,
    /// Iteration number, if known.
    // NOTE(review): presumably the current loop iteration of the run — confirm with the producer.
    pub iteration: Option<u32>,

    /// Input token count, if reported.
    pub input_tokens: Option<u64>,
    /// Output token count, if reported.
    pub output_tokens: Option<u64>,
    /// Remaining token budget, if reported.
    pub tokens_remaining: Option<u64>,
    /// Total tokens used, if reported.
    // NOTE(review): whether this is per run or per session is not visible here — confirm.
    pub total_tokens_used: Option<u64>,

    /// Number of tool calls, if reported.
    pub tool_call_count: Option<u32>,
    /// Number of tool errors, if reported.
    pub tool_error_count: Option<u32>,
    /// Name of a tool, if the context concerns one.
    // NOTE(review): presumably only set around tool-call hooks — confirm.
    pub tool_name: Option<String>,
    /// Whether the tool errored, if known.
    pub tool_errored: Option<bool>,

    /// Upper bound on iterations, if one is configured.
    pub max_iterations: Option<u32>,
    /// Free-form string key/value pairs attached to the context.
    pub metadata: std::collections::HashMap<String, String>,
}
impl EvalContext {
pub fn new(session_id: impl Into<String>) -> Self {
Self {
session_id: session_id.into(),
run_id: None,
iteration: None,
input_tokens: None,
output_tokens: None,
tokens_remaining: None,
total_tokens_used: None,
tool_call_count: None,
tool_error_count: None,
tool_name: None,
tool_errored: None,
max_iterations: None,
metadata: std::collections::HashMap::new(),
}
}
}
/// Interface implemented by every evaluator.
///
/// The `Send + Sync` supertraits mean every implementor must be safe to move
/// to, and share between, threads.
pub trait NousEvaluator: Send + Sync {
    /// Name identifying this evaluator.
    fn name(&self) -> &str;

    /// The [`EvalLayer`] this evaluator reports for itself.
    fn layer(&self) -> EvalLayer;

    /// The [`EvalTiming`] this evaluator reports for itself.
    fn timing(&self) -> EvalTiming;

    /// Evaluates `ctx` and returns the resulting scores.
    ///
    /// The returned vector may hold any number of [`EvalScore`] values; the
    /// trait itself places no constraint on its length.
    ///
    /// # Errors
    ///
    /// Returns the implementation's error through [`NousResult`] when
    /// evaluation fails.
    fn evaluate(&self, ctx: &EvalContext) -> NousResult<Vec<EvalScore>>;
}
/// Named hook points, each with a fixed string label available via `as_str`.
// NOTE(review): the variant names suggest points in a model/tool call cycle at
// which evaluators run; the dispatching code is not in this file — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EvalHook {
    /// Hook labelled `before_model_call`.
    BeforeModelCall,
    /// Hook labelled `after_model_call`.
    AfterModelCall,
    /// Hook labelled `pre_tool_call`.
    PreToolCall,
    /// Hook labelled `post_tool_call`.
    PostToolCall,
    /// Hook labelled `on_run_finished`.
    OnRunFinished,
}
impl EvalHook {
pub fn as_str(&self) -> &'static str {
match self {
Self::BeforeModelCall => "before_model_call",
Self::AfterModelCall => "after_model_call",
Self::PreToolCall => "pre_tool_call",
Self::PostToolCall => "post_tool_call",
Self::OnRunFinished => "on_run_finished",
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::taxonomy::{EvalLayer, EvalTiming};

    /// Stateless evaluator that always emits a single fixed score of `0.9`.
    struct MockEvaluator;

    impl NousEvaluator for MockEvaluator {
        fn name(&self) -> &str {
            "mock"
        }

        fn layer(&self) -> EvalLayer {
            EvalLayer::Execution
        }

        fn timing(&self) -> EvalTiming {
            EvalTiming::Inline
        }

        fn evaluate(&self, ctx: &EvalContext) -> NousResult<Vec<EvalScore>> {
            // `?` is kept so any error conversion into `NousResult` still applies.
            let only = EvalScore::new(
                self.name(),
                0.9,
                self.layer(),
                self.timing(),
                &ctx.session_id,
            )?;
            Ok(vec![only])
        }
    }

    /// The mock yields exactly one score carrying its own name and value.
    #[test]
    fn mock_evaluator_produces_score() {
        let ctx = EvalContext::new("sess-1");
        let scores = MockEvaluator.evaluate(&ctx).unwrap();

        assert_eq!(scores.len(), 1);
        let first = &scores[0];
        assert_eq!(first.evaluator, "mock");
        assert!((first.value - 0.9).abs() < f64::EPSILON);
    }

    /// `EvalContext::new` stores the session id and leaves optional fields unset.
    #[test]
    fn eval_context_new_minimal() {
        let ctx = EvalContext::new("test");

        assert_eq!(ctx.session_id, "test");
        assert!(ctx.run_id.is_none());
        assert!(ctx.input_tokens.is_none());
    }

    /// Every hook maps to its expected snake_case label.
    #[test]
    fn eval_hook_as_str() {
        let expected = [
            (EvalHook::BeforeModelCall, "before_model_call"),
            (EvalHook::AfterModelCall, "after_model_call"),
            (EvalHook::PreToolCall, "pre_tool_call"),
            (EvalHook::PostToolCall, "post_tool_call"),
            (EvalHook::OnRunFinished, "on_run_finished"),
        ];

        for (hook, label) in expected.iter() {
            assert_eq!(hook.as_str(), *label);
        }
    }
}