pub struct Evaluator { /* private fields */ }
Available on crate feature `eval` only.
The main evaluator struct
Implementations
impl Evaluator
pub fn new(config: EvaluationConfig) -> Evaluator
Create a new evaluator from the given configuration
pub fn with_llm_judge(
    config: EvaluationConfig,
    judge_model: Arc<dyn Llm>,
) -> Evaluator
Create an evaluator with an LLM judge for semantic matching and rubric evaluation
pub fn set_llm_judge(&mut self, judge_model: Arc<dyn Llm>)
Set the LLM judge model
pub fn has_llm_judge(&self) -> bool
Check if LLM judge is available
pub fn set_structured_judge(&mut self, judge: Arc<StructuredJudge>)
Set the structured judge for typed verdict evaluation
pub fn set_cost_tracker(&mut self, tracker: CostTracker)
Set the cost tracker for token usage and latency metrics
pub fn set_trace_analyzer(&mut self, analyzer: TraceAnalyzer)
Set the trace analyzer for execution inefficiency detection
pub fn set_conversation_scorer(&mut self, scorer: Arc<ConversationScorer>)
Set the conversation scorer for multi-turn metrics
pub fn has_structured_judge(&self) -> bool
Check if a structured judge is configured
pub fn has_cost_tracker(&self) -> bool
Check if a cost tracker is configured
pub fn has_trace_analyzer(&self) -> bool
Check if a trace analyzer is configured
pub fn has_conversation_scorer(&self) -> bool
Check if a conversation scorer is configured
pub async fn evaluate_file(
    &self,
    agent: Arc<dyn Agent>,
    path: impl AsRef<Path>,
) -> Result<EvaluationReport, EvalError>
Evaluate a test file against an agent
pub async fn evaluate_test_file(
    &self,
    agent: Arc<dyn Agent>,
    test_file: &TestFile,
) -> Result<EvaluationReport, EvalError>
Evaluate a TestFile struct
pub async fn evaluate_case(
    &self,
    agent: Arc<dyn Agent>,
    eval_case: &EvalCase,
) -> Result<EvaluationResult, EvalError>
Evaluate a single test case
pub async fn evaluate_cases_parallel(
    &self,
    agent: Arc<dyn Agent>,
    cases: &[EvalCase],
    concurrency: usize,
) -> Vec<Result<EvaluationResult, EvalError>>
Evaluate multiple test cases in parallel
pub async fn evaluate_directory(
    &self,
    agent: Arc<dyn Agent>,
    dir: impl AsRef<Path>,
) -> Result<Vec<EvaluationReport>, EvalError>
Evaluate a directory of test files