Struct Evaluator

Source

pub struct Evaluator { /* private fields */ }

Available on crate feature eval only.

Expand description

The main evaluator struct

Implementations§

Source §

impl Evaluator

Source

pub fn new(config: EvaluationConfig) -> Evaluator

Create a new evaluator with the given configuration

Source

pub fn with_llm_judge( config: EvaluationConfig, judge_model: Arc<dyn Llm>, ) -> Evaluator

Create an evaluator with an LLM judge for semantic matching and rubric evaluation

Source

pub fn set_llm_judge(&mut self, judge_model: Arc<dyn Llm>)

Set the LLM judge model

Source

pub fn has_llm_judge(&self) -> bool

Check if LLM judge is available

Source

pub fn set_structured_judge(&mut self, judge: Arc<StructuredJudge>)

Set the structured judge for typed verdict evaluation

Source

pub fn set_cost_tracker(&mut self, tracker: CostTracker)

Set the cost tracker for token usage and latency metrics

Source

pub fn set_trace_analyzer(&mut self, analyzer: TraceAnalyzer)

Set the trace analyzer for execution inefficiency detection

Source

pub fn set_conversation_scorer(&mut self, scorer: Arc<ConversationScorer>)

Set the conversation scorer for multi-turn metrics

Source

pub fn has_structured_judge(&self) -> bool

Check if a structured judge is configured

Source

pub fn has_cost_tracker(&self) -> bool

Check if a cost tracker is configured

Source

pub fn has_trace_analyzer(&self) -> bool

Check if a trace analyzer is configured

Source

pub fn has_conversation_scorer(&self) -> bool

Check if a conversation scorer is configured

Source

pub async fn evaluate_file( &self, agent: Arc<dyn Agent>, path: impl AsRef<Path>, ) -> Result<EvaluationReport, EvalError>

Evaluate a test file against an agent

Source

pub async fn evaluate_test_file( &self, agent: Arc<dyn Agent>, test_file: &TestFile, ) -> Result<EvaluationReport, EvalError>

Evaluate a TestFile struct

Source

pub async fn evaluate_case( &self, agent: Arc<dyn Agent>, eval_case: &EvalCase, ) -> Result<EvaluationResult, EvalError>

Evaluate a single test case

Source

pub async fn evaluate_cases_parallel( &self, agent: Arc<dyn Agent>, cases: &[EvalCase], concurrency: usize, ) -> Vec<Result<EvaluationResult, EvalError>>

Evaluate multiple test cases in parallel

Source

pub async fn evaluate_directory( &self, agent: Arc<dyn Agent>, dir: impl AsRef<Path>, ) -> Result<Vec<EvaluationReport>, EvalError>

Evaluate a directory of test files

Trait Implementations§

Source §

impl Default for Evaluator

Source §

fn default() -> Evaluator

Returns the “default value” for a type. Read more

Auto Trait Implementations§

§

impl UnsafeUnpin for Evaluator

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

Source §

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self> ⓘ

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self> ⓘ

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> PolicyExt for T
where T: ?Sized,

Source §

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more

Source §

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more

Source §

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> ⓘ
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self> ⓘ

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Struct Evaluator Copy item path

Implementations§

impl Evaluator

pub fn new(config: EvaluationConfig) -> Evaluator

pub fn with_llm_judge( config: EvaluationConfig, judge_model: Arc<dyn Llm>, ) -> Evaluator

pub fn set_llm_judge(&mut self, judge_model: Arc<dyn Llm>)

pub fn has_llm_judge(&self) -> bool

pub fn set_structured_judge(&mut self, judge: Arc<StructuredJudge>)

pub fn set_cost_tracker(&mut self, tracker: CostTracker)

pub fn set_trace_analyzer(&mut self, analyzer: TraceAnalyzer)

pub fn set_conversation_scorer(&mut self, scorer: Arc<ConversationScorer>)

pub fn has_structured_judge(&self) -> bool

pub fn has_cost_tracker(&self) -> bool

pub fn has_trace_analyzer(&self) -> bool

pub fn has_conversation_scorer(&self) -> bool

pub async fn evaluate_file( &self, agent: Arc<dyn Agent>, path: impl AsRef<Path>, ) -> Result<EvaluationReport, EvalError>

pub async fn evaluate_test_file( &self, agent: Arc<dyn Agent>, test_file: &TestFile, ) -> Result<EvaluationReport, EvalError>

pub async fn evaluate_case( &self, agent: Arc<dyn Agent>, eval_case: &EvalCase, ) -> Result<EvaluationResult, EvalError>

pub async fn evaluate_cases_parallel( &self, agent: Arc<dyn Agent>, cases: &[EvalCase], concurrency: usize, ) -> Vec<Result<EvaluationResult, EvalError>>

pub async fn evaluate_directory( &self, agent: Arc<dyn Agent>, dir: impl AsRef<Path>, ) -> Result<Vec<EvaluationReport>, EvalError>

Trait Implementations§

impl Default for Evaluator

fn default() -> Evaluator

Auto Trait Implementations§

impl !RefUnwindSafe for Evaluator

impl !UnwindSafe for Evaluator

impl Freeze for Evaluator

impl Send for Evaluator

impl Sync for Evaluator

impl Unpin for Evaluator

impl UnsafeUnpin for Evaluator

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT where ST: ?Sized, DT: ?Sized,

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self> ⓘ

fn in_current_span(self) -> Instrumented<Self> ⓘ

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Sized + Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Sized + Policy<B, E>, P: Policy<B, E>,

impl<T> Read<Exclusive, BecauseExclusive> for T where T: ?Sized,

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> ⓘ where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self> ⓘ

Struct Evaluator

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
where ST: ?Sized, DT: ?Sized,

impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
where ST: ?Sized, DT: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Sized + Policy<B, E>, P: Policy<B, E>,

impl<T> Read<Exclusive, BecauseExclusive> for T
where T: ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> ⓘ
where S: Into<Dispatch>,