Struct Evaluator

Source

pub struct Evaluator { /* private fields */ }

Expand description

The main evaluator struct

Implementations§

Source §

impl Evaluator

Source

pub fn new(config: EvaluationConfig) -> Self

Create a new evaluator with the given configuration

Source

pub fn with_llm_judge( config: EvaluationConfig, judge_model: Arc<dyn Llm>, ) -> Self

Create an evaluator with an LLM judge for semantic matching and rubric evaluation

Source

pub fn set_llm_judge(&mut self, judge_model: Arc<dyn Llm>)

Set the LLM judge model

Source

pub fn has_llm_judge(&self) -> bool

Check if LLM judge is available

Source

pub async fn evaluate_file( &self, agent: Arc<dyn Agent>, path: impl AsRef<Path>, ) -> Result<EvaluationReport>

Evaluate a test file against an agent

Source

pub async fn evaluate_test_file( &self, agent: Arc<dyn Agent>, test_file: &TestFile, ) -> Result<EvaluationReport>

Evaluate a TestFile struct

Source

pub async fn evaluate_case( &self, agent: Arc<dyn Agent>, eval_case: &EvalCase, ) -> Result<EvaluationResult>

Evaluate a single test case

Source

pub async fn evaluate_cases_parallel( &self, agent: Arc<dyn Agent>, cases: &[EvalCase], concurrency: usize, ) -> Vec<Result<EvaluationResult>> ⓘ

Evaluate multiple test cases in parallel

Source

pub async fn evaluate_directory( &self, agent: Arc<dyn Agent>, dir: impl AsRef<Path>, ) -> Result<Vec<EvaluationReport>>

Evaluate a directory of test files

Trait Implementations§

Source §

impl Default for Evaluator

Source §

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

§

impl !UnwindSafe for Evaluator

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

Evaluator

Struct Evaluator Copy item path

Implementations§

impl Evaluator

pub fn new(config: EvaluationConfig) -> Self

pub fn with_llm_judge( config: EvaluationConfig, judge_model: Arc<dyn Llm>, ) -> Self

pub fn set_llm_judge(&mut self, judge_model: Arc<dyn Llm>)

pub fn has_llm_judge(&self) -> bool

pub async fn evaluate_file( &self, agent: Arc<dyn Agent>, path: impl AsRef<Path>, ) -> Result<EvaluationReport>

pub async fn evaluate_test_file( &self, agent: Arc<dyn Agent>, test_file: &TestFile, ) -> Result<EvaluationReport>

pub async fn evaluate_case( &self, agent: Arc<dyn Agent>, eval_case: &EvalCase, ) -> Result<EvaluationResult>

pub async fn evaluate_cases_parallel( &self, agent: Arc<dyn Agent>, cases: &[EvalCase], concurrency: usize, ) -> Vec<Result<EvaluationResult>> ⓘ

pub async fn evaluate_directory( &self, agent: Arc<dyn Agent>, dir: impl AsRef<Path>, ) -> Result<Vec<EvaluationReport>>

Trait Implementations§

impl Default for Evaluator

fn default() -> Self

Auto Trait Implementations§

impl Freeze for Evaluator

impl !RefUnwindSafe for Evaluator

impl Send for Evaluator

impl Sync for Evaluator

impl Unpin for Evaluator

impl UnsafeUnpin for Evaluator

impl !UnwindSafe for Evaluator

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct Evaluator

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,