Struct EvalRunner

Source

pub struct EvalRunner { /* private fields */ }

Expand description

Evaluation runner — executes a suite against an agent using async metrics.

Implementations§

Source §

impl EvalRunner

Source

pub fn new() -> Self

Create a new EvalRunner with no metrics and a default threshold of 0.7.

Source

pub fn metric(self, metric: Box<dyn AsyncMetric>) -> Self

Add a metric to score agent outputs with.

Source

pub fn threshold(self, threshold: f64) -> Self

Set the minimum score threshold for a test case to pass.

A case passes if all metric scores are ≥ threshold.

Source

pub async fn run( &self, agent: &dyn EvalAgent, suite: &EvalSuite, ) -> Result<EvalReport>

Execute the evaluation suite against the agent.

For each test case: calls agent.respond(input), scores with all metrics, marks passed/failed, and aggregates into an EvalReport.

§Errors

Returns an error if the agent fails on any test case.

Trait Implementations§

Source §

impl Default for EvalRunner

Source §

fn default() -> Self

Returns the “default value” for a type. Read more

Auto Trait Implementations§

§

impl !UnwindSafe for EvalRunner

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

EvalRunner

Struct EvalRunner Copy item path

Implementations§

impl EvalRunner

pub fn new() -> Self

pub fn metric(self, metric: Box<dyn AsyncMetric>) -> Self

pub fn threshold(self, threshold: f64) -> Self

pub async fn run( &self, agent: &dyn EvalAgent, suite: &EvalSuite, ) -> Result<EvalReport>

§Errors

Trait Implementations§

impl Default for EvalRunner

fn default() -> Self

Auto Trait Implementations§

impl Freeze for EvalRunner

impl !RefUnwindSafe for EvalRunner

impl Send for EvalRunner

impl Sync for EvalRunner

impl Unpin for EvalRunner

impl UnsafeUnpin for EvalRunner

impl !UnwindSafe for EvalRunner

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct EvalRunner

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,