Skip to main content

rlevo_core/
evaluation.rs

//! Object-safe environment interface consumed by external evaluators.
//!
//! [`BenchEnv`] is intentionally narrower than [`Environment`] so consumers
//! (benchmarking harnesses, evolutionary outer loops) do not have to thread
//! const-generic dimensions through their signatures. Adapters that wrap
//! concrete [`Environment`] impls live in `rlevo-environments` (behind the
//! `bench` feature).
//!
//! `reset` and `step` return `Result<_, BenchError>` so adapters preserve
//! upstream recoverable errors ([`EnvironmentError`]) without escalating
//! them to panics. Consuming harnesses still wrap calls in `catch_unwind`
//! to capture genuine programming-bug panics separately.
//!
//! [`Environment`]: crate::environment::Environment
//! [`EnvironmentError`]: crate::environment::EnvironmentError
16
17use crate::environment::EnvironmentError;
18
/// Outcome of one environment transition, as handed to an external evaluator.
///
/// This is the success value of [`BenchEnv::step`]. `Obs` is left generic so
/// an adapter can pass through the concrete observation type of the
/// environment it wraps without erasing it any further.
///
/// Equality is derived field by field and therefore follows `f64` semantics
/// for [`reward`](Self::reward): two steps whose reward is `NaN` never
/// compare equal. `Eq` and `Hash` are deliberately not derived for the same
/// reason.
#[derive(Debug, Clone, PartialEq)]
pub struct BenchStep<Obs> {
    /// Observation handed to the agent once the action has been applied.
    pub observation: Obs,
    /// Scalar reward earned by this transition.
    pub reward: f64,
    /// `true` once the episode is over, whether by termination or truncation.
    pub done: bool,
}
33
34/// Recoverable error reported by a [`BenchEnv`] impl.
35///
36/// Wraps [`EnvironmentError`] so adapters preserve the typed upstream
37/// error rather than collapsing it to a string.
38#[derive(Debug, thiserror::Error)]
39pub enum BenchError {
40    #[error("environment reset failed: {0}")]
41    Reset(#[source] EnvironmentError),
42    #[error("environment step failed: {0}")]
43    Step(#[source] EnvironmentError),
44}
45
46/// Object-safe environment interface consumed by external evaluators.
47///
48/// `BenchEnv` strips the const-generic dimensionality of [`Environment`] so
49/// benchmarking harnesses and evolutionary outer loops can work with a plain
50/// trait object (`dyn BenchEnv`) rather than threading dimension parameters
51/// through their own type signatures.
52///
53/// Concrete adapters that bridge a typed [`Environment`] to `BenchEnv` live
54/// in `rlevo-environments` behind the `bench` feature.
55///
56/// # Errors
57///
58/// Both [`reset`] and [`step`] return [`BenchError`], which wraps the
59/// upstream [`EnvironmentError`] variants so callers can distinguish
60/// recoverable environment failures from programming bugs caught by
61/// `catch_unwind`.
62///
63/// [`Environment`]: crate::environment::Environment
64/// [`EnvironmentError`]: crate::environment::EnvironmentError
65/// [`reset`]: BenchEnv::reset
66/// [`step`]: BenchEnv::step
67pub trait BenchEnv {
68    /// The observation type the environment produces on each step.
69    type Observation;
70    /// The action type the environment accepts on each step.
71    type Action;
72
73    /// Reset the environment to an initial state and return the first observation.
74    ///
75    /// # Errors
76    ///
77    /// Returns [`BenchError::Reset`] if the underlying environment's reset
78    /// operation fails.
79    fn reset(&mut self) -> Result<Self::Observation, BenchError>;
80
81    /// Apply `action` and advance the environment by one step.
82    ///
83    /// Returns a [`BenchStep`] containing the next observation, the scalar
84    /// reward, and a `done` flag indicating episode termination.
85    ///
86    /// # Errors
87    ///
88    /// Returns [`BenchError::Step`] if the underlying environment's step
89    /// operation fails.
90    fn step(
91        &mut self,
92        action: Self::Action,
93    ) -> Result<BenchStep<Self::Observation>, BenchError>;
94}