border_core/evaluator.rs

//! Evaluation interface for reinforcement learning agents.
//!
//! This module provides interfaces and implementations for evaluating the performance
//! of reinforcement learning agents. Evaluation is a crucial step in the development
//! of reinforcement learning systems, allowing developers to:
//! - Measure the effectiveness of trained agents
//! - Compare different algorithms or hyperparameters
//! - Monitor training progress
//! - Validate the generalization of learned policies
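//!
//! # Example
//!
//! A minimal sketch of running an evaluation (variable names are illustrative,
//! and the agent is assumed to expose an `eval()` mode switch; `evaluator` can
//! be any [`Evaluator`] implementation such as [`DefaultEvaluator`]):
//!
//! ```ignore
//! agent.eval(); // the caller switches the agent into evaluation mode
//! let (score, record) = evaluator.evaluate(&mut agent)?;
//! println!("evaluation score: {score}");
//! ```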

use crate::{record::Record, Agent, Env, ReplayBufferBase};
use anyhow::Result;
mod default_evaluator;
pub use default_evaluator::DefaultEvaluator;

/// Interface for evaluating reinforcement learning agents.
///
/// This trait defines the standard interface for evaluating agents in different
/// environments. Implementations of this trait should:
/// - Run the agent in the environment for a specified number of episodes
/// - Collect performance metrics (e.g., average return, success rate)
/// - Return the results in a standardized format
///
/// # Type Parameters
///
/// * `E` - The environment type that the agent operates in
///
/// # Examples
///
/// ```ignore
/// struct CustomEvaluator<E: Env> {
///     env: E,
///     n_episodes: usize,
/// }
///
/// impl<E: Env> Evaluator<E> for CustomEvaluator<E> {
///     fn evaluate<R>(&mut self, agent: &mut Box<dyn Agent<E, R>>) -> Result<(f32, Record)>
///     where
///         R: ReplayBufferBase,
///     {
///         // Custom evaluation logic computing the scalar metric and a record
///         // ...
///     }
/// }
/// ```
pub trait Evaluator<E: Env> {
    /// Evaluates an agent's performance in the environment.
    ///
    /// This method should:
    /// 1. Run the agent in the environment for the specified number of episodes
    /// 2. Collect and aggregate performance metrics
    /// 3. Return a scalar performance metric together with a [`Record`] of the results
    ///
    /// # Arguments
    ///
    /// * `agent` - The agent to evaluate
    ///
    /// # Returns
    ///
    /// A tuple of (performance metric, [`Record`] containing the evaluation results).
    ///
    /// In [`Trainer`], the performance metric is used to choose the best model.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The environment fails to reset or step
    /// - The agent fails to produce actions
    /// - The evaluation process encounters an unexpected error
    ///
    /// # Note
    ///
    /// The caller is responsible for managing the agent's internal state,
    /// such as switching between training and evaluation modes. This allows
    /// for flexible evaluation strategies that may require different agent
    /// configurations.
    ///
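    /// # Example
    ///
    /// For instance, a training loop might use the returned metric for model
    /// selection (a sketch; `best_score` and the persistence step are
    /// illustrative):
    ///
    /// ```ignore
    /// let (score, record) = evaluator.evaluate(&mut agent)?;
    /// if score > best_score {
    ///     best_score = score;
    ///     // persist the agent's parameters as the new best model
    /// }
    /// ```
    ///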
    /// [`Trainer`]: crate::Trainer
    fn evaluate<R>(&mut self, agent: &mut Box<dyn Agent<E, R>>) -> Result<(f32, Record)>
    where
        R: ReplayBufferBase;
}