border_core/evaluator.rs
//! Evaluation interface for reinforcement learning agents.
//!
//! This module provides interfaces and implementations for evaluating the performance
//! of reinforcement learning agents. Evaluation is a crucial step in the development
//! of reinforcement learning systems, allowing developers to:
//! - Measure the effectiveness of trained agents
//! - Compare different algorithms or hyperparameters
//! - Monitor training progress
//! - Validate the generalization of learned policies
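//!
//! # Examples
//!
//! A minimal sketch of running an evaluation with [`DefaultEvaluator`]. The
//! constructor arguments, `env_config`, and the `eval`/`train` mode methods on
//! the agent are assumptions for illustration, not a prescribed API:
//!
//! ```ignore
//! // Build an evaluator that runs the agent for 5 episodes
//! // (hypothetical constructor arguments).
//! let mut evaluator = DefaultEvaluator::new(&env_config, 5)?;
//!
//! // The caller manages the agent's mode (see `Evaluator::evaluate`).
//! agent.eval();
//! let (score, record) = evaluator.evaluate(&mut agent)?;
//! println!("mean return over 5 episodes: {score}");
//! agent.train();
//! ```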

use crate::{record::Record, Agent, Env, ReplayBufferBase};
use anyhow::Result;
mod default_evaluator;
pub use default_evaluator::DefaultEvaluator;

/// Interface for evaluating reinforcement learning agents.
///
/// This trait defines the standard interface for evaluating agents in different
/// environments. Implementations of this trait should:
/// - Run the agent in the environment for a specified number of episodes
/// - Collect performance metrics (e.g., average return, success rate)
/// - Return the results in a standardized format
///
/// # Type Parameters
///
/// * `E` - The environment type that the agent operates in
///
/// # Examples
///
/// ```ignore
/// struct CustomEvaluator<E: Env> {
///     env: E,
///     n_episodes: usize,
/// }
///
/// impl<E: Env> Evaluator<E> for CustomEvaluator<E> {
///     fn evaluate<R>(&mut self, agent: &mut Box<dyn Agent<E, R>>) -> Result<(f32, Record)>
///     where
///         R: ReplayBufferBase,
///     {
///         // Custom evaluation logic that runs `agent` in `self.env` and
///         // returns a (performance metric, Record) pair.
///         // ...
///     }
/// }
/// ```
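///
/// For typical use cases, the ready-made [`DefaultEvaluator`] exported from
/// this module provides a built-in implementation of this trait.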
pub trait Evaluator<E: Env> {
    /// Evaluates an agent's performance in the environment.
    ///
    /// This method should:
    /// 1. Run the agent in the environment for the specified number of episodes
    /// 2. Collect and aggregate performance metrics
    /// 3. Return a performance metric together with the results in a [`Record`]
    ///
    /// # Arguments
    ///
    /// * `agent` - The agent to evaluate
    ///
    /// # Returns
    ///
    /// A tuple of (performance metric, [`Record`] containing the evaluation results).
    ///
    /// In [`Trainer`], the performance metric is used to choose the best model.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The environment fails to reset or step
    /// - The agent fails to produce actions
    /// - The evaluation process encounters an unexpected error
    ///
    /// # Note
    ///
    /// The caller is responsible for managing the agent's internal state,
    /// such as switching between training and evaluation modes. This allows
    /// for flexible evaluation strategies that may require different agent
    /// configurations.
    ///
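    /// # Examples
    ///
    /// A minimal sketch of an implementation body, assuming the `env` and
    /// `n_episodes` fields from the trait-level example. The `reset`/`step`/
    /// `sample` calls, the reward and termination handling, and the `Record`
    /// constructor are assumptions about the `Env`, `Agent`, and `Record`
    /// interfaces, for illustration only:
    ///
    /// ```ignore
    /// fn evaluate<R>(&mut self, agent: &mut Box<dyn Agent<E, R>>) -> Result<(f32, Record)>
    /// where
    ///     R: ReplayBufferBase,
    /// {
    ///     let mut returns = Vec::with_capacity(self.n_episodes);
    ///     for _ in 0..self.n_episodes {
    ///         let mut obs = self.env.reset(None)?;  // assumed reset signature
    ///         let mut episode_return = 0.0f32;
    ///         loop {
    ///             let act = agent.sample(&obs);     // assumed sampling method
    ///             let (step, _) = self.env.step(&act);
    ///             episode_return += step.reward[0]; // assumed reward layout
    ///             if step.is_done() {               // assumed termination check
    ///                 break;
    ///             }
    ///             obs = step.obs;
    ///         }
    ///         returns.push(episode_return);
    ///     }
    ///     let mean = returns.iter().sum::<f32>() / returns.len() as f32;
    ///     // Hypothetical Record constructor for a single scalar entry.
    ///     let record = Record::from_scalar("mean_return", mean);
    ///     Ok((mean, record))
    /// }
    /// ```
    ///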
    /// [`Trainer`]: crate::Trainer
    fn evaluate<R>(&mut self, agent: &mut Box<dyn Agent<E, R>>) -> Result<(f32, Record)>
    where
        R: ReplayBufferBase;
}