border_core/base/env.rs
//! Environment interface for reinforcement learning.
//!
//! This module defines the core interface for environments in reinforcement learning.
//! An environment represents a Markov Decision Process (MDP) with which an agent interacts
//! by taking actions and receiving observations and rewards in return.

use super::{Act, Info, Obs, Step};
use crate::record::Record;
use anyhow::Result;

/// Represents a reinforcement learning environment, typically modeled as a Markov Decision Process (MDP).
///
/// This trait defines the interface for environments in reinforcement learning. It provides methods for:
/// - Building the environment with a specific configuration
/// - Performing steps in the environment
/// - Resetting the environment to its initial state
/// - Handling episode termination and truncation
///
/// # Associated Types
///
/// * `Config` - Configuration parameters for the environment
/// * `Obs` - The type of observations returned by the environment
/// * `Act` - The type of actions accepted by the environment
/// * `Info` - Additional information returned with each step
///
/// # Examples
///
/// A typical interaction with an environment might look like:
/// ```ignore
/// let config = EnvConfig::default();
/// let mut env = Env::build(&config, 42)?;
/// let mut obs = env.reset(None)?;
///
/// loop {
///     let action = agent.sample(&obs);
///     let (step, _) = env.step(&action);
///     obs = step.obs;
///
///     if step.is_done() {
///         break;
///     }
/// }
/// ```
pub trait Env {
    /// Configuration parameters for the environment.
    ///
    /// This type should contain all necessary parameters to build and configure
    /// the environment, such as environment-specific settings, rendering options,
    /// or difficulty levels.
    type Config: Clone;

    /// The type of observations returned by the environment.
    ///
    /// Observations represent the state of the environment as perceived by the agent.
    /// This type must implement the [`Obs`] trait.
    type Obs: Obs;

    /// The type of actions accepted by the environment.
    ///
    /// Actions represent the decisions made by the agent that affect the environment.
    /// This type must implement the [`Act`] trait.
    type Act: Act;

    /// Additional information returned with each step.
    ///
    /// This type can be used to provide extra information about the environment's state
    /// that isn't part of the observation. It must implement the [`Info`] trait.
    type Info: Info;

    /// Builds a new instance of the environment with the given configuration and random seed.
    ///
    /// # Arguments
    ///
    /// * `config` - Configuration parameters for the environment
    /// * `seed` - Random seed for reproducibility
    ///
    /// # Returns
    ///
    /// A new instance of the environment, or an error if construction fails
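    ///
    /// # Examples
    ///
    /// A minimal sketch of building an environment; `MyEnv` and `MyEnvConfig`
    /// are hypothetical types standing in for a concrete implementation of this
    /// trait and its `Config`:
    ///
    /// ```ignore
    /// let config = MyEnvConfig::default();
    ///
    /// // Reusing the same configuration and seed should give reproducible
    /// // behavior for environments with deterministic dynamics.
    /// let env_a = MyEnv::build(&config, 42)?;
    /// let env_b = MyEnv::build(&config, 42)?;
    /// ```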
    fn build(config: &Self::Config, seed: i64) -> Result<Self>
    where
        Self: Sized;

    /// Performs a single step in the environment.
    ///
    /// This method advances the environment by one time step, applying the given action
    /// and returning the resulting observation, reward, and termination information.
    ///
    /// # Arguments
    ///
    /// * `a` - The action to apply to the environment
    ///
    /// # Returns
    ///
    /// A tuple containing:
    /// 1. A [`Step`] object with the next observation, reward, and termination info
    /// 2. A [`Record`] with additional environment-specific information
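    ///
    /// # Examples
    ///
    /// A sketch of a single interaction; `env` is assumed to be a value of a
    /// hypothetical type implementing this trait, and `action` a valid action:
    ///
    /// ```ignore
    /// let (step, _record) = env.step(&action);
    ///
    /// if step.is_done() {
    ///     // The episode terminated or was truncated; reset before stepping again.
    ///     let obs = env.reset(None)?;
    /// }
    /// ```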
    fn step(&mut self, a: &Self::Act) -> (Step<Self>, Record)
    where
        Self: Sized;

    /// Resets the environment to its initial state.
    ///
    /// This method resets the environment when:
    /// - `is_done` is `None` (initial reset)
    /// - `is_done[0] == 1` (episode termination)
    ///
    /// # Arguments
    ///
    /// * `is_done` - Optional vector indicating which environments to reset
    ///
    /// # Note
    ///
    /// While the interface supports vectorized environments through `is_done`,
    /// current implementations support only a single environment, so
    /// `is_done.len()` is expected to be 1.
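    ///
    /// # Examples
    ///
    /// A sketch of the two reset patterns described above; `env` is a value of a
    /// hypothetical type implementing this trait:
    ///
    /// ```ignore
    /// // Initial reset at the start of an interaction.
    /// let mut obs = env.reset(None)?;
    ///
    /// // Reset after an episode has finished; a single environment is assumed,
    /// // so the vector has length 1.
    /// let is_done = vec![1i8];
    /// obs = env.reset(Some(&is_done))?;
    /// ```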
    fn reset(&mut self, is_done: Option<&Vec<i8>>) -> Result<Self::Obs>;

    /// Performs a step and automatically resets the environment if the episode ends.
    ///
    /// This is a convenience method that combines [`step`] and [`reset`] operations.
    /// If the step results in episode termination, the environment is automatically
    /// reset and the initial observation of the new episode is included in the
    /// returned step.
    ///
    /// # Arguments
    ///
    /// * `a` - The action to apply to the environment
    ///
    /// # Returns
    ///
    /// A tuple containing:
    /// 1. A [`Step`] object with the next observation, reward, and termination info
    /// 2. A [`Record`] with additional environment-specific information
    ///
    /// [`step`]: Env::step
    /// [`reset`]: Env::reset
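    ///
    /// # Examples
    ///
    /// A sketch of a rollout loop relying on the automatic reset; `env` and
    /// `agent` are hypothetical values, with `agent.sample` used as in the
    /// trait-level example:
    ///
    /// ```ignore
    /// let mut obs = env.reset(None)?;
    ///
    /// for _ in 0..1000 {
    ///     let action = agent.sample(&obs);
    ///     let (step, _record) = env.step_with_reset(&action);
    ///
    ///     // Continue from the observation of the freshly reset environment if
    ///     // the episode ended, otherwise from the step's observation.
    ///     obs = match step.init_obs {
    ///         Some(init_obs) => init_obs,
    ///         None => step.obs,
    ///     };
    /// }
    /// ```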
    fn step_with_reset(&mut self, a: &Self::Act) -> (Step<Self>, Record)
    where
        Self: Sized,
    {
        let (step, record) = self.step(a);
        // Only single (non-vectorized) environments are supported here.
        assert_eq!(step.is_terminated.len(), 1);
        let step = if step.is_done() {
            // The episode has ended: reset and attach the new episode's initial
            // observation to the returned step.
            let init_obs = self.reset(None).unwrap();
            Step {
                act: step.act,
                obs: step.obs,
                reward: step.reward,
                is_terminated: step.is_terminated,
                is_truncated: step.is_truncated,
                info: step.info,
                init_obs: Some(init_obs),
            }
        } else {
            step
        };

        (step, record)
    }

    /// Resets the environment with a specific index.
    ///
    /// This method is primarily used during evaluation to control the initial state
    /// of the environment. The index can be used in various ways, such as:
    /// - As a random seed for deterministic initialization
    /// - To select specific starting conditions
    /// - To control the difficulty level
    ///
    /// # Arguments
    ///
    /// * `ix` - An index used to control the reset behavior
    ///
    /// # Note
    ///
    /// This method is called by the [`Trainer`] during evaluation to ensure
    /// consistent testing conditions.
    ///
    /// [`Trainer`]: crate::Trainer
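    ///
    /// # Examples
    ///
    /// A sketch of evaluating over a fixed set of initial conditions; `env` and
    /// `run_episode` are hypothetical:
    ///
    /// ```ignore
    /// for ix in 0..10 {
    ///     // Each index selects a reproducible initial state (for example, by
    ///     // seeding the environment), so evaluations are comparable across runs.
    ///     let obs = env.reset_with_index(ix)?;
    ///     let episode_return = run_episode(&mut env, obs);
    /// }
    /// ```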
    fn reset_with_index(&mut self, ix: usize) -> Result<Self::Obs>;
}