border_core/base/
step.rs

1//! Step processing interface for reinforcement learning.
2//!
3//! This module defines the core interfaces for processing environment steps in reinforcement learning.
4//! It provides structures and traits for handling the transition between states, including
5//! observations, actions, rewards, and episode termination information.
6
7use super::Env;
8
/// Marker trait for extra data an environment attaches to a step.
///
/// The trait declares no methods or associated items; implementing it only
/// tags a type as usable for per-step information.
///
/// The unit type `()` implements [`Info`], so it can be used where there is
/// nothing extra to report.
///
/// # Examples
///
/// ```ignore
/// #[derive(Debug)]
/// struct CustomInfo {
///     velocity: f32,
///     position: (f32, f32),
/// }
///
/// // An empty impl is all that is required.
/// impl Info for CustomInfo {}
/// ```
pub trait Info {}

/// `()` serves as the "no additional information" type.
impl Info for () {}
29
30/// Represents a single step in the environment, containing the action taken,
31/// the resulting observation, reward, and episode status.
32///
33/// This struct encapsulates all the information produced by an environment
34/// during a single interaction step. It is used to create transitions of the form
35/// `(o_t, a_t, o_t+1, r_t)` for training reinforcement learning agents.
36///
37/// # Type Parameters
38///
39/// * `E` - The environment type that produced this step
40///
41/// # Fields
42///
43/// * `act` - The action taken by the agent
44/// * `obs` - The observation received from the environment
45/// * `reward` - The reward received for the action
46/// * `is_terminated` - Flags indicating if the episode has terminated
47/// * `is_truncated` - Flags indicating if the episode has been truncated
48/// * `info` - Additional environment-specific information
49/// * `init_obs` - The initial observation of the next episode (if applicable)
50///
51/// # Examples
52///
53/// ```ignore
54/// let step = Step::new(
55///     observation,
56///     action,
57///     vec![0.5],  // reward
58///     vec![0],    // not terminated
59///     vec![0],    // not truncated
60///     info,
61///     None,       // no initial observation
62/// );
63///
64/// if step.is_done() {
65///     // Handle episode completion
66/// }
67/// ```
68pub struct Step<E: Env> {
69    /// The action taken by the agent in this step.
70    pub act: E::Act,
71
72    /// The observation received from the environment after taking the action.
73    pub obs: E::Obs,
74
75    /// The reward received for taking the action.
76    pub reward: Vec<f32>,
77
78    /// Flags indicating if the episode has terminated.
79    /// A value of 1 indicates termination.
80    pub is_terminated: Vec<i8>,
81
82    /// Flags indicating if the episode has been truncated.
83    /// A value of 1 indicates truncation.
84    pub is_truncated: Vec<i8>,
85
86    /// Additional environment-specific information.
87    pub info: E::Info,
88
89    /// The initial observation of the next episode, if applicable.
90    /// This is used when an episode ends and a new one begins.
91    pub init_obs: Option<E::Obs>,
92}
93
94impl<E: Env> Step<E> {
95    /// Constructs a new [`Step`] object with the given components.
96    ///
97    /// # Arguments
98    ///
99    /// * `obs` - The observation received from the environment
100    /// * `act` - The action taken by the agent
101    /// * `reward` - The reward received for the action
102    /// * `is_terminated` - Flags indicating episode termination
103    /// * `is_truncated` - Flags indicating episode truncation
104    /// * `info` - Additional environment-specific information
105    /// * `init_obs` - The initial observation of the next episode
106    ///
107    /// # Returns
108    ///
109    /// A new [`Step`] object containing all the provided information
110    pub fn new(
111        obs: E::Obs,
112        act: E::Act,
113        reward: Vec<f32>,
114        is_terminated: Vec<i8>,
115        is_truncated: Vec<i8>,
116        info: E::Info,
117        init_obs: Option<E::Obs>,
118    ) -> Self {
119        Step {
120            act,
121            obs,
122            reward,
123            is_terminated,
124            is_truncated,
125            info,
126            init_obs,
127        }
128    }
129
130    /// Checks if the episode has ended, either through termination or truncation.
131    ///
132    /// # Returns
133    ///
134    /// `true` if the episode has ended, `false` otherwise
135    #[inline]
136    pub fn is_done(&self) -> bool {
137        self.is_terminated[0] == 1 || self.is_truncated[0] == 1
138    }
139}
140
141/// Processes environment steps and produces items for a replay buffer.
142///
143/// This trait defines the interface for converting [`Step`] objects into items
144/// that can be stored in a replay buffer. It is used by the [`Trainer`] to
145/// transform environment interactions into training samples.
146///
147/// # Type Parameters
148///
149/// * `E` - The environment type
150///
151/// # Associated Types
152///
153/// * `Config` - Configuration parameters for the processor
154/// * `Output` - The type of items produced by the processor
155///
156/// # Examples
157///
158/// ```ignore
159/// struct SimpleProcessor;
160///
161/// impl<E: Env> StepProcessor<E> for SimpleProcessor {
162///     type Config = ();
163///     type Output = (E::Obs, E::Act, E::Obs, f32);
164///
165///     fn build(_: &Self::Config) -> Self {
166///         Self
167///     }
168///
169///     fn reset(&mut self, _: E::Obs) {}
170///
171///     fn process(&mut self, step: Step<E>) -> Self::Output {
172///         (step.init_obs.unwrap(), step.act, step.obs, step.reward[0])
173///     }
174/// }
175/// ```
176///
177/// [`Trainer`]: crate::Trainer
178pub trait StepProcessor<E: Env> {
179    /// Configuration parameters for the processor.
180    ///
181    /// This type must implement `Clone` to support building multiple instances
182    /// with the same configuration.
183    type Config: Clone;
184
185    /// The type of items produced by the processor.
186    ///
187    /// This type should match the `Item` type of the replay buffer that will
188    /// store the processed steps.
189    type Output;
190
191    /// Builds a new processor with the given configuration.
192    ///
193    /// # Arguments
194    ///
195    /// * `config` - The configuration parameters
196    ///
197    /// # Returns
198    ///
199    /// A new instance of the processor
200    fn build(config: &Self::Config) -> Self;
201
202    /// Resets the processor with a new initial observation.
203    ///
204    /// This method is called at the start of each episode to initialize
205    /// the processor with the first observation.
206    ///
207    /// # Arguments
208    ///
209    /// * `init_obs` - The initial observation of the episode
210    fn reset(&mut self, init_obs: E::Obs);
211
212    /// Processes a step and produces an item for the replay buffer.
213    ///
214    /// This method transforms a [`Step`] object into an item that can be
215    /// stored in a replay buffer. The transformation typically involves
216    /// creating a transition tuple of the form `(o_t, a_t, o_t+1, r_t)`.
217    ///
218    /// # Arguments
219    ///
220    /// * `step` - The step to process
221    ///
222    /// # Returns
223    ///
224    /// An item ready to be stored in a replay buffer
225    fn process(&mut self, step: Step<E>) -> Self::Output;
226}