border_core/base/
step.rs

1//! Environment step.
2use super::Env;
3
/// Marker trait for extra, user-defined information that accompanies
/// `Obs` and `Act`.
///
/// The trait declares no methods; implementing it only tags a type as
/// usable for this purpose.
pub trait Info {}
6
7/// Represents an action, observation and reward tuple `(a_t, o_t+1, r_t)`
8/// with some additional information.
9///
10/// An environment emits [`Step`] object at every interaction steps.
11/// This object might be used to create transitions `(o_t, a_t, o_t+1, r_t)`.
12pub struct Step<E: Env> {
13    /// Action.
14    pub act: E::Act,
15
16    /// Observation.
17    pub obs: E::Obs,
18
19    /// Reward.
20    pub reward: Vec<f32>,
21
22    /// Flag denoting if episode is terminated.
23    pub is_terminated: Vec<i8>,
24
25    /// Flag denoting if episode is truncated.
26    pub is_truncated: Vec<i8>,
27
28    /// Information defined by user.
29    pub info: E::Info,
30
31    /// Initial observation. If `is_done[i] == 0`, the corresponding element will not be used.
32    pub init_obs: E::Obs,
33}
34
35impl<E: Env> Step<E> {
36    /// Constructs a [`Step`] object.
37    pub fn new(
38        obs: E::Obs,
39        act: E::Act,
40        reward: Vec<f32>,
41        is_terminated: Vec<i8>,
42        is_truncated: Vec<i8>,
43        info: E::Info,
44        init_obs: E::Obs,
45    ) -> Self {
46        Step {
47            act,
48            obs,
49            reward,
50            is_terminated,
51            is_truncated,
52            info,
53            init_obs,
54        }
55    }
56
57    #[inline]
58    /// Terminated or truncated.
59    pub fn is_done(&self) -> bool {
60        self.is_terminated[0] == 1 || self.is_truncated[0] == 1
61    }
62}
63
64/// Process [`Step`] and output an item [`Self::Output`].
65///
66/// This trait is used in [`Trainer`](crate::Trainer). [`Step`] object is transformed to
67/// [`Self::Output`], which will be pushed into a replay buffer implementing
68/// [`ExperienceBufferBase`](crate::ExperienceBufferBase).
69/// The type [`Self::Output`] should be the same with [`ExperienceBufferBase::Item`].
70///
71/// [`Self::Output`]: StepProcessor::Output
72/// [`ExperienceBufferBase::Item`]: crate::ExperienceBufferBase::Item
73pub trait StepProcessor<E: Env> {
74    /// Configuration.
75    type Config: Clone;
76
77    /// The type of transitions produced by this trait.
78    type Output;
79
80    /// Build a producer.
81    fn build(config: &Self::Config) -> Self;
82
83    /// Resets the object.
84    fn reset(&mut self, init_obs: E::Obs);
85
86    /// Processes a [`Step`] object.
87    fn process(&mut self, step: Step<E>) -> Self::Output;
88}