border_core/base/step.rs
1//! Environment step.
2use super::Env;
3
/// Additional information accompanying `Obs` and `Act`.
///
/// This is a marker trait: it declares no methods or associated items.
pub trait Info {}
6
/// Represents an action, observation and reward tuple `(a_t, o_t+1, r_t)`
/// with some additional information.
///
/// An environment emits a [`Step`] object at every interaction step.
/// This object might be used to create transitions `(o_t, a_t, o_t+1, r_t)`.
pub struct Step<E: Env> {
    /// Action (`a_t` in the tuple above).
    pub act: E::Act,

    /// Observation (`o_t+1` in the tuple above).
    pub obs: E::Obs,

    /// Reward (`r_t` in the tuple above).
    pub reward: Vec<f32>,

    /// Flag denoting if the episode is terminated.
    ///
    /// A value of `1` means the flag is set.
    pub is_terminated: Vec<i8>,

    /// Flag denoting if the episode is truncated.
    ///
    /// A value of `1` means the flag is set.
    pub is_truncated: Vec<i8>,

    /// Information defined by user.
    pub info: E::Info,

    /// Initial observation.
    ///
    /// For an element whose episode is not done, the corresponding element
    /// of this observation will not be used.
    // NOTE(review): "done" presumably means terminated or truncated, i.e. the
    // matching entry of `is_terminated` or `is_truncated` is `1` -- confirm.
    pub init_obs: E::Obs,
}
34
35impl<E: Env> Step<E> {
36 /// Constructs a [`Step`] object.
37 pub fn new(
38 obs: E::Obs,
39 act: E::Act,
40 reward: Vec<f32>,
41 is_terminated: Vec<i8>,
42 is_truncated: Vec<i8>,
43 info: E::Info,
44 init_obs: E::Obs,
45 ) -> Self {
46 Step {
47 act,
48 obs,
49 reward,
50 is_terminated,
51 is_truncated,
52 info,
53 init_obs,
54 }
55 }
56
57 #[inline]
58 /// Terminated or truncated.
59 pub fn is_done(&self) -> bool {
60 self.is_terminated[0] == 1 || self.is_truncated[0] == 1
61 }
62}
63
/// Processes a [`Step`] and outputs an item of type [`Self::Output`].
///
/// This trait is used in [`Trainer`](crate::Trainer). A [`Step`] object is transformed into
/// [`Self::Output`], which will be pushed into a replay buffer implementing
/// [`ExperienceBufferBase`](crate::ExperienceBufferBase).
/// The type [`Self::Output`] should be the same as [`ExperienceBufferBase::Item`].
///
/// [`Self::Output`]: StepProcessor::Output
/// [`ExperienceBufferBase::Item`]: crate::ExperienceBufferBase::Item
pub trait StepProcessor<E: Env> {
    /// Configuration used by [`build`](StepProcessor::build).
    type Config: Clone;

    /// The type of transitions produced by this trait.
    type Output;

    /// Builds a processor from the given configuration.
    fn build(config: &Self::Config) -> Self;

    /// Resets the object, given the initial observation `init_obs`.
    fn reset(&mut self, init_obs: E::Obs);

    /// Processes a [`Step`] object, taking it by value, and returns the
    /// resulting [`Self::Output`].
    fn process(&mut self, step: Step<E>) -> Self::Output;
}