1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
//! Environment step.
use super::Env;

/// Additional information that accompanies `Obs` and `Act`.
///
/// This is a marker trait: it declares no methods or associated items, so
/// implementing it only tags a type as usable for this purpose.
pub trait Info {}

/// Represents an action, observation and reward tuple `(a_t, o_t+1, r_t)`
/// with some additional information.
///
/// An environment emits a [`Step`] object at every interaction step.
/// This object might be used to create transitions `(o_t, a_t, o_t+1, r_t)`.
///
/// Old versions of the library supported vectorized environments, which required
/// the elements of [`Step`] to be able to hold multiple values.
/// This is why `reward` and `is_done` are vectors.
pub struct Step<E: Env> {
    /// Action (`a_t` in the tuple above).
    pub act: E::Act,

    /// Observation (`o_t+1` in the tuple above).
    pub obs: E::Obs,

    /// Reward (`r_t` in the tuple above), stored as a vector.
    pub reward: Vec<f32>,

    /// Flag denoting if episode is done, stored as a vector.
    ///
    /// NOTE(review): the documentation of `init_obs` treats `0` as "not done";
    /// presumably any non-zero value means "done" — confirm against callers.
    pub is_done: Vec<i8>,

    /// Information defined by user.
    pub info: E::Info,

    /// Initial observation. If `is_done[i] == 0`, the corresponding element will not be used.
    pub init_obs: E::Obs,
}

impl<E: Env> Step<E> {
    /// Creates a [`Step`] by moving each argument into the field of the same name.
    ///
    /// Note the argument order: `obs` comes before `act` here, whereas the
    /// struct declares `act` first.
    ///
    /// # Arguments
    ///
    /// * `obs` - Observation stored in the `obs` field.
    /// * `act` - Action stored in the `act` field.
    /// * `reward` - Reward values stored in the `reward` field.
    /// * `is_done` - Episode-done flags stored in the `is_done` field.
    /// * `info` - User-defined information stored in the `info` field.
    /// * `init_obs` - Initial observation stored in the `init_obs` field.
    pub fn new(
        obs: E::Obs,
        act: E::Act,
        reward: Vec<f32>,
        is_done: Vec<i8>,
        info: E::Info,
        init_obs: E::Obs,
    ) -> Self {
        // Field-init shorthand: every parameter is moved into its namesake field,
        // so the listing order here has no effect on the resulting value.
        Self {
            obs,
            init_obs,
            act,
            info,
            reward,
            is_done,
        }
    }
}

/// Processes a [`Step`] and outputs an item of type [`Self::Output`].
///
/// This trait is used in [`Trainer`](crate::Trainer). A [`Step`] object is transformed into
/// [`Self::Output`], which will be pushed into a replay buffer implementing
/// [`ExperienceBufferBase`](crate::ExperienceBufferBase).
/// The type [`Self::Output`] should be the same as [`ExperienceBufferBase::PushedItem`].
///
/// [`Self::Output`]: StepProcessorBase::Output
/// [`ExperienceBufferBase::PushedItem`]: crate::ExperienceBufferBase::PushedItem
pub trait StepProcessorBase<E: Env> {
    /// Configuration used by [`build`](StepProcessorBase::build) to construct the processor.
    type Config: Clone;

    /// The type of items produced by [`process`](StepProcessorBase::process).
    type Output;

    /// Builds a step processor from the given configuration.
    fn build(config: &Self::Config) -> Self;

    /// Resets the object, taking ownership of the initial observation `init_obs`.
    fn reset(&mut self, init_obs: E::Obs);

    /// Processes a [`Step`] object, consuming it, and returns the resulting
    /// [`Self::Output`](StepProcessorBase::Output).
    fn process(&mut self, step: Step<E>) -> Self::Output;
}