1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//! Environment step.
use super::Env;

/// Additional information to `Obs` and `Act`.
pub trait Info {}

/// All information given at every step of agent-envieronment interaction.
///
/// Old versions of the library support veectorized environments, which requires
/// elements in [Step] to be able to handle multiple values.
/// This is why `reward` and `is_done` are vector.
pub struct Step<E: Env> {
    /// Action.
    pub act: E::Act,

    /// Observation.
    pub obs: E::Obs,

    /// Reward.
    pub reward: Vec<f32>,

    /// Flag denoting if episode is done.
    pub is_done: Vec<i8>,

    /// Information defined by user.
    pub info: E::Info,

    /// Initial observation. If `is_done[i] == 0`, the corresponding element will not be used.
    pub init_obs: E::Obs,
}

impl<E: Env> Step<E> {
    /// Constructs a [Step] object.
    pub fn new(
        obs: E::Obs,
        act: E::Act,
        reward: Vec<f32>,
        is_done: Vec<i8>,
        info: E::Info,
        init_obs: E::Obs,
    ) -> Self {
        Step {
            act,
            obs,
            reward,
            is_done,
            info,
            init_obs,
        }
    }
}

/// Process [Step] and output an item.
pub trait StepProcessorBase<E: Env> {
    /// Configuration.
    type Config: Clone;

    /// The type of transitions produced by this trait.
    type Output;

    /// Build a producer.
    fn build(config: &Self::Config) -> Self;

    /// Resets the object.
    fn reset(&mut self, init_obs: E::Obs);

    /// Processes a [Step].
    fn process(&mut self, step: Step<E>) -> Self::Output;
}