1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
//! Batch.

/// A batch of samples.
///
/// It is used to train agents.
pub trait Batch {
    /// A set of observation in a batch.
    type ObsBatch;

    /// A set of observation in a batch.
    type ActBatch;

    /// Unpack the data `(o_t, a_t, o_t+1, r_t, is_done_t)`.
    ///
    /// Optionally, the return value has sample indices in the replay buffer and
    /// thier weights. Those are used for prioritized experience replay (PER).
    fn unpack(
        self,
    ) -> (
        Self::ObsBatch,
        Self::ActBatch,
        Self::ObsBatch,
        Vec<f32>,
        Vec<i8>,
        Option<Vec<usize>>,
        Option<Vec<f32>>,
    );

    /// Returns the number of samples in the batch.
    fn len(&self) -> usize;

    /// Returns `o_t`.
    fn obs(&self) -> &Self::ObsBatch;

    /// Returns `a_t`.
    fn act(&self) -> &Self::ActBatch;

    /// Returns `o_t+1`.
    fn next_obs(&self) -> &Self::ObsBatch;

    /// Returns `r_t`.
    fn reward(&self) -> &Vec<f32>;

    /// Returns `is_done_t`.
    fn is_done(&self) -> &Vec<i8>;

    /// Returns `weight`. It is used for PER.
    fn weight(&self) -> &Option<Vec<f32>>;

    /// Returns `ix_sample`. It is used for PER.
    fn ix_sample(&self) -> &Option<Vec<usize>>;
}