border_core/base/
batch.rs

1//! Batch.
2
/// A batch of transitions for training agents.
///
/// This trait represents a standard transition `(o, a, o', r, is_done)`,
/// where `o` is an observation, `a` is an action, `o'` is an observation
/// after some time steps and `r` is a reward. Typically, `o'` is for the
/// next step and is used for a single-step backup. `o'` can also be taken
/// multiple steps after `o`, in which case it is sometimes called an n-step
/// backup.
///
/// In [`TransitionBatch::unpack`], `is_done` is reported as two separate
/// flags: `is_terminated` and `is_truncated`.
///
/// The type of `o` and `o'` is the associated type `ObsBatch`.
/// The type of `a` is the associated type `ActBatch`.
pub trait TransitionBatch {
    /// A set of observations in a batch.
    type ObsBatch;

    /// A set of actions in a batch.
    type ActBatch;

    /// Unpacks the data `(o_t, a_t, o_t+n, r_t, is_terminated_t, is_truncated_t)`.
    ///
    /// The batch is consumed. The elements of the returned tuple are, in order:
    ///
    /// 1. `o_t`: observations,
    /// 2. `a_t`: actions,
    /// 3. `o_t+n`: observations after `n` steps,
    /// 4. `r_t`: rewards,
    /// 5. `is_terminated_t`: termination flags,
    /// 6. `is_truncated_t`: truncation flags,
    /// 7. optionally, the indices of the samples in the replay buffer,
    /// 8. optionally, the weights of the samples.
    ///
    /// The last two elements are used for prioritized experience replay (PER).
    fn unpack(
        self,
    ) -> (
        Self::ObsBatch,
        Self::ActBatch,
        Self::ObsBatch,
        Vec<f32>,
        Vec<i8>,
        Vec<i8>,
        Option<Vec<usize>>,
        Option<Vec<f32>>,
    );

    /// Returns the number of samples in the batch.
    fn len(&self) -> usize;

    /// Returns `true` if the batch contains no samples.
    ///
    /// This is a provided method defined in terms of [`TransitionBatch::len`],
    /// so implementors do not need to supply it.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `o_t`.
    fn obs(&self) -> &Self::ObsBatch;
}