//! Environment step.
use super::Env;
/// Additional information accompanying `Obs` and `Act`.
///
/// The trait declares no methods or associated items; it only serves as a
/// marker for user-defined information types.
pub trait Info {}
/// An action, observation and reward tuple `(a_t, o_t+1, r_t)` bundled with
/// some additional information.
///
/// An environment emits a [`Step`] object at every interaction step.
/// Such an object might be used to create transitions `(o_t, a_t, o_t+1, r_t)`.
///
/// Old versions of the library support vectorized environments, which require
/// the elements of [`Step`] to be able to handle multiple values.
/// This is why `reward` and `is_done` are vectors.
pub struct Step<E>
where
    E: Env,
{
    /// Action.
    pub act: E::Act,

    /// Observation.
    pub obs: E::Obs,

    /// Reward.
    pub reward: Vec<f32>,

    /// Flag denoting whether the episode is done.
    pub is_done: Vec<i8>,

    /// Information defined by the user.
    pub info: E::Info,

    /// Initial observation. If `is_done[i] == 0`, the corresponding element
    /// will not be used.
    pub init_obs: E::Obs,
}
impl<E> Step<E>
where
    E: Env,
{
    /// Creates a [`Step`] from its individual components.
    ///
    /// Every argument is moved into the field of the same name. Note that the
    /// observation precedes the action in the argument list, whereas the
    /// struct declares `act` ahead of `obs`.
    pub fn new(
        obs: E::Obs,
        act: E::Act,
        reward: Vec<f32>,
        is_done: Vec<i8>,
        info: E::Info,
        init_obs: E::Obs,
    ) -> Self {
        Self {
            obs,
            act,
            reward,
            is_done,
            info,
            init_obs,
        }
    }
}
/// Turns a [`Step`] into an item of type [`Self::Output`].
///
/// This trait is used in [`Trainer`](crate::Trainer). A [`Step`] object is
/// transformed into [`Self::Output`], which will be pushed into a replay buffer
/// implementing [`ExperienceBufferBase`](crate::ExperienceBufferBase).
/// The type [`Self::Output`] should be the same as
/// [`ExperienceBufferBase::PushedItem`].
///
/// [`Self::Output`]: StepProcessorBase::Output
/// [`ExperienceBufferBase::PushedItem`]: crate::ExperienceBufferBase::PushedItem
pub trait StepProcessorBase<E>
where
    E: Env,
{
    /// Configuration from which the processor is built.
    type Config: Clone;

    /// The type of transitions produced by this trait.
    type Output;

    /// Builds a processor from the given configuration.
    fn build(config: &Self::Config) -> Self;

    /// Resets the object, handing it the initial observation `init_obs`.
    fn reset(&mut self, init_obs: E::Obs);

    /// Processes a [`Step`] object, consuming it and returning the
    /// corresponding [`Self::Output`].
    fn process(&mut self, step: Step<E>) -> Self::Output;
}