Trait relearn::envs::Mdp

pub trait Mdp {
    type State;
    type Action;

    fn initial_state(&self, rng: &mut StdRng) -> Self::State;

    fn step(
        &self,
        state: Self::State,
        action: &Self::Action,
        rng: &mut StdRng,
    ) -> (Option<Self::State>, f64, bool);
}

A Markov decision process (MDP).

The concept of an episode is an abstraction on top of the MDP formalism. An episode ending means that every possible future trajectory from that point yields 0 reward on every step.

Associated Types

  • `State`: The state type of the MDP.
  • `Action`: The action type of the MDP.

Required methods

fn initial_state(&self, rng: &mut StdRng) -> Self::State

Sample a new initial state.

fn step(&self, state: Self::State, action: &Self::Action, rng: &mut StdRng) -> (Option<Self::State>, f64, bool)

Sample a state transition.

Returns
  • state: The resulting state. Is None if the resulting state is terminal. All trajectories from terminal states yield 0 reward on each step.
  • reward: The reward value for this transition.
  • episode_done: Whether this step ends the current episode.
    • If state is None then episode_done must be true.
    • An episode may be done for other reasons, like a step limit.

Implementors