Trait relearn::envs::Pomdp
pub trait Pomdp {
    type State;
    type Observation;
    type Action;

    fn initial_state(&self, rng: &mut StdRng) -> Self::State;

    fn observe(
        &self,
        state: &Self::State,
        rng: &mut StdRng
    ) -> Self::Observation;

    fn step(
        &self,
        state: Self::State,
        action: &Self::Action,
        rng: &mut StdRng
    ) -> (Option<Self::State>, f64, bool);
}
A partially observable Markov decision process (POMDP).
The concept of an episode is an abstraction on top of the MDP formalism. An episode ending means that all possible future trajectories have 0 reward on each step.
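
As a rough sketch of how this trait can be implemented (not part of the crate), the toy environment below hides a tiger behind one of two doors, emits a noisy observation of its location, and ends the episode as soon as a door is opened. The TigerDoors type, its listen_accuracy field, and the reward values are invented for this example; only the trait signature shown above is assumed.

use rand::rngs::StdRng;
use rand::Rng;
use relearn::envs::Pomdp;

/// Toy two-door POMDP: a tiger hides behind door 0 or 1; the agent
/// receives a noisy observation of its location and then opens a door.
pub struct TigerDoors {
    /// Probability that an observation reports the correct door.
    pub listen_accuracy: f64,
}

impl Pomdp for TigerDoors {
    type State = usize; // door hiding the tiger (0 or 1)
    type Observation = usize; // noisy report of that door
    type Action = usize; // door the agent opens

    fn initial_state(&self, rng: &mut StdRng) -> Self::State {
        // Place the tiger behind door 0 or 1 uniformly at random.
        rng.gen_range(0..2)
    }

    fn observe(&self, state: &Self::State, rng: &mut StdRng) -> Self::Observation {
        // Report the true door with probability `listen_accuracy`,
        // otherwise report the other door.
        if rng.gen_bool(self.listen_accuracy) {
            *state
        } else {
            1 - *state
        }
    }

    fn step(
        &self,
        state: Self::State,
        action: &Self::Action,
        _rng: &mut StdRng,
    ) -> (Option<Self::State>, f64, bool) {
        // Opening the tiger-free door is rewarded; either way the episode
        // ends, so the successor is terminal (None) and episode_done is true.
        let reward = if *action == state { -1.0 } else { 1.0 };
        (None, reward, true)
    }
}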
Associated Types
type State
The environment state. Not necessarily observable by the agent.
type Observation
An observation of the state, provided to the agent.
type Action
An action selected by the agent.
Required methods
fn initial_state(&self, rng: &mut StdRng) -> Self::State
Sample a new initial state.
fn observe(&self, state: &Self::State, rng: &mut StdRng) -> Self::Observation
Sample an observation for a state.
fn step(&self, state: Self::State, action: &Self::Action, rng: &mut StdRng) -> (Option<Self::State>, f64, bool)
Sample a state transition.
Returns
- state: The resulting state. Is None if the resulting state is terminal. All trajectories from terminal states yield 0 reward on each step.
- reward: The reward value for this transition.
- episode_done: Whether this step ends the episode.
  - If state is None then episode_done must be true.
  - An episode may be done for other reasons, like a step limit.
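
As an illustration of how a caller typically consumes these return values, the sketch below rolls out one episode; the run_episode helper and its policy closure are hypothetical and not part of the crate.

use rand::rngs::StdRng;
use relearn::envs::Pomdp;

/// Roll out one episode with a reactive policy and return the total reward.
fn run_episode<E, F>(env: &E, mut policy: F, rng: &mut StdRng) -> f64
where
    E: Pomdp,
    F: FnMut(&E::Observation) -> E::Action,
{
    let mut state = env.initial_state(rng);
    let mut total_reward = 0.0;
    loop {
        let observation = env.observe(&state, rng);
        let action = policy(&observation);
        let (successor, reward, episode_done) = env.step(state, &action, rng);
        total_reward += reward;
        match successor {
            // A None successor is terminal, so episode_done is necessarily true.
            None => break,
            // The episode may also end while the successor is non-terminal,
            // for example because of a step limit.
            Some(_) if episode_done => break,
            Some(next) => state = next,
        }
    }
    total_reward
}

Because step takes state by value, the environment may consume or reuse the state when constructing the successor instead of cloning it.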