Trait EnvStructure

/// The external structure of a reinforcement learning environment.
///
/// Describes the observation, action, and feedback spaces of an environment
/// together with the discount factor applied to future feedback.
pub trait EnvStructure {
    /// Type of the space returned by [`observation_space`](Self::observation_space).
    type ObservationSpace: Space;
    /// Type of the space returned by [`action_space`](Self::action_space).
    type ActionSpace: Space;
    /// Type of the space returned by [`feedback_space`](Self::feedback_space).
    type FeedbackSpace: Space;

    // Required methods
    /// The space of all possible observations.
    ///
    /// This is not required to be tight: the space may contain elements that
    /// can never be produced as a state observation.
    fn observation_space(&self) -> Self::ObservationSpace;
    /// The space of all possible actions.
    ///
    /// Every element in this space must be a valid action in all environment
    /// states (although immediately ending the episode with negative reward is
    /// a possible outcome). The environment may misbehave or panic for actions
    /// outside of this action space.
    fn action_space(&self) -> Self::ActionSpace;
    /// The space of all possible feedback.
    ///
    /// This is not required to be tight: the space may contain elements that
    /// can never be produced as a feedback signal.
    fn feedback_space(&self) -> Self::FeedbackSpace;
    /// A discount factor applied to future feedback.
    ///
    /// A value between 0 and 1, inclusive.
    fn discount_factor(&self) -> f64;
}

Expand description

The external structure of a reinforcement learning environment.

Required Associated Types§

type ObservationSpace: Space

type ActionSpace: Space

type FeedbackSpace: Space

Required Methods§

fn observation_space(&self) -> Self::ObservationSpace

The space of all possible observations.

This is not required to be tight: the space may contain elements that can never be produced as a state observation.

fn action_space(&self) -> Self::ActionSpace

The space of all possible actions.

Every element in this space must be a valid action in all environment states (although immediately ending the episode with negative reward is a possible outcome). The environment may misbehave or panic for actions outside of this action space.

fn feedback_space(&self) -> Self::FeedbackSpace

The space of all possible feedback.

This is not required to be tight: the space may contain elements that can never be produced as a feedback signal.

fn discount_factor(&self) -> f64

A discount factor applied to future feedback.

The returned value must be between 0 and 1, inclusive.

Implementations on Foreign Types§

impl<T: EnvStructure + ?Sized> EnvStructure for &T

type ObservationSpace = <T as EnvStructure>::ObservationSpace

type ActionSpace = <T as EnvStructure>::ActionSpace

type FeedbackSpace = <T as EnvStructure>::FeedbackSpace

fn observation_space(&self) -> Self::ObservationSpace

fn action_space(&self) -> Self::ActionSpace

fn feedback_space(&self) -> Self::FeedbackSpace

fn discount_factor(&self) -> f64

impl<T: EnvStructure + ?Sized> EnvStructure for Box<T>

type ObservationSpace = <T as EnvStructure>::ObservationSpace

type ActionSpace = <T as EnvStructure>::ActionSpace

type FeedbackSpace = <T as EnvStructure>::FeedbackSpace

fn observation_space(&self) -> Self::ObservationSpace

fn action_space(&self) -> Self::ActionSpace

fn feedback_space(&self) -> Self::FeedbackSpace

fn discount_factor(&self) -> f64

Implementors§

impl EnvStructure for CartPole

type ObservationSpace = CartPolePhysicalStateSpace

type ActionSpace = IndexedTypeSpace<Push>

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for Chain

type ObservationSpace = IndexSpace

type ActionSpace = IndexedTypeSpace<Move>

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for DirichletRandomMdps

type ObservationSpace = IndexSpace

type ActionSpace = IndexSpace

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for MemoryGame

type ObservationSpace = IndexSpace

type ActionSpace = IndexSpace

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for OneHotBandits

type ObservationSpace = SingletonSpace

type ActionSpace = IndexSpace

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for PartitionGame

type ObservationSpace = TupleSpace2<PowerSpace<BooleanSpace, NUM_FEATURES>, OptionSpace<TupleSpace2<PowerSpace<BooleanSpace, NUM_FEATURES>, IndexedTypeSpace<Classification>>>>

type ActionSpace = IndexedTypeSpace<Action>

type FeedbackSpace = IntervalSpace<Reward>

impl EnvStructure for UniformBernoulliBandits

type ObservationSpace = SingletonSpace

type ActionSpace = IndexSpace

type FeedbackSpace = IntervalSpace<Reward>

impl<D: Bounded<f64>> EnvStructure for Bandit<D>

type ObservationSpace = SingletonSpace

type ActionSpace = IndexSpace

type FeedbackSpace = IntervalSpace<Reward>

impl<E> EnvStructure for MetaEnv<E>
where E: EnvStructure, E::FeedbackSpace: MetaFeedbackSpace,

type ObservationSpace = MetaObservationSpace<<E as EnvStructure>::ObservationSpace, <E as EnvStructure>::ActionSpace, <<E as EnvStructure>::FeedbackSpace as MetaFeedbackSpace>::InnerSpace>

type ActionSpace = <E as EnvStructure>::ActionSpace

type FeedbackSpace = <<E as EnvStructure>::FeedbackSpace as MetaFeedbackSpace>::OuterSpace

impl<E, OS1, OS2, AS1, AS2, FS1, FS2> EnvStructure for FirstPlayerView<E>
where E: EnvStructure<ObservationSpace = TupleSpace2<OS1, OS2>, ActionSpace = TupleSpace2<AS1, AS2>, FeedbackSpace = TupleSpace2<FS1, FS2>>, OS1: Space, AS1: Space, FS1: Space,

type ObservationSpace = OS1

type ActionSpace = AS1

type FeedbackSpace = FS1

impl<E, OS1, OS2, AS1, AS2, FS1, FS2> EnvStructure for SecondPlayerView<E>
where E: EnvStructure<ObservationSpace = TupleSpace2<OS1, OS2>, ActionSpace = TupleSpace2<AS1, AS2>, FeedbackSpace = TupleSpace2<FS1, FS2>>, OS2: Space, AS2: Space, FS2: Space,

type ObservationSpace = OS2

type ActionSpace = AS2

type FeedbackSpace = FS2

impl<E, W> EnvStructure for Wrapped<E, W>
where E: EnvStructure, W: StructurePreservingWrapper,

type ObservationSpace = <E as EnvStructure>::ObservationSpace

type ActionSpace = <E as EnvStructure>::ActionSpace

type FeedbackSpace = <E as EnvStructure>::FeedbackSpace

impl<OS, AS, FS> EnvStructure for StoredEnvStructure<OS, AS, FS>
where OS: Space + Clone, AS: Space + Clone, FS: Space + Clone,

type ObservationSpace = OS

type ActionSpace = AS

type FeedbackSpace = FS

impl<T, OS, AS, FS> EnvStructure for InnerEnvStructure<T>
where T: EnvStructure<ObservationSpace = MetaObservationSpace<OS, AS, FS>, ActionSpace = AS, FeedbackSpace = FS>, OS: Space, AS: Space, FS: Space,

type ObservationSpace = OS

type ActionSpace = AS

type FeedbackSpace = FS

impl<T: EnvStructure> EnvStructure for Wrapped<T, VisibleStepLimit>

type ObservationSpace = StepLimitObsSpace<<T as EnvStructure>::ObservationSpace>

type ActionSpace = <T as EnvStructure>::ActionSpace

type FeedbackSpace = <T as EnvStructure>::FeedbackSpace

impl<const W: usize, const H: usize, const VW: usize, const VH: usize> EnvStructure for FruitGame<W, H, VW, VH>

type ObservationSpace = TupleSpace2<PrincipalObsSpace<VW, VH>, AssistantObsSpace<VW, VH>>

type ActionSpace = TupleSpace2<IndexedTypeSpace<Move>, IndexedTypeSpace<Move>>

type FeedbackSpace = TupleSpace2<IntervalSpace<Reward>, IntervalSpace<Reward>>