pub struct MetaEnv<E> {
    pub env_distribution: E,
}

A meta reinforcement learning environment that treats RL itself as an environment.

An episode in this meta environment is called a “Trial” and consists of several episodes from the inner environment. A new inner environment with a different structure seed is sampled for each Trial. A meta episode ends when a fixed number of inner episodes have been completed.

The inner environment's step metadata (the previous action and feedback, and whether the inner episode is done) are embedded in the meta observations.
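As a rough sketch of the trial structure (illustrative only: the types and functions below are hypothetical stand-ins, not part of this crate's API), one meta episode samples a single inner environment and then runs a fixed number of inner episodes in it:

// Illustrative sketch only; hypothetical stand-ins, not this crate's API.
struct InnerEnv {
    structure_seed: u64, // seed controlling the sampled environment's structure
}

fn sample_inner_env(structure_seed: u64) -> InnerEnv {
    // A fresh inner environment is sampled once per trial.
    InnerEnv { structure_seed }
}

fn run_inner_episode(env: &InnerEnv) -> f64 {
    // Placeholder: would roll out one inner episode and return its total reward.
    let _ = env.structure_seed;
    0.0
}

fn run_trial(trial_seed: u64, episodes_per_trial: usize) -> f64 {
    // One meta episode ("trial"): the same sampled inner environment is used for a
    // fixed number of inner episodes, then the trial ends.
    let env = sample_inner_env(trial_seed);
    (0..episodes_per_trial)
        .map(|_| run_inner_episode(&env))
        .sum()
}

fn main() {
    let trial_return = run_trial(0, 10);
    println!("trial return: {trial_return}");
}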

Observations

A MetaObservation, consisting of the inner observation, the previous step's action and feedback, and whether the inner episode is done.
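A minimal sketch of what such an observation carries, using a hypothetical struct whose field names are illustrative rather than the crate's actual definition:

// Hypothetical mirror of the documented observation contents; not the crate's actual type.
struct MetaObservation<O, A, F> {
    /// Observation of the current inner environment state, if the episode is ongoing.
    inner_observation: Option<O>,
    /// Action and feedback from the previous inner step, if any.
    prev_step: Option<(A, F)>,
    /// Whether the current inner episode has ended.
    episode_done: bool,
}

fn main() {
    // Example: the observation seen at the start of a trial, before any inner step.
    let obs: MetaObservation<u32, u32, f64> = MetaObservation {
        inner_observation: Some(0),
        prev_step: None,
        episode_done: false,
    };
    assert!(!obs.episode_done);
    let _ = (obs.inner_observation, obs.prev_step);
}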

Actions

The action space is the same as the action space of the inner environments. Actions are forwarded to the inner environment except when the current state is the last state of the inner episode (episode_done == true). In that case, the provided action is ignored and the next state will be the start of a new inner episode.
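This rule can be summarized with a small stand-alone sketch; the enum and functions below are hypothetical stand-ins for the inner environment, not the crate's API:

// Hypothetical inner environment state for illustration.
#[derive(Clone, Copy)]
enum InnerState {
    Running(u32),
    Terminal,
}

fn inner_step(state: u32, action: u32) -> InnerState {
    // Hypothetical inner transition: the episode ends once the sum reaches 3.
    if state + action >= 3 {
        InnerState::Terminal
    } else {
        InnerState::Running(state + action)
    }
}

fn meta_step(state: InnerState, action: u32) -> InnerState {
    match state {
        // episode_done == true: the provided action is ignored and a new inner episode starts.
        InnerState::Terminal => InnerState::Running(0),
        // Otherwise the action is forwarded to the inner environment.
        InnerState::Running(s) => inner_step(s, action),
    }
}

fn main() {
    let restarted = meta_step(InnerState::Terminal, 7);
    assert!(matches!(restarted, InnerState::Running(0)));

    let forwarded = meta_step(InnerState::Running(0), 1);
    assert!(matches!(forwarded, InnerState::Running(1)));
}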

Feedback

The inner environment feedback must implement MetaFeedback (and MetaFeedbackSpace for the space). This feedback is decomposed into separate inner and outer feedback. In the case of Reward feedback, the same value is used as both the inner and outer feedback.
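For reward-valued feedback the decomposition is trivial, as the toy sketch below illustrates (the names and types are assumptions for illustration, not the crate's MetaFeedback API):

// Sketch of the inner/outer feedback split for plain reward feedback.
struct DecomposedFeedback {
    inner: f64, // feedback driving the inner episode
    outer: f64, // feedback credited to the meta episode
}

fn decompose_reward(reward: f64) -> DecomposedFeedback {
    // For Reward feedback, the same value serves as both parts.
    DecomposedFeedback { inner: reward, outer: reward }
}

fn main() {
    let f = decompose_reward(1.5);
    assert_eq!(f.inner, f.outer);
}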

States

The state (MetaState) consists of an inner environment instance, an inner environment state, an episode index within the trial, and details of the most recent inner step within the episode.
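A hypothetical layout mirroring this description (field names are illustrative, not the crate's actual MetaState definition):

// Hypothetical mirror of the documented state contents; not the crate's actual type.
struct MetaState<E, S, A, F> {
    /// The inner environment instance sampled for this trial.
    inner_env: E,
    /// Current state of the inner environment.
    inner_state: S,
    /// Index of the current inner episode within the trial.
    episode_index: usize,
    /// Action and feedback of the most recent inner step in this episode, if any.
    prev_step: Option<(A, F)>,
}

fn main() {
    // State at the very start of a trial, before any inner step has been taken.
    let state: MetaState<(), u32, u32, f64> = MetaState {
        inner_env: (),
        inner_state: 0,
        episode_index: 0,
        prev_step: None,
    };
    assert_eq!(state.episode_index, 0);
    let _ = (state.inner_env, state.inner_state, state.prev_step);
}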

Reference

This meta environment design is roughly consistent with the structure used in the paper “RL^2: Fast Reinforcement Learning via Slow Reinforcement Learning” by Duan et al.

Fields

env_distribution: E

Environment distribution from which each trial's inner environment is sampled.

Implementations

View the structure of the inner environment.

Trait Implementations

Environment observation type.

Environment action type.

Environment feedback type.

Environment observation space type.

Environment action space type.

Environment feedback space type.

Type of environment to build

Build an environment instance. Read more

Returns a copy of the value. Read more

Performs copy-assignment from source. Read more

Formats the value using the given formatter. Read more

Returns the “default value” for a type. Read more

Deserialize this value from the given Serde deserializer. Read more

Space containing all possible observations. Read more

The space of all possible actions. Read more

The space of all possible feedback. Read more

A discount factor applied to future feedback. Read more

Environment state type. Not necessarily observable by the agent.

Observation of the state provided to the agent.

Action selected by the agent.

Feedback provided to a learning agent as the result of each step. Reward, for example. Read more

Sample a state for the start of a new episode. Read more

Generate an observation for a given state.

Perform a state transition in response to an action. Read more

Run this environment with the given actor.
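The environment-related associated types and methods summarized above fit together in an episode loop. The following self-contained sketch shows the general shape; the trait, its signatures, the policy closure, and the Countdown example are simplified assumptions for illustration, not the crate's actual Environment trait (which, for instance, also involves random number generators and an actor-based run method):

// Simplified stand-in for an environment interface; signatures are assumptions.
trait SimpleEnvironment {
    type State;
    type Observation;
    type Action;
    type Feedback;

    /// Sample a state for the start of a new episode.
    fn initial_state(&self) -> Self::State;
    /// Generate an observation for a given state.
    fn observe(&self, state: &Self::State) -> Self::Observation;
    /// Perform a state transition in response to an action.
    /// Returns the next state (None if the episode ended) and the step feedback.
    fn step(&self, state: Self::State, action: &Self::Action)
        -> (Option<Self::State>, Self::Feedback);
}

/// Run one episode with a policy closure, collecting the per-step feedback.
fn run_episode<E, P>(env: &E, mut policy: P) -> Vec<E::Feedback>
where
    E: SimpleEnvironment,
    P: FnMut(&E::Observation) -> E::Action,
{
    let mut feedback = Vec::new();
    let mut state = env.initial_state();
    loop {
        let obs = env.observe(&state);
        let action = policy(&obs);
        let (next, f) = env.step(state, &action);
        feedback.push(f);
        match next {
            Some(s) => state = s,
            None => break,
        }
    }
    feedback
}

// Tiny concrete instance: count down from 3 to 0, then end the episode.
struct Countdown;

impl SimpleEnvironment for Countdown {
    type State = u32;
    type Observation = u32;
    type Action = ();
    type Feedback = f64;

    fn initial_state(&self) -> u32 { 3 }
    fn observe(&self, state: &u32) -> u32 { *state }
    fn step(&self, state: u32, _action: &()) -> (Option<u32>, f64) {
        if state == 0 { (None, 1.0) } else { (Some(state - 1), 0.0) }
    }
}

fn main() {
    let feedback = run_episode(&Countdown, |_obs| ());
    assert_eq!(feedback.len(), 4);
}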

Feeds this value into the given Hasher. Read more

Feeds a slice of this type into the given Hasher. Read more

This method tests for self and other values to be equal, and is used by ==. Read more

This method tests for !=.

Serialize this value into the given Serde serializer. Read more
