/// A meta reinforcement learning environment that treats RL itself as an environment.
///
/// An episode in this meta environment ("Trial") consists of several episodes from an
/// inner environment sampled from `env_distribution`; a new inner environment is
/// sampled for each trial. See the surrounding documentation for the observation,
/// action, feedback, and state structure.
pub struct MetaEnv<E> {
    /// Environment distribution from which each trial's inner environment is sampled.
    pub env_distribution: E,
}
Expand description
A meta reinforcement learning environment that treats RL itself as an environment.
An episode in this meta environment is called a “Trial” and consists of several episodes from the inner environment. A new inner environment with a different structure seed is sampled for each Trial. A meta episode ends when a fixed number of inner episodes have been completed.
The step metadata from the inner environment are embedded as observations.
Observations
A MetaObservation consists of the inner observation, the previous step action and
feedback, and whether the inner episode is done.
Actions
The action space is the same as the action space of the inner environments.
Actions are forwarded to the inner environment except when the current state is the last state
of the inner episode (`episode_done == true`).
In that case, the provided action is ignored and the next state will be the start of a new
inner episode.
Feedback
The inner environment feedback must implement MetaFeedback (and MetaFeedbackSpace for
the space). This feedback is decomposed into separate inner and outer feedback.
In the case of Reward feedback, the same value is used as both the inner and outer feedback.
States
The state (MetaState) consists of an inner environment instance,
an inner environment state, an episode index within the trial, and details of the most recent
inner step within the episode.
Reference
This meta environment design is roughly consistent with the structure used in the paper “RL^2: Fast Reinforcement Learning via Slow Reinforcement Learning” by Duan et al.
Fields
env_distribution: E
Environment distribution from which each trial’s inner environment is sampled.
Implementations
sourceimpl<E> MetaEnv<E> where
E: EnvStructure,
impl<E> MetaEnv<E> where
E: EnvStructure,
sourcepub const fn inner_structure(&self) -> InnerEnvStructure<&Self>
pub const fn inner_structure(&self) -> InnerEnvStructure<&Self>
View the structure of the inner environment.
Trait Implementations
sourceimpl<EC> BuildEnv for MetaEnv<EC> where
EC: BuildEnvDist,
EC::Action: Copy,
EC::FeedbackSpace: MetaFeedbackSpace<Element = EC::Feedback>,
<EC::FeedbackSpace as Space>::Element: MetaFeedback,
EC::Feedback: MetaFeedback,
impl<EC> BuildEnv for MetaEnv<EC> where
EC: BuildEnvDist,
EC::Action: Copy,
EC::FeedbackSpace: MetaFeedbackSpace<Element = EC::Feedback>,
<EC::FeedbackSpace as Space>::Element: MetaFeedback,
EC::Feedback: MetaFeedback,
type Observation = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Observation
type Observation = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Observation
Environment observation type.
type Action = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Action
type Action = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Action
Environment action type.
type Feedback = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Feedback
type Feedback = <<MetaEnv<EC> as BuildEnv>::Environment as Environment>::Feedback
Environment feedback type.
type ObservationSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::ObservationSpace
type ObservationSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::ObservationSpace
Environment observation space type.
type ActionSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::ActionSpace
type ActionSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::ActionSpace
Environment action space type.
type FeedbackSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::FeedbackSpace
type FeedbackSpace = <<MetaEnv<EC> as BuildEnv>::Environment as EnvStructure>::FeedbackSpace
Environment feedback space type.
type Environment = MetaEnv<<EC as BuildEnvDist>::EnvDistribution>
type Environment = MetaEnv<<EC as BuildEnvDist>::EnvDistribution>
Type of environment to build
sourcefn build_env(&self, _: &mut Prng) -> Result<Self::Environment, BuildEnvError>
fn build_env(&self, _: &mut Prng) -> Result<Self::Environment, BuildEnvError>
Build an environment instance. Read more
sourceimpl<'de, E> Deserialize<'de> for MetaEnv<E> where
E: Deserialize<'de>,
impl<'de, E> Deserialize<'de> for MetaEnv<E> where
E: Deserialize<'de>,
sourcefn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
sourceimpl<E> EnvStructure for MetaEnv<E> where
E: EnvStructure,
E::FeedbackSpace: MetaFeedbackSpace,
impl<E> EnvStructure for MetaEnv<E> where
E: EnvStructure,
E::FeedbackSpace: MetaFeedbackSpace,
type ObservationSpace = MetaObservationSpace<<E as EnvStructure>::ObservationSpace, <E as EnvStructure>::ActionSpace, <<E as EnvStructure>::FeedbackSpace as MetaFeedbackSpace>::InnerSpace>
type ActionSpace = <E as EnvStructure>::ActionSpace
type FeedbackSpace = <<E as EnvStructure>::FeedbackSpace as MetaFeedbackSpace>::OuterSpace
sourcefn observation_space(&self) -> Self::ObservationSpace
fn observation_space(&self) -> Self::ObservationSpace
Space containing all possible observations. Read more
sourcefn action_space(&self) -> Self::ActionSpace
fn action_space(&self) -> Self::ActionSpace
The space of all possible actions. Read more
sourcefn feedback_space(&self) -> Self::FeedbackSpace
fn feedback_space(&self) -> Self::FeedbackSpace
The space of all possible feedback. Read more
sourcefn discount_factor(&self) -> f64
fn discount_factor(&self) -> f64
A discount factor applied to future feedback. Read more
sourceimpl<E> Environment for MetaEnv<E> where
E: EnvDistribution,
E::Action: Clone,
E::Observation: Clone,
E::Feedback: MetaFeedback,
impl<E> Environment for MetaEnv<E> where
E: EnvDistribution,
E::Action: Clone,
E::Observation: Clone,
E::Feedback: MetaFeedback,
type State = MetaState<<E as EnvDistribution>::Environment>
type State = MetaState<<E as EnvDistribution>::Environment>
Environment state type. Not necessarily observable by the agent.
type Observation = MetaObservation<<E as EnvDistribution>::Observation, <E as EnvDistribution>::Action, <<E as EnvDistribution>::Feedback as MetaFeedback>::Inner>
type Observation = MetaObservation<<E as EnvDistribution>::Observation, <E as EnvDistribution>::Action, <<E as EnvDistribution>::Feedback as MetaFeedback>::Inner>
Observation of the state provided to the agent.
type Action = <E as EnvDistribution>::Action
type Action = <E as EnvDistribution>::Action
Action selected by the agent.
type Feedback = <<E as EnvDistribution>::Feedback as MetaFeedback>::Outer
type Feedback = <<E as EnvDistribution>::Feedback as MetaFeedback>::Outer
sourcefn initial_state(&self, rng: &mut Prng) -> Self::State
fn initial_state(&self, rng: &mut Prng) -> Self::State
Sample a state for the start of a new episode. Read more
sourcefn observe(&self, state: &Self::State, rng: &mut Prng) -> Self::Observation
fn observe(&self, state: &Self::State, rng: &mut Prng) -> Self::Observation
Generate an observation for a given state.
sourcefn step(
&self,
state: Self::State,
action: &Self::Action,
rng: &mut Prng,
logger: &mut dyn StatsLogger
) -> (Successor<Self::State>, Self::Feedback)
fn step(
&self,
state: Self::State,
action: &Self::Action,
rng: &mut Prng,
logger: &mut dyn StatsLogger
) -> (Successor<Self::State>, Self::Feedback)
Perform a state transition in response to an action. Read more
sourcefn run<T, L>(self, actor: T, seed: SimSeed, logger: L) -> Steps<Self, T, Prng, L>ⓘNotable traits for Steps<E, T, R, L>impl<E, T, R, L> Iterator for Steps<E, T, R, L> where
E: Environment,
T: Actor<E::Observation, E::Action>,
R: BorrowMut<Prng>,
L: StatsLogger, type Item = PartialStep<E::Observation, E::Action, E::Feedback>;
where
T: Actor<Self::Observation, Self::Action>,
L: StatsLogger,
Self: Sized,
fn run<T, L>(self, actor: T, seed: SimSeed, logger: L) -> Steps<Self, T, Prng, L>ⓘNotable traits for Steps<E, T, R, L>impl<E, T, R, L> Iterator for Steps<E, T, R, L> where
E: Environment,
T: Actor<E::Observation, E::Action>,
R: BorrowMut<Prng>,
L: StatsLogger, type Item = PartialStep<E::Observation, E::Action, E::Feedback>;
where
T: Actor<Self::Observation, Self::Action>,
L: StatsLogger,
Self: Sized,
E: Environment,
T: Actor<E::Observation, E::Action>,
R: BorrowMut<Prng>,
L: StatsLogger, type Item = PartialStep<E::Observation, E::Action, E::Feedback>;
Run this environment with the given actor.
impl<E: Copy> Copy for MetaEnv<E>
impl<E: Eq> Eq for MetaEnv<E>
impl<E> StructuralEq for MetaEnv<E>
impl<E> StructuralPartialEq for MetaEnv<E>
Auto Trait Implementations
impl<E> RefUnwindSafe for MetaEnv<E> where
E: RefUnwindSafe,
impl<E> Send for MetaEnv<E> where
E: Send,
impl<E> Sync for MetaEnv<E> where
E: Sync,
impl<E> Unpin for MetaEnv<E> where
E: Unpin,
impl<E> UnwindSafe for MetaEnv<E> where
E: UnwindSafe,
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<Q, K> Equivalent<K> for Q where
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
impl<Q, K> Equivalent<K> for Q where
Q: Eq + ?Sized,
K: Borrow<Q> + ?Sized,
sourcefn equivalent(&self, key: &K) -> bool
fn equivalent(&self, key: &K) -> bool
Compare self to key
and return true
if they are equal.