pub struct TimeLimit<E: Environment> { /* private fields */ }
Expand description
Wraps an environment and truncates episodes after max_steps steps.
This is one of the most commonly needed wrappers: environments that have no natural termination condition (e.g. locomotion tasks) would otherwise run forever.
Episodes cut short by this wrapper emit EpisodeStatus::Truncated,
not EpisodeStatus::Terminated, so learning algorithms can still bootstrap
from the value of the final state instead of treating it as terminal.
Implementations§
Source§impl<E: Environment> TimeLimit<E>
impl<E: Environment> TimeLimit<E>
Source
pub fn new(env: E, max_steps: usize) -> Self
pub fn new(env: E, max_steps: usize) -> Self
Examples found in repository?
examples/cartpole.rs (line 140)
136fn main() {
137 const NUM_EPISODES: usize = 10;
138 const ENV_SEED: u64 = 42;
139
140 let mut env = TimeLimit::new(CartPole::new(ENV_SEED), MAX_STEPS);
141 let mut rng = SmallRng::seed_from_u64(0);
142
143 println!("CartPole-v1 — random agent, {NUM_EPISODES} episodes\n");
144 println!("{:<8} {:>8} {:>7} {:>12}", "Episode", "Return", "Steps", "Outcome");
145 println!("{}", "-".repeat(40));
146
147 let mut total_return = 0.0;
148
149 for ep in 1..=NUM_EPISODES {
150 let (ret, status, steps) = run_episode(&mut env, &mut rng);
151 total_return += ret;
152
153 let outcome = match status {
154 EpisodeStatus::Terminated => "Terminated",
155 EpisodeStatus::Truncated => "Truncated ",
156 EpisodeStatus::Continuing => unreachable!(),
157 };
158
159 println!("{ep:<8} {ret:>8.1} {steps:>7} {outcome:>12}");
160 }
161
162 println!("{}", "-".repeat(40));
163 println!(
164 "Mean return over {NUM_EPISODES} episodes: {:.1}",
165 total_return / NUM_EPISODES as f64
166 );
167}
pub fn elapsed_steps(&self) -> usize
pub fn remaining_steps(&self) -> usize
Trait Implementations§
Source§impl<E: Environment> Environment for TimeLimit<E>
impl<E: Environment> Environment for TimeLimit<E>
Source§type Observation = <E as Environment>::Observation
type Observation = <E as Environment>::Observation
Source§type Action = <E as Environment>::Action
type Action = <E as Environment>::Action
The action type consumed by step().
Source§type Info = <E as Environment>::Info
type Info = <E as Environment>::Info
Auxiliary information returned alongside observations. Read more
Source§fn step(
&mut self,
action: Self::Action,
) -> StepResult<Self::Observation, Self::Info>
fn step( &mut self, action: Self::Action, ) -> StepResult<Self::Observation, Self::Info>
Advance the environment by one timestep. Read more
Auto Trait Implementations§
impl<E> Freeze for TimeLimit<E> where
E: Freeze,
impl<E> RefUnwindSafe for TimeLimit<E> where
E: RefUnwindSafe,
impl<E> Send for TimeLimit<E> where
E: Send,
impl<E> Sync for TimeLimit<E> where
E: Sync,
impl<E> Unpin for TimeLimit<E> where
E: Unpin,
impl<E> UnsafeUnpin for TimeLimit<E> where
E: UnsafeUnpin,
impl<E> UnwindSafe for TimeLimit<E> where
E: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more