pub struct StepResult<O, I> {
pub observation: O,
pub reward: f64,
pub status: EpisodeStatus,
pub info: I,
}
Expand description
The output of a single environment step.
Returned by `crate::Environment::step`. Contains everything an agent needs
to learn: the next observation, the reward signal, whether the episode
is done, and any auxiliary info.
Fields§
observation: O — The observation after taking the action.
reward: f64 — The scalar reward signal.
status: EpisodeStatus — Whether the episode is continuing, has terminated, or was truncated.
info: I — Auxiliary information (e.g. diagnostics, hidden state, sub-rewards).
Typed — no HashMap<String, Any> here.
Implementations§
impl<O, I> StepResult<O, I>
pub fn new(observation: O, reward: f64, status: EpisodeStatus, info: I) -> Self
Examples found in repository?
examples/pursuit.rs (line 98)
74 fn step(&mut self, actions: HashMap<u8, u8>)
75 -> HashMap<u8, StepResult<PursuitObs, ()>>
76 {
77 for (&id, &action) in &actions {
78 self.pos[id as usize] = Self::clamp_move(self.pos[id as usize], action);
79 }
80
81 // Prey moves randomly, bouncing at the walls.
82 self.prey = Self::clamp_move(self.prey, self.rng.gen_range(0..2));
83 self.step += 1;
84
85 let caught = self.active.iter().any(|&id| self.pos[id as usize] == self.prey);
86
87 let status = if caught {
88 EpisodeStatus::Terminated
89 } else if self.step >= MAX_STEPS {
90 EpisodeStatus::Truncated
91 } else {
92 EpisodeStatus::Continuing
93 };
94
95 // Build results before mutating the active list.
96 let results = self.active.iter().map(|&id| {
97 let reward = if caught && self.pos[id as usize] == self.prey { 1.0 } else { 0.0 };
98 (id, StepResult::new(self.obs(id), reward, status.clone(), ()))
99 }).collect();
100
101 if status.is_done() {
102 self.active.clear();
103 }
104
105 results
106 }
More examples
examples/cartpole.rs (line 101)
67 fn step(&mut self, action: usize) -> StepResult<CartPoleObs, ()> {
68 let [x, x_dot, theta, theta_dot] = self.state;
69
70 let force = if action == 1 { FORCE_MAG } else { -FORCE_MAG };
71 let cos_theta = theta.cos();
72 let sin_theta = theta.sin();
73
74 // Equations of motion (Euler integration, same as Gymnasium)
75 let temp = (force + POLE_MASS_LEN * theta_dot * theta_dot * sin_theta) / TOTAL_MASS;
76 let theta_acc = (GRAVITY * sin_theta - cos_theta * temp)
77 / (HALF_POLE_LEN * (4.0 / 3.0 - MASS_POLE * cos_theta * cos_theta / TOTAL_MASS));
78 let x_acc = temp - POLE_MASS_LEN * theta_acc * cos_theta / TOTAL_MASS;
79
80 let new_x = x + TAU * x_dot;
81 let new_x_dot = x_dot + TAU * x_acc;
82 let new_theta = theta + TAU * theta_dot;
83 let new_theta_dot = theta_dot + TAU * theta_acc;
84
85 self.state = [new_x, new_x_dot, new_theta, new_theta_dot];
86
87 let status = if Self::is_terminal(&self.state) {
88 EpisodeStatus::Terminated
89 } else {
90 EpisodeStatus::Continuing
91 };
92
93 // +1 every step the pole stays up (reward is 0 on the terminal step
94 // in Gymnasium; we match that here)
95 let reward = if status == EpisodeStatus::Continuing {
96 1.0
97 } else {
98 0.0
99 };
100
101 StepResult::new(self.state, reward, status, ())
102 }
pub fn is_done(&self) -> bool
Convenience: is the episode over for any reason?
Examples found in repository?
examples/cartpole.rs (line 130)
120fn run_episode(env: &mut TimeLimit<CartPole>, rng: &mut SmallRng) -> (f64, EpisodeStatus, usize) {
121 env.reset(None);
122 let mut total_reward = 0.0;
123 let mut steps = 0;
124
125 loop {
126 let action = env.sample_action(rng);
127 let result = env.step(action);
128 total_reward += result.reward;
129 steps += 1;
130 if result.is_done() {
131 return (total_reward, result.status, steps);
132 }
133 }
134}
pub fn map_obs<O2>(self, f: impl FnOnce(O) -> O2) -> StepResult<O2, I>
Map the observation to a different type (useful for wrapper implementations).
Trait Implementations§
impl<O: Clone, I: Clone> Clone for StepResult<O, I>
fn clone(&self) -> StepResult<O, I>
Returns a duplicate of the value. Read more
fn clone_from(&mut self, source: &Self) (since 1.0.0)
Performs copy-assignment from source. Read more
Auto Trait Implementations§
impl<O, I> Freeze for StepResult<O, I>
impl<O, I> RefUnwindSafe for StepResult<O, I> where
O: RefUnwindSafe,
I: RefUnwindSafe,
impl<O, I> Send for StepResult<O, I>
impl<O, I> Sync for StepResult<O, I>
impl<O, I> Unpin for StepResult<O, I>
impl<O, I> UnsafeUnpin for StepResult<O, I> where
O: UnsafeUnpin,
I: UnsafeUnpin,
impl<O, I> UnwindSafe for StepResult<O, I> where
O: UnwindSafe,
I: UnwindSafe,
Blanket Implementations§
impl<T> BorrowMut<T> for T where
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more