pub struct PpoAgent<E, Enc, Act, B>
where
    E: Environment,
    B: AutodiffBackend,
{ /* private fields */ }
Expand description
A PPO agent with discrete actions.
Implements clipped-surrogate PPO with GAE advantage estimation and an
actor-critic network. Generic over environment, encoder, action mapper,
and Burn backend – the same pattern as DqnAgent.
On-policy: experience is collected into a rollout buffer, used for
n_epochs gradient passes, then discarded. The buffer size is
n_steps * n_envs; an update fires automatically when it fills.
§Parallel environments
Set PpoConfig::n_envs to match the number of environments feeding
this agent (e.g. bevy-gym’s NUM_ENVS). All envs contribute to the
same rollout buffer; the update fires after n_steps ticks.
Implementations§
Source§impl<E, Enc, Act, B> PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
impl<E, Enc, Act, B> PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Trait Implementations§
Source§impl<E, Enc, Act, B> Checkpointable for PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
impl<E, Enc, Act, B> Checkpointable for PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Source§impl<E, Enc, Act, B> LearningAgent<E> for PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
impl<E, Enc, Act, B> LearningAgent<E> for PpoAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Source§fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
Select an action for obs according to mode.
Source§fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
Record a transition and update the agent’s internal state.
Source§fn total_steps(&self) -> usize
fn total_steps(&self) -> usize
Total number of observe calls since construction.
Source§fn episode_extras(&self) -> HashMap<String, f64>
fn episode_extras(&self) -> HashMap<String, f64>
Per-episode aggregates of step-level values, reported at episode end. Read more
Source§fn on_episode_start(&mut self)
fn on_episode_start(&mut self)
Called by crate::training::TrainingSession at the start of each
episode so the agent can reset its per-episode aggregators.
Auto Trait Implementations§
impl<E, Enc, Act, B> !Freeze for PpoAgent<E, Enc, Act, B>
impl<E, Enc, Act, B> !RefUnwindSafe for PpoAgent<E, Enc, Act, B>
impl<E, Enc, Act, B> Send for PpoAgent<E, Enc, Act, B>
impl<E, Enc, Act, B> !Sync for PpoAgent<E, Enc, Act, B>
impl<E, Enc, Act, B> Unpin for PpoAgent<E, Enc, Act, B>where
Enc: Unpin,
Act: Unpin,
<B as Backend>::Device: Unpin,
E: Unpin,
<E as Environment>::Observation: Unpin,
<B as Backend>::FloatTensorPrimitive: Unpin,
<B as Backend>::QuantizedTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::FloatTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::QuantizedTensorPrimitive: Unpin,
impl<E, Enc, Act, B> UnsafeUnpin for PpoAgent<E, Enc, Act, B>where
Enc: UnsafeUnpin,
Act: UnsafeUnpin,
<B as Backend>::Device: UnsafeUnpin,
<B as Backend>::FloatTensorPrimitive: UnsafeUnpin,
<B as Backend>::QuantizedTensorPrimitive: UnsafeUnpin,
impl<E, Enc, Act, B> !UnwindSafe for PpoAgent<E, Enc, Act, B>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more