pub struct DqnAgent<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> { /* private fields */ }

Expand description
A DQN agent.
Implements ε-greedy action selection, experience replay, and TD learning with a target network. Generic over:
E: the environment type (must satisfy rl_traits::Environment)
Enc: the observation encoder (converts E::Observation to tensors)
Act: the action mapper (converts E::Action to/from integer indices)
B: the Burn backend (e.g. NdArray, Wgpu)
Buf: the replay buffer (defaults to CircularBuffer – swap for PER etc.)
Implementations§
Source§impl<E, Enc, Act, B> DqnAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
impl<E, Enc, Act, B> DqnAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Source§impl<E, Enc, Act, B, Buf> DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Sourcepub fn new_with_buffer(
encoder: Enc,
action_mapper: Act,
config: DqnConfig,
device: B::Device,
seed: u64,
buffer: Buf,
) -> Self
pub fn new_with_buffer( encoder: Enc, action_mapper: Act, config: DqnConfig, device: B::Device, seed: u64, buffer: Buf, ) -> Self
Create a new agent with a custom replay buffer.
Sourcepub fn set_total_steps(&mut self, steps: usize)
pub fn set_total_steps(&mut self, steps: usize)
Override the internal step counter (use when resuming training).
Sourcepub fn into_policy(self) -> DqnPolicy<E, Enc, Act, B::InnerBackend>
pub fn into_policy(self) -> DqnPolicy<E, Enc, Act, B::InnerBackend>
Convert this trained agent into an inference-only DqnPolicy.
Trait Implementations§
Source§impl<E, Enc, Act, B, Buf> Checkpointable for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> Checkpointable for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§impl<E, Enc, Act, B, Buf> LearningAgent<E> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> LearningAgent<E> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
Select an action for obs according to mode.
Source§fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
Record a transition and update the agent’s internal state.
Source§fn total_steps(&self) -> usize
fn total_steps(&self) -> usize
Total number of observe calls since construction.
Source§fn episode_extras(&self) -> HashMap<String, f64>
fn episode_extras(&self) -> HashMap<String, f64>
Per-episode aggregates of step-level values, reported at episode end. Read more
Source§fn on_episode_start(&mut self)
fn on_episode_start(&mut self)
Called by
crate::training::TrainingSession at the start of each
episode so the agent can reset its per-episode aggregators.Source§impl<E, Enc, Act, B, Buf> Policy<<E as Environment>::Observation, <E as Environment>::Action> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
Enc: ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> Policy<<E as Environment>::Observation, <E as Environment>::Action> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
Enc: ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§fn act(&self, obs: &E::Observation) -> E::Action
fn act(&self, obs: &E::Observation) -> E::Action
Select an action given the current observation.
Auto Trait Implementations§
impl<E, Enc, Act, B, Buf> Freeze for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !RefUnwindSafe for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Send for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !Sync for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Unpin for DqnAgent<E, Enc, Act, B, Buf>where
Buf: Unpin,
Enc: Unpin,
Act: Unpin,
<B as Backend>::Device: Unpin,
E: Unpin,
<B as Backend>::FloatTensorPrimitive: Unpin,
<B as Backend>::QuantizedTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::FloatTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::QuantizedTensorPrimitive: Unpin,
impl<E, Enc, Act, B, Buf> UnsafeUnpin for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !UnwindSafe for DqnAgent<E, Enc, Act, B, Buf>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more