pub struct DqnAgent<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> { /* private fields */ }

Expand description
A DQN agent.
Implements ε-greedy action selection, experience replay, and TD learning with a target network. Generic over:
E: the environment type (must satisfy rl_traits::Environment)
Enc: the observation encoder (converts E::Observation to tensors)
Act: the action mapper (converts E::Action to/from integer indices)
B: the Burn backend (e.g. NdArray, Wgpu)
Buf: the replay buffer (defaults to CircularBuffer – swap for PER etc.)
Implementations§
Source§impl<E, Enc, Act, B> DqnAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
impl<E, Enc, Act, B> DqnAgent<E, Enc, Act, B>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Source§impl<E, Enc, Act, B, Buf> DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Sourcepub fn new_with_buffer(
encoder: Enc,
action_mapper: Act,
config: DqnConfig,
device: B::Device,
seed: u64,
buffer: Buf,
) -> Self
pub fn new_with_buffer( encoder: Enc, action_mapper: Act, config: DqnConfig, device: B::Device, seed: u64, buffer: Buf, ) -> Self
Create a new agent with a custom replay buffer.
Sourcepub fn set_total_steps(&mut self, steps: usize)
pub fn set_total_steps(&mut self, steps: usize)
Override the internal step counter (use when resuming training).
Sourcepub fn into_policy(self) -> DqnPolicy<E, Enc, Act, B::InnerBackend>
pub fn into_policy(self) -> DqnPolicy<E, Enc, Act, B::InnerBackend>
Convert this trained agent into an inference-only DqnPolicy.
Trait Implementations§
Source§impl<E, Enc, Act, B, Buf> Checkpointable for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> Checkpointable for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§impl<E, Enc, Act, B, Buf> LearningAgent<E> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> LearningAgent<E> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
E::Observation: Clone + Send + Sync + 'static,
E::Action: Clone + Send + Sync + 'static,
Enc: ObservationEncoder<E::Observation, B> + ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
fn act(&mut self, obs: &E::Observation, mode: ActMode) -> E::Action
Select an action for obs according to mode.
Source§fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
fn observe(&mut self, experience: Experience<E::Observation, E::Action>)
Record a transition and update the agent’s internal state.
Source§fn total_steps(&self) -> usize
fn total_steps(&self) -> usize
Total number of observe calls since construction.
Source§fn episode_extras(&self) -> HashMap<String, f64>
fn episode_extras(&self) -> HashMap<String, f64>
Per-episode aggregates of step-level values, reported at episode end. Read more
Source§fn on_episode_start(&mut self)
fn on_episode_start(&mut self)
Called by
crate::training::TrainingSession at the start of each
episode so the agent can reset its per-episode aggregators.Source§impl<E, Enc, Act, B, Buf> Policy<<E as Environment>::Observation, <E as Environment>::Action> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
Enc: ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
impl<E, Enc, Act, B, Buf> Policy<<E as Environment>::Observation, <E as Environment>::Action> for DqnAgent<E, Enc, Act, B, Buf>where
E: Environment,
Enc: ObservationEncoder<E::Observation, B::InnerBackend>,
Act: DiscreteActionMapper<E::Action>,
B: AutodiffBackend,
Buf: ReplayBuffer<E::Observation, E::Action>,
Source§fn act(&self, obs: &E::Observation) -> E::Action
fn act(&self, obs: &E::Observation) -> E::Action
Select an action given the current observation.
Auto Trait Implementations§
impl<E, Enc, Act, B, Buf> Freeze for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !RefUnwindSafe for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Send for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !Sync for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf> Unpin for DqnAgent<E, Enc, Act, B, Buf>where
Buf: Unpin,
Enc: Unpin,
Act: Unpin,
<B as Backend>::Device: Unpin,
E: Unpin,
<B as Backend>::FloatTensorPrimitive: Unpin,
<B as Backend>::QuantizedTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::FloatTensorPrimitive: Unpin,
<<B as AutodiffBackend>::InnerBackend as Backend>::QuantizedTensorPrimitive: Unpin,
impl<E, Enc, Act, B, Buf> UnsafeUnpin for DqnAgent<E, Enc, Act, B, Buf>
impl<E, Enc, Act, B, Buf = CircularBuffer<<E as Environment>::Observation, <E as Environment>::Action>> !UnwindSafe for DqnAgent<E, Enc, Act, B, Buf>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more