Struct DqnConfig

Source

/// Configuration for a DQN agent.
///
/// All hyperparameters live here; pass this to `DqnAgent::new()`. The defaults
/// reflect standard DQN practice for moderately complex environments.
pub struct DqnConfig {
    /// Discount factor γ: how much future rewards are valued.
    /// Typical values: 0.95–0.999. Default: 0.99.
    pub gamma: f64,
    /// Learning rate for the Adam optimiser. Default: 1e-4.
    pub learning_rate: f64,
    /// Number of experiences sampled per gradient update. Default: 32.
    pub batch_size: usize,
    /// Maximum number of experiences in the replay buffer; the oldest are
    /// overwritten when full. Default: 100_000.
    pub buffer_capacity: usize,
    /// Minimum number of experiences collected before training begins.
    /// During warm-up, actions are sampled randomly. Must be >= `batch_size`.
    /// Default: 10_000.
    pub min_replay_size: usize,
    /// Number of steps between hard target network updates. Default: 1_000.
    pub target_update_freq: usize,
    /// Hidden layer sizes for the Q-network, in order from input to output.
    /// All hidden layers use ReLU activations. Default: [128, 128].
    pub hidden_sizes: Vec<usize>,
    /// Starting epsilon for ε-greedy exploration: at step 0, actions are
    /// random with this probability. Default: 1.0.
    pub epsilon_start: f64,
    /// Final epsilon after decay is complete. Default: 0.05.
    pub epsilon_end: f64,
    /// Number of steps over which epsilon decays linearly from
    /// `epsilon_start` to `epsilon_end`. Default: 50_000.
    pub epsilon_decay_steps: usize,
}

Expand description

Configuration for a DQN agent.

All hyperparameters live here. Pass this to DqnAgent::new(). The defaults reflect standard DQN practice suitable for moderately complex environments. Simple environments like CartPole will want a smaller replay buffer and warm-up (buffer_capacity, min_replay_size) and a faster epsilon decay (fewer epsilon_decay_steps).

Fields§

§gamma: f64

Discount factor γ. Controls how much future rewards are valued. Typical values: 0.95–0.999. Default: 0.99

§learning_rate: f64

Learning rate for the Adam optimiser. Default: 1e-4

§batch_size: usize

Number of experiences sampled per gradient update. Default: 32

§buffer_capacity: usize

Maximum number of experiences in the replay buffer. Oldest are overwritten when full. Default: 100_000

§min_replay_size: usize

Minimum number of experiences collected before training begins. During warm-up, actions are sampled randomly. Must be >= batch_size. Default: 10_000

§target_update_freq: usize

Number of steps between hard target network updates.

The target network is a frozen copy of the online network used to compute stable TD targets. Updating it too frequently causes instability; too rarely slows learning. Default: 1_000

§hidden_sizes: Vec<usize>

Hidden layer sizes for the Q-network.

The network architecture is: obs_size -> hidden[0] -> hidden[1] -> ... -> num_actions. All hidden layers use ReLU activations. Default: [128, 128]

§epsilon_start: f64

Starting epsilon for ε-greedy exploration. At step 0, actions are random with this probability. Default: 1.0

§epsilon_end: f64

Final epsilon after decay is complete. Default: 0.05

§epsilon_decay_steps: usize

Number of steps over which epsilon decays linearly from epsilon_start to epsilon_end. Default: 50_000

Struct DqnConfig Copy item path

Fields§

Implementations§

impl DqnConfig

pub fn epsilon_at(&self, step: usize) -> f64

Trait Implementations§

impl Clone for DqnConfig

fn clone(&self) -> DqnConfig

fn clone_from(&mut self, source: &Self)

impl Debug for DqnConfig

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for DqnConfig

fn default() -> Self

Auto Trait Implementations§

impl Freeze for DqnConfig

impl RefUnwindSafe for DqnConfig

impl Send for DqnConfig

impl Sync for DqnConfig

impl Unpin for DqnConfig

impl UnsafeUnpin for DqnConfig

impl UnwindSafe for DqnConfig

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoComptime for T

fn comptime(self) -> Self

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct DqnConfig

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,