Struct QLearningSolver

Source

pub struct QLearningSolver {
    pub q: Vec<Vec<f64>>,
    pub alpha: f64,
    pub gamma: f64,
    pub epsilon: f64,
    pub visit_count: Vec<Vec<u64>>,
}

Expand description

Q-learning solver with epsilon-greedy exploration and decaying step size.

Fields§

§q: Vec<Vec<f64>>

Q-value table Q[s][a].

§alpha: f64

Learning rate α.

§gamma: f64

Discount factor γ.

§epsilon: f64

Exploration rate ε.

§visit_count: Vec<Vec<u64>>

Step counter per (s,a) pair (for step-size decay).

Implementations§

Source §

impl QLearningSolver

Source

pub fn new( num_states: usize, num_actions: usize, alpha: f64, gamma: f64, epsilon: f64, ) -> Self

Construct a Q-learning solver.

Source

pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize)

Perform a Q-learning update with harmonic step size 1/(1 + n(s,a)).

Source

pub fn select_action(&self, s: usize, rng_val: f64) -> usize

Select an action using an epsilon-greedy policy (deterministic tie-breaking). rng_val ∈ [0,1) is a uniform random value supplied by the caller.

Source

pub fn has_converged(&self, prev_q: &[Vec<f64>], tol: f64) -> bool

Check convergence: max |Q(s,a) - Q_prev(s,a)| < tol.

Source

pub fn greedy_policy(&self) -> Vec<usize>

Return the current greedy policy.

Trait Implementations§

Source §

impl Clone for QLearningSolver

Source §

fn clone(&self) -> QLearningSolver

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for QLearningSolver

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for QLearningSolver

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct QLearningSolver Copy item path

Fields§

Implementations§

impl QLearningSolver

pub fn new( num_states: usize, num_actions: usize, alpha: f64, gamma: f64, epsilon: f64, ) -> Self

pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize)

pub fn select_action(&self, s: usize, rng_val: f64) -> usize

pub fn has_converged(&self, prev_q: &[Vec<f64>], tol: f64) -> bool

pub fn greedy_policy(&self) -> Vec<usize>

Trait Implementations§

impl Clone for QLearningSolver

fn clone(&self) -> QLearningSolver

fn clone_from(&mut self, source: &Self)

impl Debug for QLearningSolver

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for QLearningSolver

impl RefUnwindSafe for QLearningSolver

impl Send for QLearningSolver

impl Sync for QLearningSolver

impl Unpin for QLearningSolver

impl UnsafeUnpin for QLearningSolver

impl UnwindSafe for QLearningSolver

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct QLearningSolver

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,