Struct PolicyGradient

Source

pub struct PolicyGradient {
    pub theta: Vec<Vec<f64>>,
    pub alpha: f64,
    pub gamma: f64,
}

Expand description

Policy gradient agent (softmax parameterisation).

Policy: π_θ(a|s) = exp(θ[s][a]) / Σ_{a'} exp(θ[s][a'])

Update: θ[s][a] += α · ∇_θ log π_θ(a|s) · G, where G is the return.

Fields§

§theta: Vec<Vec<f64>>

Policy parameter table θ[s][a].

§alpha: f64

Learning rate α.

§gamma: f64

Discount factor γ.

Implementations§

Source §

impl PolicyGradient

Source

pub fn new( num_states: usize, num_actions: usize, alpha: f64, gamma: f64, ) -> Self

Construct a policy gradient agent.

Source

pub fn softmax(&self, s: usize) -> Vec<f64>

Compute the softmax policy π_θ(·|s).

Source

pub fn update(&mut self, s: usize, a: usize, g: f64)

Update θ using a single (s, a, G) sample from a trajectory.

Source

pub fn expected_return(&self, s: usize, q: &ActionValueFunction) -> f64

Expected return from state s as E_π[Q(s,a)].

Source

pub fn convergence_rate(&self, q: &ActionValueFunction) -> f64

Convergence rate estimate: max |∇J| across states (gradient norm).

Trait Implementations§

Source §

impl Clone for PolicyGradient

Source §

fn clone(&self) -> PolicyGradient

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl Debug for PolicyGradient

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for PolicyGradient

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Struct PolicyGradient

Fields§

Implementations§

impl PolicyGradient

pub fn new( num_states: usize, num_actions: usize, alpha: f64, gamma: f64, ) -> Self

pub fn softmax(&self, s: usize) -> Vec<f64>

pub fn update(&mut self, s: usize, a: usize, g: f64)

pub fn expected_return(&self, s: usize, q: &ActionValueFunction) -> f64

pub fn convergence_rate(&self, q: &ActionValueFunction) -> f64

Trait Implementations§

impl Clone for PolicyGradient

fn clone(&self) -> PolicyGradient

fn clone_from(&mut self, source: &Self)

impl Debug for PolicyGradient

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for PolicyGradient

impl RefUnwindSafe for PolicyGradient

impl Send for PolicyGradient

impl Sync for PolicyGradient

impl Unpin for PolicyGradient

impl UnsafeUnpin for PolicyGradient

impl UnwindSafe for PolicyGradient

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct PolicyGradient

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,