pub struct MDP {
pub num_states: usize,
pub num_actions: usize,
pub transitions: Vec<Vec<Vec<f64>>>,
pub rewards: Vec<Vec<f64>>,
pub discount: f64,
}
A finite Markov Decision Process (S, A, P, R, γ).
num_states: |S|
num_actions: |A|
transitions[s][a][s']: P(s' | s, a)
rewards[s][a]: R(s, a)
discount: γ ∈ [0, 1)
Fields§
num_states: usize
Number of states.
num_actions: usize
Number of actions.
transitions: Vec<Vec<Vec<f64>>>
Transition probabilities: transitions[s][a] is a probability vector over next states.
rewards: Vec<Vec<f64>>
Expected reward: rewards[s][a].
discount: f64
Discount factor γ ∈ [0, 1).
Implementations§
impl MDP
pub fn new(
    num_states: usize,
    num_actions: usize,
    transitions: Vec<Vec<Vec<f64>>>,
    rewards: Vec<Vec<f64>>,
    discount: f64,
) -> Self
Construct a new MDP.
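As a self-contained sketch of how the constructor would be used, the struct and `new` are re-declared locally below so the example compiles on its own; the 2-state, 2-action chain it builds is hypothetical, not part of this crate.

```rust
// Local re-declaration of the documented struct, so this sketch is standalone.
pub struct MDP {
    pub num_states: usize,
    pub num_actions: usize,
    pub transitions: Vec<Vec<Vec<f64>>>, // transitions[s][a][s'] = P(s'|s,a)
    pub rewards: Vec<Vec<f64>>,          // rewards[s][a] = R(s,a)
    pub discount: f64,                   // γ ∈ [0, 1)
}

impl MDP {
    pub fn new(
        num_states: usize,
        num_actions: usize,
        transitions: Vec<Vec<Vec<f64>>>,
        rewards: Vec<Vec<f64>>,
        discount: f64,
    ) -> Self {
        // A fuller constructor might check that each transitions[s][a] sums to 1;
        // this sketch just stores the fields.
        MDP { num_states, num_actions, transitions, rewards, discount }
    }
}

fn main() {
    // Hypothetical 2-state, 2-action chain: action 0 stays put, action 1 switches state.
    let mdp = MDP::new(
        2,
        2,
        vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]], // from state 0
            vec![vec![0.0, 1.0], vec![1.0, 0.0]], // from state 1
        ],
        vec![vec![0.0, 1.0], vec![0.0, 2.0]],
        0.9,
    );
    assert_eq!(mdp.num_states, 2);
    assert!(mdp.discount < 1.0);
}
```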
pub fn bellman_operator(&self, v: &[f64]) -> Vec<f64>
Apply the Bellman operator: (TV)(s) = max_a [R(s,a) + γ Σ_{s'} P(s'|s,a) V(s')].
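The backup can be sketched as a free function outside this crate (the toy 2-state, 2-action MDP in main is hypothetical):

```rust
/// Standalone sketch of the Bellman operator
/// (TV)(s) = max_a [R(s,a) + γ Σ_{s'} P(s'|s,a) V(s')].
fn bellman_operator(
    transitions: &[Vec<Vec<f64>>], // transitions[s][a][s'] = P(s'|s,a)
    rewards: &[Vec<f64>],          // rewards[s][a] = R(s,a)
    discount: f64,                 // γ
    v: &[f64],                     // current value function V
) -> Vec<f64> {
    (0..transitions.len())
        .map(|s| {
            (0..rewards[s].len())
                .map(|a| {
                    // Expected next-state value Σ_{s'} P(s'|s,a) V(s').
                    let expected: f64 =
                        transitions[s][a].iter().zip(v).map(|(p, vs)| p * vs).sum();
                    rewards[s][a] + discount * expected
                })
                .fold(f64::NEG_INFINITY, f64::max) // max over actions
        })
        .collect()
}

fn main() {
    // Hypothetical 2-state, 2-action MDP: action 0 stays put, action 1 switches state.
    let transitions = vec![
        vec![vec![1.0, 0.0], vec![0.0, 1.0]], // from state 0
        vec![vec![0.0, 1.0], vec![1.0, 0.0]], // from state 1
    ];
    let rewards = vec![vec![0.0, 1.0], vec![0.0, 2.0]];
    // With V = 0, one backup just picks the best immediate reward in each state.
    let tv = bellman_operator(&transitions, &rewards, 0.9, &[0.0, 0.0]);
    assert_eq!(tv, vec![1.0, 2.0]);
}
```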
pub fn value_iteration(&self, tol: f64, max_iter: usize) -> Vec<f64>
Value iteration: iterate the Bellman operator until successive iterates differ by less than tol, or for at most max_iter iterations.
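The loop can be sketched as follows, assuming the iterate starts from V = 0 and convergence is measured in the sup-norm (both standard choices, not confirmed by this crate's docs):

```rust
/// Standalone sketch of value iteration: repeatedly apply the Bellman backup
/// until successive iterates differ by less than tol, or max_iter is reached.
fn value_iteration(
    transitions: &[Vec<Vec<f64>>], // transitions[s][a][s'] = P(s'|s,a)
    rewards: &[Vec<f64>],          // rewards[s][a] = R(s,a)
    discount: f64,
    tol: f64,
    max_iter: usize,
) -> Vec<f64> {
    let mut v = vec![0.0; transitions.len()];
    for _ in 0..max_iter {
        // One Bellman backup: (TV)(s) = max_a [R(s,a) + γ Σ P(s'|s,a) V(s')].
        let tv: Vec<f64> = (0..transitions.len())
            .map(|s| {
                (0..rewards[s].len())
                    .map(|a| {
                        let ev: f64 =
                            transitions[s][a].iter().zip(&v).map(|(p, x)| p * x).sum();
                        rewards[s][a] + discount * ev
                    })
                    .fold(f64::NEG_INFINITY, f64::max)
            })
            .collect();
        // Sup-norm distance between successive iterates.
        let diff = tv
            .iter()
            .zip(&v)
            .map(|(a, b)| (a - b).abs())
            .fold(0.0_f64, f64::max);
        v = tv;
        if diff < tol {
            break;
        }
    }
    v
}

fn main() {
    // Single state, single action, self-loop with reward 1: V* = 1 / (1 - γ) = 10.
    let v = value_iteration(&[vec![vec![1.0]]], &[vec![1.0]], 0.9, 1e-10, 10_000);
    assert!((v[0] - 10.0).abs() < 1e-7);
}
```

Because T is a γ-contraction in the sup-norm, this loop converges geometrically for any γ ∈ [0, 1).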
pub fn policy_improvement(&self, v: &[f64]) -> Vec<usize>
Extract the greedy policy from a value function.
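Greedy extraction is the argmax counterpart of the Bellman backup; a standalone sketch (with a hypothetical 2-state example in main):

```rust
/// Standalone sketch of greedy-policy extraction:
/// π(s) = argmax_a [R(s,a) + γ Σ_{s'} P(s'|s,a) V(s')].
fn policy_improvement(
    transitions: &[Vec<Vec<f64>>], // transitions[s][a][s'] = P(s'|s,a)
    rewards: &[Vec<f64>],          // rewards[s][a] = R(s,a)
    discount: f64,
    v: &[f64],
) -> Vec<usize> {
    (0..transitions.len())
        .map(|s| {
            let mut best_a = 0;
            let mut best_q = f64::NEG_INFINITY;
            for a in 0..rewards[s].len() {
                // Q(s,a) = R(s,a) + γ Σ_{s'} P(s'|s,a) V(s').
                let ev: f64 = transitions[s][a].iter().zip(v).map(|(p, x)| p * x).sum();
                let q = rewards[s][a] + discount * ev;
                if q > best_q {
                    best_q = q;
                    best_a = a;
                }
            }
            best_a
        })
        .collect()
}

fn main() {
    // Hypothetical 2-state MDP: in state 0 action 1 pays 1, in state 1 action 0 pays 3.
    let transitions = vec![vec![vec![1.0, 0.0], vec![0.0, 1.0]]; 2];
    let rewards = vec![vec![0.0, 1.0], vec![3.0, 0.0]];
    let policy = policy_improvement(&transitions, &rewards, 0.9, &[0.0, 0.0]);
    assert_eq!(policy, vec![1, 0]);
}
```

Running this after value iteration yields an optimal policy; interleaving it with policy evaluation instead gives policy iteration.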
Trait Implementations§
Auto Trait Implementations§
impl Freeze for MDP
impl RefUnwindSafe for MDP
impl Send for MDP
impl Sync for MDP
impl Unpin for MDP
impl UnsafeUnpin for MDP
impl UnwindSafe for MDP
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.