pub struct MdpSolver {
pub num_states: usize,
pub num_actions: usize,
pub discount: f64,
pub rewards: Vec<Vec<f64>>,
pub transitions: Vec<Vec<Vec<f64>>>,
}
Discrete-state, discrete-action MDP solver.
Implements value iteration and policy iteration for discounted MDPs.
Fields

num_states: usize
Number of states.

num_actions: usize
Number of actions.

discount: f64
Discount factor γ ∈ [0, 1).

rewards: Vec<Vec<f64>>
Reward matrix R[s][a].

transitions: Vec<Vec<Vec<f64>>>
Transition probabilities P[s][a][s'].
Implementations

impl MdpSolver
pub fn new(
    num_states: usize,
    num_actions: usize,
    discount: f64,
    rewards: Vec<Vec<f64>>,
    transitions: Vec<Vec<Vec<f64>>>,
) -> Self

Create a new MDP solver.
pub fn bellman_update(&self, v: &[f64]) -> Vec<f64>

Apply the Bellman operator once: (T V)(s) = max_a [ R(s,a) + γ Σ_{s'} P(s'|s,a) V(s') ].
pub fn value_iteration(&self, tol: f64, max_iter: usize) -> (Vec<f64>, usize)

Run value iteration until successive iterates differ by less than tol, or until max_iter iterations have run.
Returns (V*, iterations), where V*[s] is the optimal value of state s.
pub fn extract_policy(&self, v: &[f64]) -> Vec<usize>

Extract the greedy policy from a value function V.
Returns policy[s] = argmax_a [ R(s,a) + γ Σ_{s'} P(s'|s,a) V(s') ].
Auto Trait Implementations
impl Freeze for MdpSolver
impl RefUnwindSafe for MdpSolver
impl Send for MdpSolver
impl Sync for MdpSolver
impl Unpin for MdpSolver
impl UnwindSafe for MdpSolver
Blanket Implementations

impl<T> BorrowMut<T> for T
where
    T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value.