use super::functions::*;
#[derive(Debug, Clone)]
pub struct NashEquilibrium {
pub player1_strategy: Vec<usize>,
pub player2_strategy: Vec<usize>,
pub value: Vec<f64>,
}
impl NashEquilibrium {
pub fn new(
player1_strategy: Vec<usize>,
player2_strategy: Vec<usize>,
value: Vec<f64>,
) -> Self {
Self {
player1_strategy,
player2_strategy,
value,
}
}
pub fn verify_nash_property(&self) -> bool {
!self.player1_strategy.is_empty()
&& !self.player2_strategy.is_empty()
&& self.player1_strategy.len() == self.value.len()
&& self.player2_strategy.len() == self.value.len()
}
}
#[derive(Debug, Clone)]
pub struct RiskSensitiveCost {
pub alpha: f64,
pub theta: f64,
}
impl RiskSensitiveCost {
pub fn new(alpha: f64, theta: f64) -> Self {
Self { alpha, theta }
}
pub fn var(&self, samples: &[f64]) -> f64 {
if samples.is_empty() {
return 0.0;
}
let mut sorted = samples.to_vec();
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let idx = ((1.0 - self.alpha) * sorted.len() as f64).floor() as usize;
sorted[idx.min(sorted.len() - 1)]
}
pub fn cvar(&self, samples: &[f64]) -> f64 {
if samples.is_empty() {
return 0.0;
}
let var_val = self.var(samples);
let tail: Vec<f64> = samples.iter().cloned().filter(|&x| x >= var_val).collect();
if tail.is_empty() {
return var_val;
}
tail.iter().sum::<f64>() / tail.len() as f64
}
pub fn entropic_risk(&self, samples: &[f64]) -> f64 {
if samples.is_empty() {
return 0.0;
}
let mean_exp: f64 =
samples.iter().map(|&x| (self.theta * x).exp()).sum::<f64>() / samples.len() as f64;
mean_exp.ln() / self.theta
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct RiskSensitiveControl {
pub risk_parameter: f64,
pub time_horizon: f64,
pub control_space: String,
pub state_space: String,
}
#[allow(dead_code)]
impl RiskSensitiveControl {
pub fn risk_averse(theta: f64, horizon: f64) -> Self {
assert!(theta > 0.0, "risk-averse requires θ > 0");
RiskSensitiveControl {
risk_parameter: theta,
time_horizon: horizon,
control_space: "U".to_string(),
state_space: "R^n".to_string(),
}
}
pub fn exponential_criterion(&self) -> String {
format!(
"Risk-sensitive criterion: J(u) = (1/{:.3}) log E[exp({:.3} ∫ r dt)]",
self.risk_parameter, self.risk_parameter
)
}
pub fn risk_sensitive_hjb(&self) -> String {
format!(
"RS-HJB: 0 = ∂V/∂t + min_u[f·∇V + (1/2)tr(σσ^T ∇²V) + r + ({:.3}/2)|σ^T∇V|²]",
self.risk_parameter
)
}
pub fn certainty_equivalent(&self, expected_cost: f64, variance: f64) -> f64 {
expected_cost + self.risk_parameter / 2.0 * variance
}
pub fn is_robust_control_connection(&self) -> bool {
self.risk_parameter > 0.0
}
}
#[derive(Debug, Clone)]
pub struct MeanFieldGameSolver {
pub num_states: usize,
pub num_actions: usize,
pub transitions: Vec<Vec<Vec<f64>>>,
pub rewards: Vec<Vec<f64>>,
pub discount: f64,
pub tol: f64,
pub max_iter: usize,
}
impl MeanFieldGameSolver {
#[allow(clippy::too_many_arguments)]
pub fn new(
num_states: usize,
num_actions: usize,
transitions: Vec<Vec<Vec<f64>>>,
rewards: Vec<Vec<f64>>,
discount: f64,
tol: f64,
max_iter: usize,
) -> Self {
Self {
num_states,
num_actions,
transitions,
rewards,
discount,
tol,
max_iter,
}
}
fn solve_individual(&self) -> Vec<usize> {
let mdp = MDP::new(
self.num_states,
self.num_actions,
self.transitions.clone(),
self.rewards.clone(),
self.discount,
);
let v = mdp.value_iteration(self.tol * 0.01, self.max_iter);
mdp.policy_improvement(&v)
}
fn stationary_distribution(&self, policy: &[usize]) -> Vec<f64> {
let mut mu = vec![1.0_f64 / self.num_states as f64; self.num_states];
for _ in 0..self.max_iter {
let mut new_mu = vec![0.0_f64; self.num_states];
for s in 0..self.num_states {
let a = policy[s];
for sp in 0..self.num_states {
new_mu[sp] += mu[s] * self.transitions[s][a][sp];
}
}
let delta: f64 = mu
.iter()
.zip(new_mu.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
mu = new_mu;
if delta < self.tol {
break;
}
}
mu
}
pub fn solve(&self) -> (Vec<usize>, Vec<f64>) {
let policy = self.solve_individual();
let mu = self.stationary_distribution(&policy);
(policy, mu)
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum NumericalSDEScheme {
EulerMaruyama,
Milstein,
RungeKuttaSDE,
StochasticTaylor,
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct SDGame {
pub player1_strategy: String,
pub player2_strategy: String,
pub value_function: Option<f64>,
pub is_zero_sum: bool,
pub horizon: f64,
}
#[allow(dead_code)]
impl SDGame {
pub fn zero_sum(horizon: f64) -> Self {
SDGame {
player1_strategy: "min".to_string(),
player2_strategy: "max".to_string(),
value_function: None,
is_zero_sum: true,
horizon,
}
}
pub fn cooperative(horizon: f64) -> Self {
SDGame {
player1_strategy: "cooperative".to_string(),
player2_strategy: "cooperative".to_string(),
value_function: None,
is_zero_sum: false,
horizon,
}
}
pub fn isaacs_equation(&self) -> String {
if self.is_zero_sum {
format!(
"Isaacs equation for zero-sum game on [0,{}]: -∂V/∂t = min_u max_v H(x,u,v,∇V)",
self.horizon
)
} else {
format!("Nash system for cooperative game on [0,{}]", self.horizon)
}
}
pub fn saddle_point_exists(&self) -> bool {
self.is_zero_sum
}
pub fn set_value(&mut self, val: f64) {
self.value_function = Some(val);
}
}
#[derive(Debug, Clone)]
pub struct ZeroSumSDG {
pub value: Vec<f64>,
pub min_strategy: Vec<usize>,
pub max_strategy: Vec<usize>,
}
impl ZeroSumSDG {
pub fn new(value: Vec<f64>, min_strategy: Vec<usize>, max_strategy: Vec<usize>) -> Self {
Self {
value,
min_strategy,
max_strategy,
}
}
pub fn saddle_value(&self, s: usize) -> f64 {
self.value[s]
}
}
#[derive(Debug, Clone)]
pub struct ExponentialMSStability {
pub c: f64,
pub lambda: f64,
}
impl ExponentialMSStability {
pub fn new(c: f64, lambda: f64) -> Self {
Self { c, lambda }
}
pub fn bound(&self, t: f64) -> f64 {
self.c * (-self.lambda * t).exp()
}
pub fn check(&self, ms_value: f64, t: f64) -> bool {
ms_value <= self.bound(t)
}
}
#[derive(Debug, Clone)]
pub struct ValueIteration {
pub num_states: usize,
pub num_actions: usize,
pub transitions: Vec<Vec<Vec<f64>>>,
pub rewards: Vec<Vec<f64>>,
pub discount: f64,
}
impl ValueIteration {
pub fn new(
num_states: usize,
num_actions: usize,
transitions: Vec<Vec<Vec<f64>>>,
rewards: Vec<Vec<f64>>,
discount: f64,
) -> Self {
Self {
num_states,
num_actions,
transitions,
rewards,
discount,
}
}
pub fn run(&self, tol: f64, max_iter: usize) -> (Vec<f64>, Vec<usize>) {
let mdp = MDP::new(
self.num_states,
self.num_actions,
self.transitions.clone(),
self.rewards.clone(),
self.discount,
);
let v = mdp.value_iteration(tol, max_iter);
let pi = mdp.policy_improvement(&v);
(v, pi)
}
pub fn q_from_v(&self, v: &[f64]) -> Vec<Vec<f64>> {
let mut q = vec![vec![0.0_f64; self.num_actions]; self.num_states];
for s in 0..self.num_states {
for a in 0..self.num_actions {
let mut qa = self.rewards[s][a];
for sp in 0..self.num_states {
qa += self.discount * self.transitions[s][a][sp] * v[sp];
}
q[s][a] = qa;
}
}
q
}
pub fn span(&self, v: &[f64]) -> f64 {
let tv_v = MDP::new(
self.num_states,
self.num_actions,
self.transitions.clone(),
self.rewards.clone(),
self.discount,
)
.bellman_operator(v);
tv_v.iter()
.zip(v.iter())
.map(|(tv, vv)| (tv - vv).abs())
.fold(0.0_f64, f64::max)
}
}
#[derive(Debug, Clone)]
pub struct ActionValueFunction {
pub q: Vec<Vec<f64>>,
}
impl ActionValueFunction {
pub fn new(q: Vec<Vec<f64>>) -> Self {
Self { q }
}
pub fn get(&self, s: usize, a: usize) -> f64 {
self.q[s][a]
}
pub fn max_action_value(&self, s: usize) -> f64 {
self.q[s].iter().cloned().fold(f64::NEG_INFINITY, f64::max)
}
pub fn greedy_action(&self, s: usize) -> usize {
self.q[s]
.iter()
.enumerate()
.max_by(|x, y| x.1.partial_cmp(y.1).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0)
}
}
#[derive(Debug, Clone)]
pub struct MDP {
pub num_states: usize,
pub num_actions: usize,
pub transitions: Vec<Vec<Vec<f64>>>,
pub rewards: Vec<Vec<f64>>,
pub discount: f64,
}
impl MDP {
pub fn new(
num_states: usize,
num_actions: usize,
transitions: Vec<Vec<Vec<f64>>>,
rewards: Vec<Vec<f64>>,
discount: f64,
) -> Self {
Self {
num_states,
num_actions,
transitions,
rewards,
discount,
}
}
pub fn bellman_operator(&self, v: &[f64]) -> Vec<f64> {
let mut tv = vec![0.0_f64; self.num_states];
for s in 0..self.num_states {
let mut best = f64::NEG_INFINITY;
for a in 0..self.num_actions {
let mut q = self.rewards[s][a];
for sp in 0..self.num_states {
q += self.discount * self.transitions[s][a][sp] * v[sp];
}
if q > best {
best = q;
}
}
tv[s] = best;
}
tv
}
pub fn value_iteration(&self, tol: f64, max_iter: usize) -> Vec<f64> {
let mut v = vec![0.0_f64; self.num_states];
for _ in 0..max_iter {
let tv = self.bellman_operator(&v);
let delta: f64 = v
.iter()
.zip(tv.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
v = tv;
if delta < tol {
break;
}
}
v
}
pub fn policy_improvement(&self, v: &[f64]) -> Vec<usize> {
let mut policy = vec![0_usize; self.num_states];
for s in 0..self.num_states {
let mut best_a = 0;
let mut best_q = f64::NEG_INFINITY;
for a in 0..self.num_actions {
let mut q = self.rewards[s][a];
for sp in 0..self.num_states {
q += self.discount * self.transitions[s][a][sp] * v[sp];
}
if q > best_q {
best_q = q;
best_a = a;
}
}
policy[s] = best_a;
}
policy
}
pub fn policy_evaluation(&self, policy: &[usize], tol: f64, max_iter: usize) -> Vec<f64> {
let mut v = vec![0.0_f64; self.num_states];
for _ in 0..max_iter {
let mut new_v = vec![0.0_f64; self.num_states];
for s in 0..self.num_states {
let a = policy[s];
let mut val = self.rewards[s][a];
for sp in 0..self.num_states {
val += self.discount * self.transitions[s][a][sp] * v[sp];
}
new_v[s] = val;
}
let delta: f64 = v
.iter()
.zip(new_v.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
v = new_v;
if delta < tol {
break;
}
}
v
}
}
#[derive(Debug, Clone)]
pub struct QLearning {
pub q: Vec<Vec<f64>>,
pub alpha: f64,
pub gamma: f64,
}
impl QLearning {
pub fn new(num_states: usize, num_actions: usize, alpha: f64, gamma: f64) -> Self {
Self {
q: vec![vec![0.0_f64; num_actions]; num_states],
alpha,
gamma,
}
}
pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize) {
let max_q_next = self.q[s_next]
.iter()
.cloned()
.fold(f64::NEG_INFINITY, f64::max);
let td_error = r + self.gamma * max_q_next - self.q[s][a];
self.q[s][a] += self.alpha * td_error;
}
pub fn greedy_action(&self, s: usize) -> usize {
self.q[s]
.iter()
.enumerate()
.max_by(|x, y| x.1.partial_cmp(y.1).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0)
}
pub fn expected_return(&self, s: usize) -> f64 {
self.q[s].iter().cloned().fold(f64::NEG_INFINITY, f64::max)
}
}
#[derive(Debug, Clone)]
pub struct ActorCritic {
pub actor: PolicyGradient,
pub critic: Vec<f64>,
pub critic_alpha: f64,
}
impl ActorCritic {
pub fn new(
num_states: usize,
num_actions: usize,
actor_alpha: f64,
critic_alpha: f64,
gamma: f64,
) -> Self {
Self {
actor: PolicyGradient::new(num_states, num_actions, actor_alpha, gamma),
critic: vec![0.0_f64; num_states],
critic_alpha,
}
}
pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize) {
let td_error = r + self.actor.gamma * self.critic[s_next] - self.critic[s];
self.critic[s] += self.critic_alpha * td_error;
let pi = self.actor.softmax(s);
let num_actions = self.actor.theta[s].len();
for b in 0..num_actions {
let indicator = if b == a { 1.0 } else { 0.0 };
let grad_log = indicator - pi[b];
self.actor.theta[s][b] += self.actor.alpha * grad_log * td_error;
}
}
pub fn expected_return(&self, s: usize) -> f64 {
self.critic[s]
}
}
#[derive(Debug, Clone)]
pub struct PursuitEvasionGame {
pub pursuer: [f64; 2],
pub evader: [f64; 2],
pub pursuer_speed: f64,
pub evader_speed: f64,
}
impl PursuitEvasionGame {
pub fn new(pursuer: [f64; 2], evader: [f64; 2], pursuer_speed: f64, evader_speed: f64) -> Self {
Self {
pursuer,
evader,
pursuer_speed,
evader_speed,
}
}
pub fn distance(&self) -> f64 {
let dx = self.pursuer[0] - self.evader[0];
let dy = self.pursuer[1] - self.evader[1];
(dx * dx + dy * dy).sqrt()
}
pub fn pursuer_wins(&self) -> bool {
self.pursuer_speed > self.evader_speed
}
pub fn capture_time_estimate(&self) -> f64 {
let d = self.distance();
let relative_speed = self.pursuer_speed - self.evader_speed;
if relative_speed <= 0.0 {
f64::INFINITY
} else {
d / relative_speed
}
}
}
#[derive(Debug, Clone)]
pub struct MeanSquareStability {
pub ms_samples: Vec<f64>,
}
impl MeanSquareStability {
pub fn new(ms_samples: Vec<f64>) -> Self {
Self { ms_samples }
}
pub fn is_non_increasing(&self) -> bool {
self.ms_samples.windows(2).all(|w| w[0] >= w[1])
}
pub fn has_converged(&self, tol: f64) -> bool {
self.ms_samples.last().is_some_and(|&v| v < tol)
}
}
#[derive(Debug, Clone)]
pub struct BeliefMDP {
pub num_states: usize,
pub num_actions: usize,
pub num_obs: usize,
pub transitions: Vec<Vec<Vec<f64>>>,
pub observations: Vec<Vec<Vec<f64>>>,
}
impl BeliefMDP {
pub fn new(
num_states: usize,
num_actions: usize,
num_obs: usize,
transitions: Vec<Vec<Vec<f64>>>,
observations: Vec<Vec<Vec<f64>>>,
) -> Self {
Self {
num_states,
num_actions,
num_obs,
transitions,
observations,
}
}
pub fn belief_update(&self, belief: &[f64], action: usize, obs: usize) -> Vec<f64> {
let mut new_belief = vec![0.0_f64; self.num_states];
for sp in 0..self.num_states {
let predict: f64 = (0..self.num_states)
.map(|s| self.transitions[s][action][sp] * belief[s])
.sum();
new_belief[sp] = self.observations[sp][action][obs] * predict;
}
let total: f64 = new_belief.iter().sum();
if total > 0.0 {
for b in &mut new_belief {
*b /= total;
}
}
new_belief
}
pub fn qmdp_value(&self, belief: &[f64], q_star: &[Vec<f64>]) -> f64 {
(0..self.num_actions)
.map(|a| {
belief
.iter()
.enumerate()
.map(|(s, &bs)| bs * q_star[s][a])
.sum::<f64>()
})
.fold(f64::NEG_INFINITY, f64::max)
}
}
#[derive(Debug, Clone)]
pub struct RiccatiEquation {
pub a: Vec<Vec<f64>>,
pub b: Vec<Vec<f64>>,
pub q_cost: Vec<Vec<f64>>,
pub r_cost: Vec<Vec<f64>>,
}
impl RiccatiEquation {
pub fn new(
a: Vec<Vec<f64>>,
b: Vec<Vec<f64>>,
q_cost: Vec<Vec<f64>>,
r_cost: Vec<Vec<f64>>,
) -> Self {
Self {
a,
b,
q_cost,
r_cost,
}
}
fn mat_mul(m1: &[Vec<f64>], m2: &[Vec<f64>]) -> Vec<Vec<f64>> {
let r1 = m1.len();
let c2 = m2[0].len();
let inner = m2.len();
let mut out = vec![vec![0.0_f64; c2]; r1];
for i in 0..r1 {
for j in 0..c2 {
for k in 0..inner {
out[i][j] += m1[i][k] * m2[k][j];
}
}
}
out
}
fn mat_transpose(m: &[Vec<f64>]) -> Vec<Vec<f64>> {
if m.is_empty() {
return vec![];
}
let rows = m.len();
let cols = m[0].len();
let mut out = vec![vec![0.0_f64; rows]; cols];
for i in 0..rows {
for j in 0..cols {
out[j][i] = m[i][j];
}
}
out
}
fn mat_add(m1: &[Vec<f64>], m2: &[Vec<f64>]) -> Vec<Vec<f64>> {
m1.iter()
.zip(m2.iter())
.map(|(r1, r2)| r1.iter().zip(r2.iter()).map(|(a, b)| a + b).collect())
.collect()
}
fn mat_sub(m1: &[Vec<f64>], m2: &[Vec<f64>]) -> Vec<Vec<f64>> {
m1.iter()
.zip(m2.iter())
.map(|(r1, r2)| r1.iter().zip(r2.iter()).map(|(a, b)| a - b).collect())
.collect()
}
fn mat_scale(m: &[Vec<f64>], s: f64) -> Vec<Vec<f64>> {
m.iter()
.map(|r| r.iter().map(|x| x * s).collect())
.collect()
}
fn mat_inv_small(m: &[Vec<f64>]) -> Vec<Vec<f64>> {
let n = m.len();
if n == 1 {
return vec![vec![1.0 / m[0][0]]];
}
if n == 2 {
let det = m[0][0] * m[1][1] - m[0][1] * m[1][0];
return vec![
vec![m[1][1] / det, -m[0][1] / det],
vec![-m[1][0] / det, m[0][0] / det],
];
}
let mut eye = vec![vec![0.0_f64; n]; n];
for i in 0..n {
eye[i][i] = 1.0;
}
eye
}
fn riccati_deriv(&self, p: &[Vec<f64>]) -> Vec<Vec<f64>> {
let at = Self::mat_transpose(&self.a);
let bt = Self::mat_transpose(&self.b);
let r_inv = Self::mat_inv_small(&self.r_cost);
let at_p = Self::mat_mul(&at, p);
let p_a = Self::mat_mul(p, &self.a);
let p_b = Self::mat_mul(p, &self.b);
let p_b_rinv = Self::mat_mul(&p_b, &r_inv);
let p_b_rinv_bt = Self::mat_mul(&p_b_rinv, &bt);
let p_b_rinv_bt_p = Self::mat_mul(&p_b_rinv_bt, p);
let sum = Self::mat_add(&at_p, &p_a);
let sum2 = Self::mat_add(&sum, &self.q_cost);
Self::mat_sub(&sum2, &p_b_rinv_bt_p)
}
pub fn solve_riccati(&self, dt: f64, max_iter: usize) -> Vec<Vec<f64>> {
let n = self.a.len();
let mut p = vec![vec![0.0_f64; n]; n];
for _ in 0..max_iter {
let dp = self.riccati_deriv(&p);
let update = Self::mat_scale(&dp, dt);
p = Self::mat_add(&p, &update);
}
p
}
pub fn optimal_gain_matrix(&self, p: &[Vec<f64>]) -> Vec<Vec<f64>> {
let bt = Self::mat_transpose(&self.b);
let r_inv = Self::mat_inv_small(&self.r_cost);
let bt_p = Self::mat_mul(&bt, p);
Self::mat_mul(&r_inv, &bt_p)
}
pub fn infinite_horizon_lqr(&self) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
let p = self.solve_riccati(0.001, 10_000);
let k = self.optimal_gain_matrix(&p);
(p, k)
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct MeanFieldGame {
pub num_players: usize,
pub coupling_strength: f64,
pub mean_field_type: MFGType,
pub convergence_rate: f64,
}
#[allow(dead_code)]
impl MeanFieldGame {
pub fn new(players: usize, coupling: f64) -> Self {
MeanFieldGame {
num_players: players,
coupling_strength: coupling,
mean_field_type: MFGType::LasryLions,
convergence_rate: 1.0 / (players as f64).sqrt(),
}
}
pub fn mfg_system_description(&self) -> String {
format!(
"MFG ({} players, coupling={:.3}): HJB + FP system, rate O(1/√N)",
self.num_players, self.coupling_strength
)
}
pub fn price_of_anarchy(&self) -> f64 {
1.0 + self.coupling_strength * 0.5
}
pub fn master_equation(&self) -> String {
"∂_t U + H(x, m, ∇_x U) - ν Δ_x U = ∫ (∂_m U)(y) δ_y F(y,m) m(dy)".to_string()
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct PathwiseSDE {
pub drift: String,
pub diffusion: String,
pub initial_condition: f64,
pub time_steps: usize,
pub step_size: f64,
pub scheme: NumericalSDEScheme,
}
#[allow(dead_code)]
impl PathwiseSDE {
pub fn euler_maruyama(drift: &str, diffusion: &str, x0: f64, steps: usize, dt: f64) -> Self {
PathwiseSDE {
drift: drift.to_string(),
diffusion: diffusion.to_string(),
initial_condition: x0,
time_steps: steps,
step_size: dt,
scheme: NumericalSDEScheme::EulerMaruyama,
}
}
pub fn milstein(drift: &str, diffusion: &str, x0: f64, steps: usize, dt: f64) -> Self {
PathwiseSDE {
drift: drift.to_string(),
diffusion: diffusion.to_string(),
initial_condition: x0,
time_steps: steps,
step_size: dt,
scheme: NumericalSDEScheme::Milstein,
}
}
pub fn strong_order(&self) -> f64 {
match &self.scheme {
NumericalSDEScheme::EulerMaruyama => 0.5,
NumericalSDEScheme::Milstein => 1.0,
NumericalSDEScheme::RungeKuttaSDE => 1.5,
NumericalSDEScheme::StochasticTaylor => 2.0,
}
}
pub fn weak_order(&self) -> f64 {
match &self.scheme {
NumericalSDEScheme::EulerMaruyama => 1.0,
NumericalSDEScheme::Milstein => 1.0,
NumericalSDEScheme::RungeKuttaSDE => 2.0,
NumericalSDEScheme::StochasticTaylor => 2.0,
}
}
pub fn simulate_one_path(&self) -> Vec<f64> {
let mut path = vec![self.initial_condition];
let mut x = self.initial_condition;
for _ in 0..self.time_steps {
let dw = 0.0;
x += self.step_size * 0.5 + 0.3 * dw;
path.push(x);
}
path
}
}
#[derive(Debug, Clone)]
pub struct StochasticLyapunov {
pub alpha: f64,
pub beta: f64,
}
impl StochasticLyapunov {
pub fn new(alpha: f64, beta: f64) -> Self {
Self { alpha, beta }
}
pub fn check(&self, lv: f64, v: f64) -> bool {
lv <= -self.alpha * v + self.beta
}
pub fn ev_upper_bound(&self, v0: f64, t: f64) -> f64 {
let decay = (-self.alpha * t).exp();
v0 * decay + (self.beta / self.alpha) * (1.0 - decay)
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum MFGType {
LasryLions,
MFGControl,
ExtendedMFG,
}
#[derive(Debug, Clone)]
pub struct PolicyGradient {
pub theta: Vec<Vec<f64>>,
pub alpha: f64,
pub gamma: f64,
}
impl PolicyGradient {
pub fn new(num_states: usize, num_actions: usize, alpha: f64, gamma: f64) -> Self {
Self {
theta: vec![vec![0.0_f64; num_actions]; num_states],
alpha,
gamma,
}
}
pub fn softmax(&self, s: usize) -> Vec<f64> {
let row = &self.theta[s];
let max_val = row.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let exps: Vec<f64> = row.iter().map(|&x| (x - max_val).exp()).collect();
let sum: f64 = exps.iter().sum();
exps.iter().map(|&e| e / sum).collect()
}
pub fn update(&mut self, s: usize, a: usize, g: f64) {
let pi = self.softmax(s);
let num_actions = self.theta[s].len();
for b in 0..num_actions {
let indicator = if b == a { 1.0 } else { 0.0 };
let grad_log = indicator - pi[b];
self.theta[s][b] += self.alpha * grad_log * g;
}
}
pub fn expected_return(&self, s: usize, q: &ActionValueFunction) -> f64 {
let pi = self.softmax(s);
pi.iter().enumerate().map(|(a, &p)| p * q.get(s, a)).sum()
}
pub fn convergence_rate(&self, q: &ActionValueFunction) -> f64 {
let mut max_grad = 0.0_f64;
for s in 0..self.theta.len() {
let pi = self.softmax(s);
for a in 0..self.theta[s].len() {
let grad = pi
.iter()
.enumerate()
.map(|(b, &pb)| {
let indicator = if b == a { 1.0 } else { 0.0 };
(indicator - pb) * q.get(s, a)
})
.sum::<f64>()
.abs();
if grad > max_grad {
max_grad = grad;
}
}
}
max_grad
}
}
#[derive(Debug, Clone)]
pub struct QLearningSolver {
pub q: Vec<Vec<f64>>,
pub alpha: f64,
pub gamma: f64,
pub epsilon: f64,
pub visit_count: Vec<Vec<u64>>,
}
impl QLearningSolver {
pub fn new(
num_states: usize,
num_actions: usize,
alpha: f64,
gamma: f64,
epsilon: f64,
) -> Self {
Self {
q: vec![vec![0.0_f64; num_actions]; num_states],
alpha,
gamma,
epsilon,
visit_count: vec![vec![0_u64; num_actions]; num_states],
}
}
pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize) {
self.visit_count[s][a] += 1;
let n = self.visit_count[s][a] as f64;
let step = self.alpha / (1.0 + n).sqrt();
let max_q_next = self.q[s_next]
.iter()
.cloned()
.fold(f64::NEG_INFINITY, f64::max);
let td_error = r + self.gamma * max_q_next - self.q[s][a];
self.q[s][a] += step * td_error;
}
pub fn select_action(&self, s: usize, rng_val: f64) -> usize {
if rng_val < self.epsilon {
let n = self.q[s].len();
((rng_val / self.epsilon) * n as f64) as usize % n
} else {
self.q[s]
.iter()
.enumerate()
.max_by(|x, y| x.1.partial_cmp(y.1).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0)
}
}
pub fn has_converged(&self, prev_q: &[Vec<f64>], tol: f64) -> bool {
self.q.iter().zip(prev_q.iter()).all(|(row, prev_row)| {
row.iter()
.zip(prev_row.iter())
.all(|(q, pq)| (q - pq).abs() < tol)
})
}
pub fn greedy_policy(&self) -> Vec<usize> {
(0..self.q.len())
.map(|s| {
self.q[s]
.iter()
.enumerate()
.max_by(|x, y| x.1.partial_cmp(y.1).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0)
})
.collect()
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct HInfinityControl {
pub disturbance_attenuation: f64,
pub state_dim: usize,
pub control_dim: usize,
pub disturbance_dim: usize,
pub riccati_solution: Option<f64>,
}
#[allow(dead_code)]
impl HInfinityControl {
pub fn new(gamma: f64, n: usize, m: usize, k: usize) -> Self {
HInfinityControl {
disturbance_attenuation: gamma,
state_dim: n,
control_dim: m,
disturbance_dim: k,
riccati_solution: None,
}
}
pub fn minimax_criterion(&self) -> String {
format!(
"H∞: min_u max_w ||z||² - {:.3}² ||w||² (disturbance attenuation γ={:.3})",
self.disturbance_attenuation, self.disturbance_attenuation
)
}
pub fn game_riccati_equation(&self) -> String {
format!(
"Game ARE: PA + A^TP - P(B B^T - (1/{:.3}²) B_w B_w^T)P + C^TC = 0",
self.disturbance_attenuation
)
}
pub fn is_feasible(&self) -> bool {
self.riccati_solution.map_or(false, |p| p > 0.0)
}
}
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ErgodicControl {
pub discount_rate: f64,
pub state_space_dim: usize,
pub long_run_cost: Option<f64>,
pub eigenvalue_lambda: Option<f64>,
}
#[allow(dead_code)]
impl ErgodicControl {
pub fn new(dim: usize) -> Self {
ErgodicControl {
discount_rate: 0.0,
state_space_dim: dim,
long_run_cost: None,
eigenvalue_lambda: None,
}
}
pub fn ergodic_hjb(&self) -> String {
"λ + H(x, ∇V, ∇²V) = 0 (ergodic HJB: λ is long-run average cost)".to_string()
}
pub fn turnpike_property(&self) -> String {
"Turnpike: finite-horizon optimal trajectories spend most time near ergodic optimal"
.to_string()
}
pub fn set_eigenvalue(&mut self, lambda: f64) {
self.eigenvalue_lambda = Some(lambda);
self.long_run_cost = Some(lambda);
}
pub fn relative_value_function_description(&self) -> String {
format!(
"Ergodic control dim={}: solve (λ*, V) pair in ergodic HJB",
self.state_space_dim
)
}
}
#[derive(Debug, Clone)]
pub struct AlmostSureStability {
pub paths: Vec<Vec<f64>>,
}
impl AlmostSureStability {
pub fn new(paths: Vec<Vec<f64>>) -> Self {
Self { paths }
}
pub fn convergence_fraction(&self, tol: f64) -> f64 {
if self.paths.is_empty() {
return 0.0;
}
let converged = self
.paths
.iter()
.filter(|p| p.last().is_some_and(|&v| v < tol))
.count();
converged as f64 / self.paths.len() as f64
}
}
#[derive(Debug, Clone)]
pub struct Policy {
pub table: Vec<usize>,
}
impl Policy {
pub fn new(table: Vec<usize>) -> Self {
Self { table }
}
pub fn action(&self, s: usize) -> usize {
self.table[s]
}
}
#[derive(Debug, Clone)]
pub struct SARSA {
pub q: Vec<Vec<f64>>,
pub alpha: f64,
pub gamma: f64,
}
impl SARSA {
pub fn new(num_states: usize, num_actions: usize, alpha: f64, gamma: f64) -> Self {
Self {
q: vec![vec![0.0_f64; num_actions]; num_states],
alpha,
gamma,
}
}
pub fn update(&mut self, s: usize, a: usize, r: f64, s_next: usize, a_next: usize) {
let td_error = r + self.gamma * self.q[s_next][a_next] - self.q[s][a];
self.q[s][a] += self.alpha * td_error;
}
pub fn greedy_action(&self, s: usize) -> usize {
self.q[s]
.iter()
.enumerate()
.max_by(|x, y| x.1.partial_cmp(y.1).unwrap_or(std::cmp::Ordering::Equal))
.map(|(i, _)| i)
.unwrap_or(0)
}
pub fn expected_return(&self, s: usize) -> f64 {
self.q[s].iter().cloned().fold(f64::NEG_INFINITY, f64::max)
}
}