// oxilean_std/stochastic_control/functions.rs
1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5use oxilean_kernel::{BinderInfo, Declaration, Environment, Expr, Level, Name};
6
7use super::types::{
8    ActionValueFunction, ActorCritic, AlmostSureStability, BeliefMDP, ErgodicControl,
9    ExponentialMSStability, HInfinityControl, MeanFieldGame, MeanFieldGameSolver,
10    MeanSquareStability, NashEquilibrium, PathwiseSDE, Policy, PolicyGradient, PursuitEvasionGame,
11    QLearning, QLearningSolver, RiccatiEquation, RiskSensitiveControl, RiskSensitiveCost, SDGame,
12    StochasticLyapunov, ValueIteration, ZeroSumSDG, MDP, SARSA,
13};
14
15pub fn app(f: Expr, a: Expr) -> Expr {
16    Expr::App(Box::new(f), Box::new(a))
17}
18pub fn app2(f: Expr, a: Expr, b: Expr) -> Expr {
19    app(app(f, a), b)
20}
21pub fn app3(f: Expr, a: Expr, b: Expr, c: Expr) -> Expr {
22    app(app2(f, a, b), c)
23}
24pub fn cst(s: &str) -> Expr {
25    Expr::Const(Name::str(s), vec![])
26}
27pub fn prop() -> Expr {
28    Expr::Sort(Level::zero())
29}
30pub fn type0() -> Expr {
31    Expr::Sort(Level::succ(Level::zero()))
32}
33pub fn pi(bi: BinderInfo, name: &str, dom: Expr, body: Expr) -> Expr {
34    Expr::Pi(bi, Name::str(name), Box::new(dom), Box::new(body))
35}
36pub fn arrow(a: Expr, b: Expr) -> Expr {
37    pi(BinderInfo::Default, "_", a, b)
38}
39pub fn bvar(n: u32) -> Expr {
40    Expr::BVar(n)
41}
42pub fn nat_ty() -> Expr {
43    cst("Nat")
44}
45pub fn real_ty() -> Expr {
46    cst("Real")
47}
48pub fn list_ty(elem: Expr) -> Expr {
49    app(cst("List"), elem)
50}
51pub fn bool_ty() -> Expr {
52    cst("Bool")
53}
54pub fn fn_ty(dom: Expr, cod: Expr) -> Expr {
55    arrow(dom, cod)
56}
57/// `MDP : (Nat -> Nat -> Real -> Nat -> Real -> Prop) -> Prop`
58/// Markov Decision Process: (S, A, P, R, γ) = (states, actions, transitions, rewards, discount).
59pub fn mdp_ty() -> Expr {
60    let rel = fn_ty(
61        nat_ty(),
62        fn_ty(
63            nat_ty(),
64            fn_ty(real_ty(), fn_ty(nat_ty(), fn_ty(real_ty(), prop()))),
65        ),
66    );
67    arrow(rel, prop())
68}
69/// `Policy : Nat -> Nat`
70/// Deterministic policy: maps each state to an action.
71pub fn policy_ty() -> Expr {
72    fn_ty(nat_ty(), nat_ty())
73}
74/// `StochasticPolicy : Nat -> (Nat -> Real)`
75/// Stochastic policy: maps each state to a probability distribution over actions.
76pub fn stochastic_policy_ty() -> Expr {
77    fn_ty(nat_ty(), fn_ty(nat_ty(), real_ty()))
78}
79/// `ValueFunction : (Nat -> Nat) -> Nat -> Real`
80/// V^π(s) = E[∑ γ^t r_t | s_0=s, π]. Maps policy and state to a real value.
81pub fn value_function_ty() -> Expr {
82    fn_ty(fn_ty(nat_ty(), nat_ty()), fn_ty(nat_ty(), real_ty()))
83}
84/// `ActionValueFunction : (Nat -> Nat) -> Nat -> Nat -> Real`
85/// Q^π(s,a) = expected return from state s, taking action a, then following π.
86pub fn action_value_function_ty() -> Expr {
87    fn_ty(
88        fn_ty(nat_ty(), nat_ty()),
89        fn_ty(nat_ty(), fn_ty(nat_ty(), real_ty())),
90    )
91}
92/// `BellmanOperator : Prop`
93/// The Bellman operator T^π V(s) = R(s,π(s)) + γ Σ P(s'|s,π(s)) V(s') is a contraction.
94pub fn bellman_operator_ty() -> Expr {
95    prop()
96}
97/// `PolicyEvaluation : Prop`
98/// Policy evaluation computes V^π as the unique fixed-point of T^π.
99pub fn policy_evaluation_ty() -> Expr {
100    prop()
101}
102/// `PolicyImprovement : Prop`
103/// The greedy policy w.r.t. V^π is at least as good as π (policy improvement theorem).
104pub fn policy_improvement_ty() -> Expr {
105    prop()
106}
107/// `ValueIteration : Prop`
108/// Value iteration converges to V* = V^{π*} when γ < 1.
109pub fn value_iteration_ty() -> Expr {
110    prop()
111}
112/// `HamiltonJacobiBellman : Prop`
113/// HJB PDE: V_t + inf_u{L(x,u) + ∇V·f(x,u)} = 0 characterises optimal value.
114pub fn hjb_ty() -> Expr {
115    prop()
116}
117/// `StochasticOptimalControl : Prop`
118/// Itô dynamics: dx = f(x,u)dt + σ(x,u)dW; optimal cost via HJB.
119pub fn stochastic_optimal_control_ty() -> Expr {
120    prop()
121}
122/// `LinearQuadraticRegulator : (List (List Real)) -> (List (List Real)) -> (List (List Real)) -> (List (List Real)) -> Prop`
123/// LQR/LQG: minimise E[∫(x^T Q x + u^T R u)dt] subject to linear stochastic dynamics.
124pub fn lqr_ty() -> Expr {
125    let mat = list_ty(list_ty(real_ty()));
126    arrow(
127        mat.clone(),
128        arrow(mat.clone(), arrow(mat.clone(), arrow(mat, prop()))),
129    )
130}
131/// `RiccatiEquation : Prop`
132/// P_t = A^T P + P A - P B R^{-1} B^T P + Q: differential Riccati equation for LQR.
133pub fn riccati_equation_ty() -> Expr {
134    prop()
135}
136/// `solve_riccati_ty : Prop`
137/// The algebraic Riccati equation has a unique positive-definite solution under stabilisability.
138pub fn solve_riccati_ty() -> Expr {
139    prop()
140}
141/// `optimal_gain_matrix_ty : Prop`
142/// Optimal LQR gain K* = R^{-1} B^T P where P solves the Riccati equation.
143pub fn optimal_gain_matrix_ty() -> Expr {
144    prop()
145}
146/// `infinite_horizon_lqr_ty : Prop`
147/// Infinite-horizon LQR: steady-state optimal gain stabilises the system.
148pub fn infinite_horizon_lqr_ty() -> Expr {
149    prop()
150}
151/// `QLearning : Real -> Real -> Nat -> Nat -> Real -> Nat -> Prop`
152/// Q(s,a) ← Q(s,a) + α(r + γ max Q(s',a') − Q(s,a)) off-policy TD control.
153pub fn q_learning_ty() -> Expr {
154    arrow(
155        real_ty(),
156        arrow(
157            real_ty(),
158            arrow(
159                nat_ty(),
160                arrow(nat_ty(), arrow(real_ty(), arrow(nat_ty(), prop()))),
161            ),
162        ),
163    )
164}
165/// `SARSA : Real -> Real -> Nat -> Nat -> Real -> Nat -> Nat -> Prop`
166/// On-policy TD(0): Q(s,a) ← Q(s,a) + α(r + γ Q(s',a') − Q(s,a)).
167pub fn sarsa_ty() -> Expr {
168    arrow(
169        real_ty(),
170        arrow(
171            real_ty(),
172            arrow(
173                nat_ty(),
174                arrow(
175                    nat_ty(),
176                    arrow(real_ty(), arrow(nat_ty(), arrow(nat_ty(), prop()))),
177                ),
178            ),
179        ),
180    )
181}
182/// `PolicyGradient : Prop`
183/// ∇_θ J(π_θ) = E[∇_θ log π_θ(a|s) · Q^π(s,a)] (REINFORCE theorem).
184pub fn policy_gradient_ty() -> Expr {
185    prop()
186}
187/// `ActorCritic : Prop`
188/// Actor-critic: policy (actor) updated by gradient, value (critic) updated by TD error.
189pub fn actor_critic_ty() -> Expr {
190    prop()
191}
192/// `convergence_rate_rl_ty : Prop`
193/// Q-learning converges to Q* under appropriate step-size conditions (Watkins & Dayan 1992).
194pub fn convergence_rate_rl_ty() -> Expr {
195    prop()
196}
197/// `ZeroSumSDG : Prop`
198/// J = E[∫ L(x,u,v)dt + g(x_T)]: zero-sum stochastic differential game objective.
199pub fn zero_sum_sdg_ty() -> Expr {
200    prop()
201}
202/// `NashEquilibrium : Prop`
203/// (u*, v*) such that neither player can benefit from unilateral deviation.
204pub fn nash_equilibrium_ty() -> Expr {
205    prop()
206}
207/// `IsaacEquation : Prop`
208/// Hamilton-Jacobi-Isaacs PDE: saddle-point condition for zero-sum SDG.
209pub fn isaac_equation_ty() -> Expr {
210    prop()
211}
212/// `PursuitEvasionGame : Prop`
213/// Classical pursuit-evasion differential game with pursuit and evader dynamics.
214pub fn pursuit_evasion_game_ty() -> Expr {
215    prop()
216}
217/// `StochasticLyapunov : Prop`
218/// V with LV ≤ -αV + β: Foster-Lyapunov condition for stability.
219pub fn stochastic_lyapunov_ty() -> Expr {
220    prop()
221}
222/// `MeanSquareStability : Prop`
223/// E[||x_t||²] → 0 as t → ∞.
224pub fn mean_square_stability_ty() -> Expr {
225    prop()
226}
227/// `AlmostSureStability : Prop`
228/// ||x_t|| → 0 almost surely.
229pub fn almost_sure_stability_ty() -> Expr {
230    prop()
231}
232/// `ExponentialMSStability : Prop`
233/// E[||x_t||²] ≤ C e^{-λt} for some C,λ > 0.
234pub fn exponential_ms_stability_ty() -> Expr {
235    prop()
236}
237/// `POMDP : Prop`
238/// POMDP: (S, A, O, T, Z, R, γ) — states, actions, observations, transition,
239/// observation, reward, discount.
240pub fn pomdp_ty() -> Expr {
241    prop()
242}
243/// `BeliefState : (Nat -> Real) -> Prop`
244/// Belief b: Δ(S) — probability distribution over states.
245pub fn belief_state_ty() -> Expr {
246    arrow(fn_ty(nat_ty(), real_ty()), prop())
247}
248/// `BeliefUpdate : (Nat -> Real) -> Nat -> Nat -> (Nat -> Real) -> Prop`
249/// Bayesian belief update: b'(s') ∝ Z(o|s',a) Σ_s T(s'|s,a) b(s).
250pub fn belief_update_ty() -> Expr {
251    arrow(
252        fn_ty(nat_ty(), real_ty()),
253        arrow(
254            nat_ty(),
255            arrow(nat_ty(), arrow(fn_ty(nat_ty(), real_ty()), prop())),
256        ),
257    )
258}
259/// `BeliefMDP : Prop`
260/// Belief-space MDP induced by a POMDP: states are belief distributions.
261pub fn belief_mdp_ty() -> Expr {
262    prop()
263}
264/// `QMDPApproximation : Prop`
265/// QMDP approximation: V(b) ≈ max_a Σ_s b(s) Q*(s, a) — ignores future observations.
266pub fn qmdp_approximation_ty() -> Expr {
267    prop()
268}
269/// `EntropicRisk : Real -> (Nat -> Real) -> Real`
270/// Entropic risk measure: ρ_θ(X) = (1/θ) log E[e^{θX}].
271pub fn entropic_risk_ty() -> Expr {
272    arrow(real_ty(), arrow(fn_ty(nat_ty(), real_ty()), real_ty()))
273}
274/// `CVaROptimization : Real -> Prop`
275/// CVaR (Conditional Value-at-Risk) at level α: minimise E[X | X ≥ VaR_α(X)].
276pub fn cvar_optimization_ty() -> Expr {
277    arrow(real_ty(), prop())
278}
279/// `CoherentRiskMeasure : ((Nat -> Real) -> Real) -> Prop`
280/// A coherent risk measure satisfies monotonicity, sub-additivity, positive homogeneity,
281/// and translation invariance.
282pub fn coherent_risk_measure_ty() -> Expr {
283    arrow(fn_ty(fn_ty(nat_ty(), real_ty()), real_ty()), prop())
284}
285/// `RiskSensitiveBellman : Real -> Prop`
286/// Risk-sensitive Bellman equation: V(s) = min_a {l(s,a) + (1/θ) log E[e^{θ V(s')}]}.
287pub fn risk_sensitive_bellman_ty() -> Expr {
288    arrow(real_ty(), prop())
289}
290/// `MinimaxControl : Prop`
291/// Minimax (H∞) control: min_u max_{w ∈ W} J(u, w) — worst-case disturbance.
292pub fn minimax_control_ty() -> Expr {
293    prop()
294}
295/// `HInfinityStochastic : Prop`
296/// H∞ stochastic control: robust to stochastic uncertainty bounded by an ambiguity set.
297pub fn h_infinity_stochastic_ty() -> Expr {
298    prop()
299}
300/// `AmbiguitySet : ((Nat -> Real) -> Prop) -> Prop`
301/// Ambiguity set: collection of distributions consistent with observed data.
302pub fn ambiguity_set_ty() -> Expr {
303    arrow(fn_ty(fn_ty(nat_ty(), real_ty()), prop()), prop())
304}
305/// `DistributionallyRobustMDP : Prop`
306/// Distributionally robust MDP: optimise worst-case expected return over an ambiguity set.
307pub fn distributionally_robust_mdp_ty() -> Expr {
308    prop()
309}
310/// `MeanFieldGame : Prop`
311/// MFG: large-population game where each agent interacts with the population distribution.
312pub fn mean_field_game_ty() -> Expr {
313    prop()
314}
315/// `McKeanVlasovSDE : Prop`
316/// McKean-Vlasov SDE: dX = b(X, μ_t)dt + σ(X, μ_t)dW, μ_t = Law(X_t).
317pub fn mckean_vlasov_sde_ty() -> Expr {
318    prop()
319}
320/// `MFGNashEquilibrium : (Nat -> Real) -> Prop`
321/// MFG Nash equilibrium: (u*, μ*) such that u* is optimal given μ* and μ* = Law(X^{u*}).
322pub fn mfg_nash_equilibrium_ty() -> Expr {
323    arrow(fn_ty(nat_ty(), real_ty()), prop())
324}
325/// `MFGConsistencyCondition : Prop`
326/// Consistency: the distribution induced by optimal play equals the equilibrium distribution.
327pub fn mfg_consistency_ty() -> Expr {
328    prop()
329}
330/// `TwoPlayerZeroSumGame : (Nat -> Nat -> Nat -> Real) -> Prop`
331/// Two-player zero-sum stochastic game: reward kernel r(s, a1, a2).
332pub fn two_player_zero_sum_game_ty() -> Expr {
333    let reward_kernel = fn_ty(nat_ty(), fn_ty(nat_ty(), fn_ty(nat_ty(), real_ty())));
334    arrow(reward_kernel, prop())
335}
336/// `ShapleyOperator : Prop`
337/// Shapley (dynamic programming) operator for stochastic games.
338pub fn shapley_operator_ty() -> Expr {
339    prop()
340}
341/// `AverageCostMDP : Prop`
342/// Ergodic/average-cost MDP: min_{π} lim_{T→∞} (1/T) E[Σ_{t=0}^{T-1} r_t].
343pub fn average_cost_mdp_ty() -> Expr {
344    prop()
345}
346/// `PoissonEquation : Prop`
347/// Poisson equation for ergodic MDP: λ + h(s) = min_a {r(s,a) + Σ P(s'|s,a) h(s')}.
348pub fn poisson_equation_ty() -> Expr {
349    prop()
350}
351/// `BiasFunction : (Nat -> Real) -> Prop`
352/// Bias function h in the Poisson equation: captures relative costs across states.
353pub fn bias_function_ty() -> Expr {
354    arrow(fn_ty(nat_ty(), real_ty()), prop())
355}
356/// `OptimalStopping : Prop`
357/// Optimal stopping: choose stopping time τ to maximise E[g(X_τ)] — free boundary problem.
358pub fn optimal_stopping_ty() -> Expr {
359    prop()
360}
361/// `DynkinFormula : Prop`
362/// Dynkin's formula: E[f(X_τ)] = f(x) + E[∫_0^τ Lf(X_t)dt] for Markov processes.
363pub fn dynkin_formula_ty() -> Expr {
364    prop()
365}
366/// `QuasiVariationalInequality : Prop`
367/// QVI for impulse control: min{Lv + f, Mv - v} = 0.
368pub fn quasi_variational_inequality_ty() -> Expr {
369    prop()
370}
371/// `ImpulseControlPolicy : (Nat -> Real) -> Nat -> Prop`
372/// Impulse control policy: specifies intervention times and sizes.
373pub fn impulse_control_policy_ty() -> Expr {
374    arrow(fn_ty(nat_ty(), real_ty()), arrow(nat_ty(), prop()))
375}
376/// `CertaintyEquivalence : Prop`
377/// Certainty equivalence principle: replace unknown parameters with estimates in the control law.
378pub fn certainty_equivalence_ty() -> Expr {
379    prop()
380}
381/// `SelfTuningRegulator : Prop`
382/// Self-tuning regulator: online parameter estimation + adaptive LQR.
383pub fn self_tuning_regulator_ty() -> Expr {
384    prop()
385}
386/// `QlearningConvergence : Real -> Real -> Prop`
387/// Q-learning convergence theorem (Watkins-Dayan): Q_n → Q* a.s. under suitable step sizes.
388pub fn q_learning_convergence_ty() -> Expr {
389    arrow(real_ty(), arrow(real_ty(), prop()))
390}
391/// `FittedValueIteration : Prop`
392/// Fitted value iteration: approximate V* using a function approximator class.
393pub fn fitted_value_iteration_ty() -> Expr {
394    prop()
395}
396/// `JointPolicyConvergence : Nat -> Prop`
397/// Joint policy convergence in multi-agent RL: policies converge to correlated equilibrium.
398pub fn joint_policy_convergence_ty() -> Expr {
399    arrow(nat_ty(), prop())
400}
401/// `CorrelatedEquilibrium : ((Nat -> Nat -> Real) -> Prop) -> Prop`
402/// Correlated equilibrium: joint distribution σ over action profiles such that no agent
403/// benefits from deviating given σ.
404pub fn correlated_equilibrium_ty() -> Expr {
405    let joint_dist = fn_ty(fn_ty(nat_ty(), fn_ty(nat_ty(), real_ty())), prop());
406    arrow(joint_dist, prop())
407}
408/// `RegretMinimisation : Real -> Prop`
409/// Regret minimisation: no-regret algorithm guarantees cumulative regret = o(T).
410pub fn regret_minimisation_ty() -> Expr {
411    arrow(real_ty(), prop())
412}
/// Populate an [`Environment`] with all stochastic-control axioms.
///
/// Every axiom is registered as a `Declaration::Axiom` with no universe
/// parameters. Insertion errors are discarded via `.ok()`, so the call is
/// best-effort — NOTE(review): presumably this skips names already present;
/// confirm `Environment::add` error semantics.
pub fn build_env(env: &mut Environment) {
    // Name table pairing each exported axiom with its type expression.
    let axioms: &[(&str, Expr)] = &[
        ("MDP", mdp_ty()),
        ("Policy", policy_ty()),
        ("StochasticPolicy", stochastic_policy_ty()),
        ("ValueFunction", value_function_ty()),
        ("ActionValueFunction", action_value_function_ty()),
        ("BellmanOperator", bellman_operator_ty()),
        ("PolicyEvaluation", policy_evaluation_ty()),
        ("PolicyImprovement", policy_improvement_ty()),
        ("ValueIteration", value_iteration_ty()),
        ("HamiltonJacobiBellman", hjb_ty()),
        ("StochasticOptimalControl", stochastic_optimal_control_ty()),
        ("LinearQuadraticRegulator", lqr_ty()),
        ("RiccatiEquation", riccati_equation_ty()),
        ("SolveRiccati", solve_riccati_ty()),
        ("OptimalGainMatrix", optimal_gain_matrix_ty()),
        ("InfiniteHorizonLqr", infinite_horizon_lqr_ty()),
        ("QLearning", q_learning_ty()),
        ("SARSA", sarsa_ty()),
        ("PolicyGradient", policy_gradient_ty()),
        ("ActorCritic", actor_critic_ty()),
        ("ConvergenceRateRL", convergence_rate_rl_ty()),
        ("ZeroSumSDG", zero_sum_sdg_ty()),
        ("NashEquilibrium", nash_equilibrium_ty()),
        ("IsaacEquation", isaac_equation_ty()),
        ("PursuitEvasionGame", pursuit_evasion_game_ty()),
        ("StochasticLyapunov", stochastic_lyapunov_ty()),
        ("MeanSquareStability", mean_square_stability_ty()),
        ("AlmostSureStability", almost_sure_stability_ty()),
        ("ExponentialMSStability", exponential_ms_stability_ty()),
        ("POMDP", pomdp_ty()),
        ("BeliefState", belief_state_ty()),
        ("BeliefUpdate", belief_update_ty()),
        ("BeliefMDP", belief_mdp_ty()),
        ("QMDPApproximation", qmdp_approximation_ty()),
        ("EntropicRisk", entropic_risk_ty()),
        ("CVaROptimization", cvar_optimization_ty()),
        ("CoherentRiskMeasure", coherent_risk_measure_ty()),
        ("RiskSensitiveBellman", risk_sensitive_bellman_ty()),
        ("MinimaxControl", minimax_control_ty()),
        ("HInfinityStochastic", h_infinity_stochastic_ty()),
        ("AmbiguitySet", ambiguity_set_ty()),
        (
            "DistributionallyRobustMDP",
            distributionally_robust_mdp_ty(),
        ),
        ("MeanFieldGame", mean_field_game_ty()),
        ("McKeanVlasovSDE", mckean_vlasov_sde_ty()),
        ("MFGNashEquilibrium", mfg_nash_equilibrium_ty()),
        ("MFGConsistencyCondition", mfg_consistency_ty()),
        ("TwoPlayerZeroSumGame", two_player_zero_sum_game_ty()),
        ("ShapleyOperator", shapley_operator_ty()),
        ("AverageCostMDP", average_cost_mdp_ty()),
        ("PoissonEquation", poisson_equation_ty()),
        ("BiasFunction", bias_function_ty()),
        ("OptimalStopping", optimal_stopping_ty()),
        ("DynkinFormula", dynkin_formula_ty()),
        (
            "QuasiVariationalInequality",
            quasi_variational_inequality_ty(),
        ),
        ("ImpulseControlPolicy", impulse_control_policy_ty()),
        ("CertaintyEquivalence", certainty_equivalence_ty()),
        ("SelfTuningRegulator", self_tuning_regulator_ty()),
        ("QlearningConvergence", q_learning_convergence_ty()),
        ("FittedValueIteration", fitted_value_iteration_ty()),
        ("JointPolicyConvergence", joint_policy_convergence_ty()),
        ("CorrelatedEquilibrium", correlated_equilibrium_ty()),
        ("RegretMinimisation", regret_minimisation_ty()),
    ];
    for (name, ty) in axioms {
        env.add(Declaration::Axiom {
            name: Name::str(*name),
            univ_params: vec![],
            ty: ty.clone(),
        })
        .ok(); // errors intentionally ignored (best-effort registration)
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for the axiom environment and the numeric solvers from
    //! `super::types`. The recurring fixture is a two-state MDP with γ = 0.9
    //! where state 1 is absorbing with reward 1, giving V*(1) = 1/(1−γ) = 10
    //! and V*(0) = γ·V*(1) = 9.
    use super::*;
    #[test]
    fn test_build_env() {
        let mut env = Environment::new();
        build_env(&mut env);
        // Spot-check a few axioms from each thematic group.
        assert!(env.get(&Name::str("MDP")).is_some());
        assert!(env.get(&Name::str("ValueFunction")).is_some());
        assert!(env.get(&Name::str("QLearning")).is_some());
        assert!(env.get(&Name::str("NashEquilibrium")).is_some());
        assert!(env.get(&Name::str("StochasticLyapunov")).is_some());
    }
    #[test]
    fn test_mdp_value_iteration() {
        // transitions[s][a] is a distribution over next states.
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let mdp = MDP::new(2, 2, transitions, rewards, 0.9);
        let v = mdp.value_iteration(1e-8, 1000);
        // Geometric series: V*(1) = 1/(1-0.9) = 10; V*(0) = 0.9 * 10 = 9.
        assert!((v[1] - 10.0).abs() < 0.01, "V*(1) ≈ 10, got {}", v[1]);
        assert!((v[0] - 9.0).abs() < 0.01, "V*(0) ≈ 9, got {}", v[0]);
    }
    #[test]
    fn test_policy_evaluation() {
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let mdp = MDP::new(2, 2, transitions, rewards, 0.9);
        // Policy: take action 1 in state 0 (move to state 1), action 0 in state 1.
        let policy = vec![1, 0];
        let v = mdp.policy_evaluation(&policy, 1e-8, 1000);
        assert!(v[1] > v[0], "Good state should have higher value");
    }
    #[test]
    fn test_q_learning_update() {
        let mut agent = QLearning::new(3, 2, 0.5, 0.9);
        agent.update(0, 1, 1.0, 1);
        // Single update: α·(r + γ·max Q(s',·) − Q(s,a)) = 0.5·(1 + 0 − 0),
        // assuming the Q table starts at zero.
        assert!((agent.q[0][1] - 0.5).abs() < 1e-12, "Q(0,1) should be 0.5");
    }
    #[test]
    fn test_sarsa_update() {
        let mut agent = SARSA::new(3, 2, 0.5, 0.9);
        agent.update(0, 1, 1.0, 1, 0);
        // Same arithmetic as Q-learning when all Q entries start at zero.
        assert!(
            (agent.q[0][1] - 0.5).abs() < 1e-12,
            "SARSA Q(0,1) should be 0.5"
        );
    }
    #[test]
    fn test_policy_gradient_softmax() {
        let agent = PolicyGradient::new(2, 3, 0.01, 0.99);
        let pi = agent.softmax(0);
        let total: f64 = pi.iter().sum();
        assert!((total - 1.0).abs() < 1e-12, "softmax should sum to 1");
        // Uniform over 3 actions when the parameters are untrained.
        assert!((pi[0] - 1.0 / 3.0).abs() < 1e-12);
    }
    #[test]
    fn test_actor_critic_update() {
        let mut ac = ActorCritic::new(2, 2, 0.1, 0.1, 0.9);
        let v0 = ac.expected_return(0);
        ac.update(0, 1, 1.0, 1);
        assert!(
            ac.expected_return(0) > v0,
            "critic value should increase after positive reward"
        );
    }
    #[test]
    fn test_pursuit_evasion() {
        // 3-4-5 triangle: distance 5, speeds 2 vs 1 ⇒ closing speed 1.
        let game = PursuitEvasionGame::new([0.0, 0.0], [3.0, 4.0], 2.0, 1.0);
        assert!((game.distance() - 5.0).abs() < 1e-12);
        assert!(game.pursuer_wins());
        assert!((game.capture_time_estimate() - 5.0).abs() < 1e-12);
    }
    #[test]
    fn test_stochastic_lyapunov_bound() {
        let lv = StochasticLyapunov::new(0.5, 0.1);
        assert!(lv.check(-0.9, 2.0));
        let v0 = 10.0;
        // At t = 0 the bound must reduce to the initial value.
        assert!((lv.ev_upper_bound(v0, 0.0) - v0).abs() < 1e-12);
    }
    #[test]
    fn test_exponential_ms_stability() {
        // Bound C e^{-λt} with C = 5, λ = 0.5.
        let stab = ExponentialMSStability::new(5.0, 0.5);
        assert!((stab.bound(0.0) - 5.0).abs() < 1e-12);
        assert!(stab.bound(10.0) < 0.1);
        assert!(stab.check(4.9, 0.0));
        assert!(!stab.check(5.1, 0.0));
    }
    #[test]
    fn test_riccati_solve() {
        // Scalar system A=-1, B=1, Q=1, R=1.
        let riccati = RiccatiEquation::new(
            vec![vec![-1.0]],
            vec![vec![1.0]],
            vec![vec![1.0]],
            vec![vec![1.0]],
        );
        let (p, k) = riccati.infinite_horizon_lqr();
        assert!(p[0][0] > 0.0, "P should be positive");
        assert!(k[0][0] > 0.0, "K should be positive");
    }
    #[test]
    fn test_build_env_new_axioms() {
        // Exhaustive presence check for the POMDP/risk/game/ergodic axiom groups.
        let mut env = Environment::new();
        build_env(&mut env);
        assert!(env.get(&Name::str("POMDP")).is_some());
        assert!(env.get(&Name::str("BeliefState")).is_some());
        assert!(env.get(&Name::str("BeliefUpdate")).is_some());
        assert!(env.get(&Name::str("BeliefMDP")).is_some());
        assert!(env.get(&Name::str("QMDPApproximation")).is_some());
        assert!(env.get(&Name::str("EntropicRisk")).is_some());
        assert!(env.get(&Name::str("CVaROptimization")).is_some());
        assert!(env.get(&Name::str("CoherentRiskMeasure")).is_some());
        assert!(env.get(&Name::str("RiskSensitiveBellman")).is_some());
        assert!(env.get(&Name::str("MinimaxControl")).is_some());
        assert!(env.get(&Name::str("HInfinityStochastic")).is_some());
        assert!(env.get(&Name::str("AmbiguitySet")).is_some());
        assert!(env.get(&Name::str("DistributionallyRobustMDP")).is_some());
        assert!(env.get(&Name::str("MeanFieldGame")).is_some());
        assert!(env.get(&Name::str("McKeanVlasovSDE")).is_some());
        assert!(env.get(&Name::str("MFGNashEquilibrium")).is_some());
        assert!(env.get(&Name::str("MFGConsistencyCondition")).is_some());
        assert!(env.get(&Name::str("TwoPlayerZeroSumGame")).is_some());
        assert!(env.get(&Name::str("ShapleyOperator")).is_some());
        assert!(env.get(&Name::str("AverageCostMDP")).is_some());
        assert!(env.get(&Name::str("PoissonEquation")).is_some());
        assert!(env.get(&Name::str("BiasFunction")).is_some());
        assert!(env.get(&Name::str("OptimalStopping")).is_some());
        assert!(env.get(&Name::str("DynkinFormula")).is_some());
        assert!(env.get(&Name::str("QuasiVariationalInequality")).is_some());
        assert!(env.get(&Name::str("ImpulseControlPolicy")).is_some());
        assert!(env.get(&Name::str("CertaintyEquivalence")).is_some());
        assert!(env.get(&Name::str("SelfTuningRegulator")).is_some());
        assert!(env.get(&Name::str("QlearningConvergence")).is_some());
        assert!(env.get(&Name::str("FittedValueIteration")).is_some());
        assert!(env.get(&Name::str("JointPolicyConvergence")).is_some());
        assert!(env.get(&Name::str("CorrelatedEquilibrium")).is_some());
        assert!(env.get(&Name::str("RegretMinimisation")).is_some());
    }
    #[test]
    fn test_belief_update_normalises() {
        let transitions = vec![vec![vec![1.0, 0.0]], vec![vec![0.0, 1.0]]];
        let observations = vec![vec![vec![0.9, 0.1]], vec![vec![0.2, 0.8]]];
        let pomdp = BeliefMDP::new(2, 1, 2, transitions, observations);
        let belief = vec![0.5, 0.5];
        let new_belief = pomdp.belief_update(&belief, 0, 0);
        let total: f64 = new_belief.iter().sum();
        assert!((total - 1.0).abs() < 1e-10, "belief should sum to 1");
        // Unnormalised: b'(0) = 0.9·0.5 = 0.45, b'(1) = 0.2·0.5 = 0.10 ⇒ 0.45/0.55.
        assert!((new_belief[0] - 0.45 / 0.55).abs() < 1e-10, "b'(0) ≈ 0.818");
    }
    #[test]
    fn test_qmdp_value() {
        let transitions = vec![vec![vec![1.0, 0.0]], vec![vec![0.0, 1.0]]];
        let observations = vec![vec![vec![1.0, 0.0]], vec![vec![0.0, 1.0]]];
        let pomdp = BeliefMDP::new(2, 1, 2, transitions, observations);
        let q_star = vec![vec![1.0], vec![5.0]];
        let belief = vec![0.5, 0.5];
        let v = pomdp.qmdp_value(&belief, &q_star);
        // Σ_s b(s) Q*(s, a) = 0.5·1 + 0.5·5 = 3.
        assert!((v - 3.0).abs() < 1e-12, "QMDP value should be 3.0");
    }
    #[test]
    fn test_cvar_basic() {
        let rsc = RiskSensitiveCost::new(0.5, 1.0);
        let samples: Vec<f64> = (1..=10).map(|x| x as f64).collect();
        let cvar = rsc.cvar(&samples);
        // CVaR at level 0.5 averages the worse half {6..10} ⇒ at least 6.
        assert!(cvar >= 6.0, "CVaR_0.5 should be ≥ 6.0, got {cvar}");
    }
    #[test]
    fn test_entropic_risk() {
        let rsc = RiskSensitiveCost::new(0.5, 2.0);
        let samples = vec![1.0_f64; 100];
        // Entropic risk of a constant equals that constant.
        let er = rsc.entropic_risk(&samples);
        assert!((er - 1.0).abs() < 1e-10, "ρ_θ(1) = 1, got {er}");
    }
    #[test]
    fn test_mean_field_game_solver() {
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let solver = MeanFieldGameSolver::new(2, 2, transitions, rewards, 0.9, 1e-6, 1000);
        let (policy, mu) = solver.solve();
        assert_eq!(policy.len(), 2);
        let total: f64 = mu.iter().sum();
        assert!((total - 1.0).abs() < 1e-6, "MFG distribution sums to 1");
    }
    #[test]
    fn test_value_iteration_solver() {
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let vi = ValueIteration::new(2, 2, transitions, rewards, 0.9);
        let (v, pi) = vi.run(1e-8, 1000);
        assert!((v[1] - 10.0).abs() < 0.01, "V*(1) ≈ 10");
        assert!((v[0] - 9.0).abs() < 0.01, "V*(0) ≈ 9");
        assert_eq!(pi.len(), 2);
    }
    #[test]
    fn test_q_from_v() {
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let vi = ValueIteration::new(2, 2, transitions.clone(), rewards.clone(), 0.9);
        let (v, _) = vi.run(1e-8, 1000);
        let q = vi.q_from_v(&v);
        // Q*(1,0) = r + γ·V*(1) = 1 + 0.9·10 = 10.
        assert!((q[1][0] - 10.0).abs() < 0.1, "Q*(1,0) ≈ 10");
    }
    #[test]
    fn test_span_convergence() {
        let transitions = vec![
            vec![vec![1.0, 0.0], vec![0.0, 1.0]],
            vec![vec![0.0, 1.0], vec![0.0, 1.0]],
        ];
        let rewards = vec![vec![0.0, 0.0], vec![1.0, 1.0]];
        let vi = ValueIteration::new(2, 2, transitions, rewards, 0.9);
        let (v, _) = vi.run(1e-8, 1000);
        let span = vi.span(&v);
        assert!(
            span < 1e-6,
            "span should be near 0 at convergence, got {span}"
        );
    }
    #[test]
    fn test_q_learning_solver_update() {
        let mut solver = QLearningSolver::new(3, 2, 1.0, 0.9, 0.1);
        let prev_q = solver.q.clone();
        solver.update(0, 1, 1.0, 1);
        assert!(
            (solver.q[0][1] - prev_q[0][1]).abs() > 1e-12,
            "Q(0,1) should have been updated"
        );
    }
    #[test]
    fn test_q_learning_solver_greedy_policy() {
        let mut solver = QLearningSolver::new(2, 3, 0.5, 0.9, 0.0);
        // Seed distinct maxima so the argmax is unambiguous.
        solver.q[0][2] = 5.0;
        solver.q[1][1] = 3.0;
        let policy = solver.greedy_policy();
        assert_eq!(policy[0], 2, "state 0 should choose action 2");
        assert_eq!(policy[1], 1, "state 1 should choose action 1");
    }
    #[test]
    fn test_q_learning_solver_convergence_check() {
        let solver = QLearningSolver::new(2, 2, 0.5, 0.9, 0.1);
        let same_q = solver.q.clone();
        assert!(
            solver.has_converged(&same_q, 1e-10),
            "identical Q tables should be converged"
        );
        let mut diff_q = same_q.clone();
        diff_q[0][0] += 1.0;
        assert!(
            !solver.has_converged(&diff_q, 1e-10),
            "different Q tables should not be converged"
        );
    }
}
#[cfg(test)]
mod tests_stoch_control_ext {
    //! Extended tests for game-theoretic / risk-sensitive / pathwise-SDE
    //! helper types from `super::types`.
    use super::*;
    #[test]
    fn test_sd_game_zero_sum() {
        let mut game = SDGame::zero_sum(1.0);
        assert!(game.is_zero_sum);
        assert!(game.saddle_point_exists());
        let isaacs = game.isaacs_equation();
        assert!(isaacs.contains("Isaacs"));
        game.set_value(2.5);
        assert_eq!(game.value_function, Some(2.5));
    }
    #[test]
    fn test_mean_field_game() {
        let mfg = MeanFieldGame::new(1000, 0.5);
        // Propagation-of-chaos rate: O(1/√N) with N = 1000 agents.
        assert!((mfg.convergence_rate - 1.0 / (1000_f64).sqrt()).abs() < 1e-10);
        let desc = mfg.mfg_system_description();
        assert!(desc.contains("1000"));
        let poa = mfg.price_of_anarchy();
        assert!(poa > 1.0);
        let master = mfg.master_equation();
        assert!(master.contains("FP") || master.contains("∂_t"));
    }
    #[test]
    fn test_risk_sensitive_control() {
        let rsc = RiskSensitiveControl::risk_averse(0.5, 1.0);
        assert!(rsc.risk_parameter > 0.0);
        // Certainty equivalent m + (θ/2)σ² = 1 + 0.25·0.5 = 1.125.
        let ce = rsc.certainty_equivalent(1.0, 0.5);
        assert!((ce - 1.125).abs() < 1e-10);
        assert!(rsc.is_robust_control_connection());
        let crit = rsc.exponential_criterion();
        assert!(crit.contains("Risk-sensitive"));
    }
    #[test]
    fn test_hinf_control() {
        let hinf = HInfinityControl::new(2.0, 3, 2, 2);
        assert!(!hinf.is_feasible());
        let minimax = hinf.minimax_criterion();
        assert!(minimax.contains("H∞"));
        let riccati = hinf.game_riccati_equation();
        assert!(riccati.contains("ARE"));
    }
    #[test]
    fn test_ergodic_control() {
        let mut ec = ErgodicControl::new(3);
        ec.set_eigenvalue(1.5);
        assert_eq!(ec.long_run_cost, Some(1.5));
        let hjb = ec.ergodic_hjb();
        assert!(hjb.contains("ergodic"));
        let tp = ec.turnpike_property();
        assert!(tp.contains("Turnpike"));
    }
    #[test]
    fn test_pathwise_sde_euler() {
        let sde = PathwiseSDE::euler_maruyama("ax", "bx", 1.0, 10, 0.01);
        // Euler-Maruyama: strong order 1/2, weak order 1.
        assert!((sde.strong_order() - 0.5).abs() < 1e-10);
        assert!((sde.weak_order() - 1.0).abs() < 1e-10);
        let path = sde.simulate_one_path();
        // 10 steps produce 11 points (including the initial condition).
        assert_eq!(path.len(), 11);
    }
    #[test]
    fn test_pathwise_sde_milstein() {
        let sde = PathwiseSDE::milstein("ax", "bx", 1.0, 5, 0.01);
        // Milstein: strong order 1.
        assert!((sde.strong_order() - 1.0).abs() < 1e-10);
    }
}
825}