pub fn simulate(
mdp: &Mdp,
policy: &[usize],
initial_state: usize,
n_steps: usize,
seed: u64,
) -> (Vec<usize>, Vec<usize>, Vec<f64>)
Simulate an MDP with a fixed deterministic policy.
Returns the `(states, actions, rewards)` trajectories, each of length `n_steps`.