use crate::bandits::arm::{Arm, MultiArm};
use crate::bandits::bandit::Bandit;
/// Aggregated output of [`Benchmark::run`]: one history row per bandit.
#[derive(Clone, Debug)]
pub struct BenchmarkResult {
    /// Indexed as `[bandit][step]`: the reward received at each step,
    /// averaged over all runs.
    pub average_reward_history: Vec<Vec<f64>>,
    /// Indexed as `[bandit][step]`: the fraction of runs (in `[0, 1]`)
    /// in which the optimal arm was selected at that step. `None` when
    /// the arm set does not expose an optimal arm.
    pub optimal_action_percentage_history: Option<Vec<Vec<f64>>>,
}
/// Runs a set of bandit algorithms against a common multi-armed
/// environment so their learning behaviour can be compared step by step.
pub struct Benchmark<A: Arm> {
    /// The shared arm set (environment) every bandit pulls from.
    pub arm: MultiArm<A>,
    /// The bandit policies under comparison; boxed trait objects so
    /// heterogeneous algorithms can be benchmarked together.
    pub bandits: Vec<Box<dyn Bandit>>,
}
impl<A: Arm> Benchmark<A> {
    /// Runs every bandit against the shared arm set for `runs`
    /// independent runs of `steps` time steps each.
    ///
    /// Returns, per bandit, the per-step reward averaged over runs and —
    /// when the arm set exposes an optimal arm — the per-step fraction
    /// of runs in which that optimal arm was chosen.
    ///
    /// With `runs == 0` both histories are all zeros (the naive
    /// `sum / runs` normalization would produce NaN).
    pub fn run(&mut self, runs: usize, steps: usize) -> BenchmarkResult {
        let optimal_arm = self.arm.optimal_arm();
        let n_bandits = self.bandits.len();
        // Accumulate per-step sums first; normalize into averages after
        // all runs have completed.
        let mut average_reward_history = vec![vec![0.0; steps]; n_bandits];
        let mut optimal_action_percentage_history = vec![vec![0.0; steps]; n_bandits];
        for _ in 0..runs {
            // Reset learned state so every run is independent.
            self.bandits.iter_mut().for_each(|bandit| bandit.restart());
            for t in 0..steps {
                for (i, bandit) in self.bandits.iter_mut().enumerate() {
                    let arm = bandit.select_arm();
                    let reward = self.arm.pull(arm);
                    average_reward_history[i][t] += reward;
                    if optimal_arm == Some(arm) {
                        optimal_action_percentage_history[i][t] += 1.0;
                    }
                    bandit.receive_reward(reward);
                }
            }
        }
        // Turn accumulated sums into averages. Guard against `runs == 0`,
        // which would otherwise fill both histories with NaN (0.0 / 0.0);
        // the zero-initialized histories are returned unchanged instead.
        if runs > 0 {
            let scale = 1.0 / runs as f64;
            for row in average_reward_history
                .iter_mut()
                .chain(optimal_action_percentage_history.iter_mut())
            {
                for value in row.iter_mut() {
                    *value *= scale;
                }
            }
        }
        BenchmarkResult {
            average_reward_history,
            // Only meaningful when an optimal arm exists; dropped otherwise.
            optimal_action_percentage_history: optimal_arm
                .map(|_| optimal_action_percentage_history),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bandits::arm::RandomArm;
    use crate::bandits::bandit::StochasticBandit;
    use rand_distr::{Distribution, Normal};

    #[test]
    fn test() {
        // Ten arms whose true values are drawn from a standard normal;
        // the RNG must be a named binding so the sample iterator's
        // borrow outlives the statement it was created in.
        let mut rng = rand::thread_rng();
        let arms = Normal::new(0.0, 1.0)
            .unwrap()
            .sample_iter(&mut rng)
            .take(10)
            .map(RandomArm::normal)
            .collect();
        let mut benchmark = Benchmark {
            arm: MultiArm::new(arms),
            bandits: vec![Box::new(StochasticBandit::greedy(10))],
        };
        let result = benchmark.run(10, 100);
        // One reward trace per bandit, and normal arms expose an optimal arm.
        assert_eq!(result.average_reward_history.len(), 1);
        assert!(result.optimal_action_percentage_history.is_some());
    }
}