#[cfg(feature = "stochastic")]
use crate::Decision;
/// Common interface for bandit policies: pick an arm, then learn from the
/// reward observed for it.
///
/// Only compiled when the `stochastic` feature is enabled.
#[cfg(feature = "stochastic")]
pub trait BanditPolicy {
    /// Asks the policy to choose among `arms`.
    ///
    /// Returns `Some(Decision)` when a choice was made and `None` otherwise;
    /// the conditions that produce `None` are left to each implementation.
    fn decide(&mut self, arms: &[String]) -> Option<Decision>;

    /// Reports the `reward` observed for `arm` back to the policy.
    ///
    /// NOTE(review): the accepted range of `reward` is not stated by this
    /// trait — confirm against the concrete policies.
    fn update_reward(&mut self, arm: &str, reward: f64);
}
/// Exposes `crate::ThompsonSampling` through the `BanditPolicy` interface.
///
/// Both methods forward to the same-named functions on the concrete type.
/// Type-qualified paths look up inherent items before trait items, so the
/// calls are expected to reach `ThompsonSampling`'s own methods.
///
/// NOTE(review): this assumes inherent `decide` / `update_reward` methods
/// exist on `ThompsonSampling` (they are defined outside this file); without
/// them the calls would resolve back to this impl and recurse.
#[cfg(feature = "stochastic")]
impl BanditPolicy for crate::ThompsonSampling {
    /// Forwards arm selection to `ThompsonSampling::decide`.
    fn decide(&mut self, arms: &[String]) -> Option<Decision> {
        crate::ThompsonSampling::decide(self, arms)
    }

    /// Forwards the reward observation to `ThompsonSampling::update_reward`.
    fn update_reward(&mut self, arm: &str, reward: f64) {
        // The trailing `;` drops whatever the forwarded call returns.
        crate::ThompsonSampling::update_reward(self, arm, reward);
    }
}
/// Exposes `crate::Exp3Ix` through the `BanditPolicy` interface.
///
/// Both methods forward to the same-named functions on the concrete type.
/// Type-qualified paths look up inherent items before trait items, so the
/// calls are expected to reach `Exp3Ix`'s own methods.
///
/// NOTE(review): this assumes inherent `decide` / `update_reward` methods
/// exist on `Exp3Ix` (they are defined outside this file); without them the
/// calls would resolve back to this impl and recurse.
#[cfg(feature = "stochastic")]
impl BanditPolicy for crate::Exp3Ix {
    /// Forwards arm selection to `Exp3Ix::decide`.
    fn decide(&mut self, arms: &[String]) -> Option<Decision> {
        crate::Exp3Ix::decide(self, arms)
    }

    /// Forwards the reward observation to `Exp3Ix::update_reward`.
    fn update_reward(&mut self, arm: &str, reward: f64) {
        // The trailing `;` drops whatever the forwarded call returns.
        crate::Exp3Ix::update_reward(self, arm, reward);
    }
}
/// Tests for the `BanditPolicy` trait and its implementations.
///
/// Every call that is meant to exercise the trait goes through either a
/// `P: BanditPolicy` bound or fully-qualified `<T as BanditPolicy>::…` syntax.
/// Plain method-call syntax on a concrete policy prefers an inherent method of
/// the same name over the trait's, which would leave the trait impl untested.
#[cfg(all(test, feature = "stochastic"))]
mod tests {
    use super::*;
    use crate::{Exp3Ix, Exp3IxConfig, ThompsonConfig, ThompsonSampling};

    /// Fixed three-arm set shared by the tests.
    fn arms() -> Vec<String> {
        vec!["a".to_string(), "b".to_string(), "c".to_string()]
    }

    /// Runs ten decide/update rounds using only the `BanditPolicy` bound, so
    /// the calls dispatch through the trait for any `P`.
    fn run_generic<P: BanditPolicy>(p: &mut P) {
        let a = arms();
        for _ in 0..10 {
            if let Some(d) = p.decide(&a) {
                p.update_reward(&d.chosen, 0.7);
            }
        }
    }

    #[test]
    fn thompson_implements_bandit_policy() {
        let mut ts = ThompsonSampling::with_seed(ThompsonConfig::default(), 0);
        run_generic(&mut ts);
    }

    #[test]
    fn exp3ix_implements_bandit_policy() {
        let mut ex = Exp3Ix::new(Exp3IxConfig::default());
        run_generic(&mut ex);
    }

    #[test]
    fn bandit_policy_decide_returns_member_of_arms() {
        let a = arms();
        let mut ts = ThompsonSampling::with_seed(ThompsonConfig::default(), 42);
        for _ in 0..20 {
            // Fully qualified so the trait impl is what gets exercised.
            let d = <ThompsonSampling as BanditPolicy>::decide(&mut ts, &a).unwrap();
            assert!(a.contains(&d.chosen));
            <ThompsonSampling as BanditPolicy>::update_reward(&mut ts, &d.chosen, 0.5);
        }
    }

    #[test]
    fn bandit_policy_returns_none_on_empty_arms() {
        let mut ts = ThompsonSampling::with_seed(ThompsonConfig::default(), 0);
        // Fully qualified so the trait impl is what gets exercised.
        assert!(<ThompsonSampling as BanditPolicy>::decide(&mut ts, &[]).is_none());
    }
}