// Unit tests for this module are kept in a separate file (see the `#[path]` below)
// and are only compiled for test builds.
#[cfg(test)]
#[path = "../../../tests/unit/algorithms/rl/slot_machine_test.rs"]
mod slot_machine_test;
use crate::utils::DistributionSampler;
use std::fmt::{Display, Formatter};
/// An action which a slot machine performs when it is played.
pub trait SlotAction {
    /// Input handed to the action each time it is taken.
    type Context;

    /// Outcome produced by the action; it must be able to report a reward.
    type Feedback: SlotFeedback;

    /// Performs the action, consuming the given `context`, and returns its feedback.
    fn take(&self, context: Self::Context) -> Self::Feedback;
}
/// Outcome of taking a slot action.
pub trait SlotFeedback {
    /// Returns the reward carried by this feedback as a plain number.
    fn reward(&self) -> f64;
}
/// A single slot machine (bandit arm) which keeps running statistics about the rewards
/// it has been fed and can draw a randomized estimate of its reward from them.
///
/// NOTE(review): the `alpha`/`beta`/`mu` bookkeeping looks like a normal-gamma style
/// posterior over the reward distribution; confirm against the intended model.
#[derive(Clone)]
pub struct SlotMachine<A, S> {
    /// Number of feedbacks applied so far via `update`.
    n: usize,
    /// First gamma hyper-parameter: starts at 1 and grows by 0.5 on every `update`.
    alpha: f64,
    /// Second gamma hyper-parameter: starts at 10 and grows with the squared deviation of
    /// each reward from the current mean. `sample` passes its reciprocal to the sampler.
    beta: f64,
    /// Mean estimate: the prior mean until the first `update`, afterwards the running
    /// average of the observed rewards.
    mu: f64,
    /// Variance estimate, kept equal to `beta / (alpha + 1)`.
    v: f64,
    /// Source of gamma and normal random draws used by `sample`.
    sampler: S,
    /// Action executed when the machine is played.
    action: A,
}
impl<A, S> SlotMachine<A, S>
where
    A: SlotAction + Clone,
    S: DistributionSampler + Clone,
{
    /// Creates a slot machine which has not received any feedback yet.
    ///
    /// `prior_mean` seeds the mean estimate, `action` is what gets executed by
    /// [`Self::play`], and `sampler` provides the random draws used by [`Self::sample`].
    pub fn new(prior_mean: f64, action: A, sampler: S) -> Self {
        // Fixed starting values of the two gamma hyper-parameters.
        const INITIAL_ALPHA: f64 = 1.;
        const INITIAL_BETA: f64 = 10.;

        Self {
            n: 0,
            alpha: INITIAL_ALPHA,
            beta: INITIAL_BETA,
            mu: prior_mean,
            v: INITIAL_BETA / (INITIAL_ALPHA + 1.),
            sampler,
            action,
        }
    }

    /// Draws a randomized reward estimate from the current statistics.
    ///
    /// A precision is drawn with the sampler's `gamma(alpha, 1 / beta)` and the result is
    /// then drawn with `normal(mu, sqrt(1 / precision))`.
    ///
    /// NOTE(review): presumably the `gamma` arguments are (shape, scale) and the `normal`
    /// arguments are (mean, standard deviation); confirm against `DistributionSampler`.
    pub fn sample(&self) -> f64 {
        // Precision used while no feedback has been seen yet, or when the draw is zero
        // (which would otherwise lead to a division by zero below).
        const FALLBACK_PRECISION: f64 = 0.001;

        // The gamma draw is always requested, even when its value ends up being replaced
        // by the fallback, so the sampler is consulted identically on every call.
        let drawn = self.sampler.gamma(self.alpha, 1. / self.beta);

        let precision = match self.n {
            0 => FALLBACK_PRECISION,
            _ if drawn == 0. => FALLBACK_PRECISION,
            _ => drawn,
        };

        let std_dev = (1. / precision).sqrt();

        self.sampler.normal(self.mu, std_dev)
    }

    /// Executes the underlying action with the given `context` and returns its feedback.
    ///
    /// The machine's statistics are not touched; pass the feedback to [`Self::update`]
    /// to record it.
    pub fn play(&self, context: A::Context) -> A::Feedback {
        self.action.take(context)
    }

    /// Folds the reward of a single `feedback` into the machine's statistics.
    pub fn update(&mut self, feedback: &A::Feedback) {
        let observed = feedback.reward();
        // Deviation from the mean *before* this observation; it drives both the `beta`
        // and the `mu` update.
        let deviation = observed - self.mu;
        // Amount of feedback seen prior to this one.
        let seen = self.n as f64;

        // Exactly one observation is added per call, hence the constant 0.5 increment
        // and the `seen / (seen + 1)` weight.
        self.alpha += 0.5;
        self.beta += (seen / (seen + 1.)) * deviation.powi(2) / 2.;

        // Refresh the variance estimate from the updated hyper-parameters.
        self.v = self.beta / (self.alpha + 1.);

        // Incremental mean over all observations, including the new one.
        self.n += 1;
        self.mu += deviation / self.n as f64;
    }

    /// Returns the internal state as `(alpha, beta, mu, v, n)`.
    pub fn get_params(&self) -> (f64, f64, f64, f64, usize) {
        let Self { n, alpha, beta, mu, v, .. } = self;

        (*alpha, *beta, *mu, *v, *n)
    }
}
/// Renders the machine's statistics as `n=..,alpha=..,beta=..,mu=..,v=..`.
///
/// Only the numeric fields are printed, so no bounds are placed on the action or the
/// sampler type: any `SlotMachine` can be displayed.
impl<A, S> Display for SlotMachine<A, S> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { n, alpha, beta, mu, v, .. } = self;

        write!(f, "n={},alpha={},beta={},mu={},v={}", n, alpha, beta, mu, v)
    }
}