1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
use std::marker::PhantomData;

use num_traits::ToPrimitive;

use crate::agent::agent::update;
use crate::Stepper;

use super::{Agent, ArgBounds};

/// Agent that follows the Greedy Algorithm.
///
/// Always chooses the arm with the highest estimated reward.
///
/// The type parameter `T` is the reward type accepted by this type's [`Agent`]
/// implementation. No value of `T` is stored in the struct itself.
pub struct GreedyAgent<T> {
    /// The current estimates of the Bandit arm values.
    q_star: Vec<f64>,

    /// The Agent's rule for step size updates.
    stepper: Box<dyn Stepper>,
    /// Zero-sized marker that lets the struct be generic over `T` without holding a `T`.
    phantom: PhantomData<T>,
}

impl<T: ToPrimitive> Agent<T> for GreedyAgent<T> {
    /// Selects the arm to play next.
    ///
    /// The greedy policy returns whatever index `arg_max` reports for the current
    /// value estimates.
    fn action(&self) -> usize {
        self.q_star.arg_max()
    }

    /// Returns how many arms the Agent is tracking, i.e. the number of stored estimates.
    fn arms(&self) -> usize {
        self.q_star.len()
    }

    /// Returns the Agent's present value estimate for `arm`.
    ///
    /// # Panics
    ///
    /// Panics if `arm` is not a valid index into the stored estimates.
    fn current_estimate(&self, arm: usize) -> f64 {
        self.q_star[arm]
    }

    /// Discards the learned estimates in favour of a copy of `q_init`, then resets the stepper.
    fn reset(&mut self, q_init: &[f64]) {
        self.q_star = q_init.to_vec();
        self.stepper.reset();
    }

    /// Folds an observed `reward` for `arm` into that arm's estimate.
    ///
    /// The increment is computed by `update` from the stepper, the current estimates,
    /// the arm index and the reward; it is then added to the estimate for `arm`.
    ///
    /// # Panics
    ///
    /// Panics if `arm` is not a valid index into the stored estimates.
    fn step(&mut self, arm: usize, reward: T) {
        // Compute the increment first so the shared borrow of `q_star` has ended
        // before the estimate is mutated.
        let increment = update(&mut self.stepper, &self.q_star, arm, reward);
        self.q_star[arm] += increment;
    }
}

impl<T> GreedyAgent<T> {
    /// Builds a Greedy agent from its starting state.
    ///
    /// `q_init` becomes the initial per-arm value estimates (one entry per arm) and
    /// `stepper` supplies the step-size rule used when estimates are updated.
    pub fn new(q_init: Vec<f64>, stepper: Box<dyn Stepper>) -> GreedyAgent<T> {
        let phantom = PhantomData;
        Self {
            q_star: q_init,
            stepper,
            phantom,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::HarmonicStepper;

    use super::{Agent, GreedyAgent};

    /// Initial arm-value estimates shared by the tests; the largest value (0.7) is at index 2.
    const Q_INIT: [f64; 5] = [0.5, 0.61, 0.7, 0.12, 0.37];

    /// Builds a `GreedyAgent` seeded with `Q_INIT` and a fresh `HarmonicStepper`.
    fn greedy_agent() -> GreedyAgent<u32> {
        let stepper = HarmonicStepper::new(1, Q_INIT.len());
        GreedyAgent::new(Q_INIT.to_vec(), Box::new(stepper))
    }

    /// The greedy action is the index of the largest initial estimate.
    #[test]
    fn test_action() {
        let greedy = greedy_agent();
        assert_eq!(greedy.action(), 2);
    }

    /// The number of arms equals the number of initial estimates.
    #[test]
    fn test_arms() {
        let greedy = greedy_agent();
        assert_eq!(greedy.arms(), Q_INIT.len());
    }

    /// Before any update, each arm's estimate is its initial value.
    #[test]
    fn test_current_estimate() {
        let greedy = greedy_agent();
        for (arm, &expected) in Q_INIT.iter().enumerate() {
            assert_eq!(greedy.current_estimate(arm), expected);
        }
    }

    /// A freshly constructed agent stores the initial estimates unchanged.
    #[test]
    fn test_q_star() {
        let greedy = greedy_agent();
        assert_eq!(greedy.q_star, Q_INIT);
    }

    /// Resetting replaces the stored estimates with the supplied values.
    #[test]
    fn test_reset() {
        let mut greedy = greedy_agent();
        let new_q = vec![0.01, 0.86, 0.43, 0.65, 0.66];
        greedy.reset(&new_q);
        assert_eq!(greedy.q_star, new_q);
    }
}