1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
use super::*;
use crate::utils::{compare_floats, Random};
use std::sync::Arc;
/// A learning strategy implementing the Q-learning update rule.
pub struct QLearning {
    /// Learning rate: how strongly a new observation moves the old estimate.
    alpha: f64,
    /// Discount factor: weight given to the best estimate of the next state.
    gamma: f64,
}
impl QLearning {
    /// Creates a `QLearning` strategy with the given learning rate (`alpha`)
    /// and discount factor (`gamma`). Values are not validated here.
    pub fn new(alpha: f64, gamma: f64) -> Self {
        Self { alpha, gamma }
    }
}
impl<S: State> LearningStrategy<S> for QLearning {
    /// Applies the Q-learning temporal-difference update:
    /// `Q <- Q + alpha * (reward + gamma * max_a' Q(s', a') - Q)`.
    fn value(&self, reward_value: f64, old_value: f64, estimates: &ActionEstimates<S>) -> f64 {
        // Best known estimate among next actions; falls back to zero when absent.
        let best_next = match estimates.max.as_ref() {
            Some((_, value)) => *value,
            None => 0.,
        };
        let td_target = reward_value + self.gamma * best_next;
        old_value + self.alpha * (td_target - old_value)
    }
}
/// A learning strategy implementing an incremental Monte Carlo style update:
/// moves the estimate towards the observed reward, ignoring next-state estimates.
pub struct MonteCarlo {
    /// Learning rate: step size of each incremental update.
    alpha: f64,
}
impl MonteCarlo {
    /// Creates a `MonteCarlo` strategy with the given learning rate.
    pub fn new(alpha: f64) -> Self {
        Self { alpha }
    }
}
impl<S: State> LearningStrategy<S> for MonteCarlo {
    /// Moves the old estimate towards the observed reward by a factor of `alpha`.
    /// Next-action estimates are ignored by this strategy.
    fn value(&self, reward_value: f64, old_value: f64, _estimates: &ActionEstimates<S>) -> f64 {
        let error = reward_value - old_value;
        old_value + error * self.alpha
    }
}
/// An epsilon-greedy policy strategy: explores a random action with
/// probability `epsilon`, otherwise exploits the best-known action.
pub struct EpsilonGreedy {
    /// Exploration probability, expected in `[0, 1]` (not validated here).
    epsilon: f64,
    /// Shared randomization source used for the epsilon check and exploration.
    random: Arc<dyn Random + Send + Sync>,
}
impl EpsilonGreedy {
    /// Creates an `EpsilonGreedy` policy with the given exploration probability
    /// and randomization source.
    pub fn new(epsilon: f64, random: Arc<dyn Random + Send + Sync>) -> Self {
        Self { epsilon, random }
    }
}
impl<S: State> PolicyStrategy<S> for EpsilonGreedy {
    /// Selects an action: with probability `epsilon` a random one (exploration),
    /// otherwise the one with the highest estimate (exploitation).
    /// Returns `None` when no estimates are available.
    fn select(&self, estimates: &ActionEstimates<S>) -> Option<S::Action> {
        let data = estimates.data();
        if data.is_empty() {
            return None;
        }
        if !self.random.is_hit(self.epsilon) {
            // Exploit: pick the action with the maximum estimated value.
            data.iter()
                .max_by(|(_, left), (_, right)| compare_floats(**left, **right))
                .map(|(action, _)| action.clone())
        } else {
            // Explore: pick a uniformly random action.
            estimates.random(self.random.as_ref())
        }
    }
}
/// A greedy policy strategy: always exploits by selecting the action with the
/// highest estimated value, never exploring.
// `Default` is derived rather than hand-written: the previous manual
// `impl Default { Self {} }` was equivalent boilerplate (clippy::derivable_impls).
#[derive(Default)]
pub struct Greedy;
impl<S: State> PolicyStrategy<S> for Greedy {
    /// Always exploits: returns the action with the highest estimated value,
    /// or `None` when there are no estimates.
    fn select(&self, estimates: &ActionEstimates<S>) -> Option<S::Action> {
        let best = estimates
            .data()
            .iter()
            .max_by(|(_, left), (_, right)| compare_floats(**left, **right));
        best.map(|(action, _)| action.clone())
    }
}
/// An epsilon-based policy strategy: explores a random action with probability
/// `epsilon`, otherwise samples an action weighted by its estimated value.
pub struct EpsilonWeighted {
    /// Exploration probability, expected in `[0, 1]` (not validated here).
    epsilon: f64,
    /// Shared randomization source used for the epsilon check and sampling.
    random: Arc<dyn Random + Send + Sync>,
}
impl EpsilonWeighted {
    /// Creates an `EpsilonWeighted` policy with the given exploration probability
    /// and randomization source.
    pub fn new(epsilon: f64, random: Arc<dyn Random + Send + Sync>) -> Self {
        Self { epsilon, random }
    }
}
impl<S: State> PolicyStrategy<S> for EpsilonWeighted {
    /// Selects an action: with probability `epsilon` a uniformly random one,
    /// otherwise one sampled proportionally to the action estimates.
    /// Returns `None` when no estimates are available.
    fn select(&self, estimates: &ActionEstimates<S>) -> Option<S::Action> {
        if estimates.data().is_empty() {
            return None;
        }
        match self.random.is_hit(self.epsilon) {
            // Explore: uniform random action.
            true => estimates.random(self.random.as_ref()),
            // Exploit: weighted sampling over the estimates.
            false => estimates.weighted(self.random.as_ref()),
        }
    }
}