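// A gambler's-ruin style example: starting from some balance, the agent
// repeatedly stakes part of it on a biased coin flip (the bias is set so
// the flip is won with probability 101/256, roughly 0.4) and uses
// Q-learning, via the rurel crate, to learn how much to bet at each
// balance to maximise the chance of reaching TARGET before going broke.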
use rurel::mdp::{Agent, State};
use rurel::strategy::explore::RandomExploration;
use rurel::strategy::learn::QLearning;
use rurel::strategy::terminate::SinkStates;
use rurel::AgentTrainer;

/// The balance the gambler is trying to reach.
const TARGET: i32 = 100;
/// The coin's bias: a flip is won when a random u8 is at most WEIGHT,
/// so the win probability is 101/256, roughly 0.4.
const WEIGHT: u8 = 100;

#[derive(PartialEq, Eq, Hash, Clone)]
struct CoinState {
    balance: i32,
}

#[derive(PartialEq, Eq, Hash, Clone)]
struct CoinAction {
    bet: i32,
}

impl State for CoinState {
    type A = CoinAction;

    // The agent is rewarded only for reaching the target balance.
    fn reward(&self) -> f64 {
        if self.balance >= TARGET {
            1.0
        } else {
            0.0
        }
    }

    // Legal bets: never more than the current balance, and never more
    // than is needed to reach the target. At balance 0 (ruin) and at
    // TARGET the range is empty, so those states are sinks.
    fn actions(&self) -> Vec<CoinAction> {
        let bet_range = if self.balance < TARGET / 2 {
            1..self.balance + 1
        } else {
            1..(TARGET - self.balance) + 1
        };
        bet_range.map(|bet| CoinAction { bet }).collect()
    }
}

struct CoinAgent {
    state: CoinState,
}

impl Agent<CoinState> for CoinAgent {
    fn current_state(&self) -> &CoinState {
        &self.state
    }

    // Flip the biased coin: the bet is won with probability 101/256
    // and lost otherwise.
    fn take_action(&mut self, action: &CoinAction) {
        self.state = CoinState {
            balance: if rand::random::<u8>() <= WEIGHT {
                self.state.balance + action.bet
            } else {
                self.state.balance - action.bet
            },
        }
    }
}

fn main() {
    const TRIALS: i32 = 100_000;
    let mut trainer = AgentTrainer::new();
    for trial in 0..TRIALS {
        // Start each episode from a different balance (cycling through
        // 1..=98) so that every state gets visited during training.
        let mut agent = CoinAgent {
            state: CoinState {
                balance: 1 + trial % 98,
            },
        };
        // Q-learning with learning rate 0.2, discount factor 1.0, and
        // initial Q-values of 0.0; episodes terminate at sink states
        // (balance 0 or TARGET), and actions are explored at random.
        trainer.train(
            &mut agent,
            &QLearning::new(0.2, 1.0, 0.0),
            &mut SinkStates {},
            &RandomExploration::new(),
        );
    }

    // Print the learned policy: the best bet and its Q-value for every
    // non-terminal balance.
    println!("Balance\tBet\tQ-value");
    for balance in 1..TARGET {
        let state = CoinState { balance };
        let action = trainer.best_action(&state).unwrap();
        println!(
            "{}\t{}\t{}",
            balance,
            action.bet,
            trainer.expected_value(&state, &action).unwrap(),
        );
    }
}