1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
use super::{CloneBuild, EnvStructure, Environment, Successor};
use crate::feedback::Reward;
use crate::logging::StatsLogger;
use crate::spaces::{IndexSpace, IndexedTypeSpace, IntervalSpace};
use crate::Prng;
use rand::prelude::*;
use relearn_derive::Indexed;
use serde::{Deserialize, Serialize};
/// Configuration of a chain environment: a line of states indexed from 0.
///
/// The `Environment` implementation in this file starts every episode at
/// state 0, lets `Move::Right` advance one state at a time and lets
/// `Move::Left` jump back to state 0.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct Chain {
/// Number of states in the chain; passed to `IndexSpace::new` to build the
/// observation space.
pub size: usize,
/// Discount factor reported by `EnvStructure::discount_factor`.
pub discount_factor: f64,
}
// Implements `CloneBuild` with an empty body, i.e. relying entirely on
// whatever items the trait itself provides.
// NOTE(review): presumably this lets a `Chain` value act as its own
// environment builder by cloning itself — confirm against the trait definition.
impl CloneBuild for Chain {}
impl Chain {
#[must_use]
pub const fn new(size: usize, discount_factor: f64) -> Self {
Self {
size,
discount_factor,
}
}
}
impl Default for Chain {
    /// Returns a chain with 5 states and a discount factor of 0.95.
    fn default() -> Self {
        const DEFAULT_SIZE: usize = 5;
        const DEFAULT_DISCOUNT_FACTOR: f64 = 0.95;
        Self::new(DEFAULT_SIZE, DEFAULT_DISCOUNT_FACTOR)
    }
}
impl EnvStructure for Chain {
type ObservationSpace = IndexSpace;
type ActionSpace = IndexedTypeSpace<Move>;
type FeedbackSpace = IntervalSpace<Reward>;
fn observation_space(&self) -> Self::ObservationSpace {
IndexSpace::new(self.size)
}
fn action_space(&self) -> Self::ActionSpace {
Self::ActionSpace::new()
}
fn feedback_space(&self) -> Self::FeedbackSpace {
IntervalSpace::new(Reward(0.0), Reward(10.0))
}
fn discount_factor(&self) -> f64 {
self.discount_factor
}
}
/// Dynamics of the chain: states are indices, the state is observed directly,
/// and every step yields a [`Reward`].
impl Environment for Chain {
    type State = usize;
    type Observation = usize;
    type Action = Move;
    type Feedback = Reward;

    /// Every episode starts at state 0; the random generator is not used.
    fn initial_state(&self, _: &mut Prng) -> Self::State {
        0
    }

    /// The observation is the state index itself.
    fn observe(&self, state: &Self::State, _: &mut Prng) -> Self::Observation {
        *state
    }

    /// Applies `action` to `state` and returns the successor and its reward.
    ///
    /// * With probability 0.2 the action "slips" and its inverse is executed
    ///   instead.
    /// * `Move::Left` jumps back to state 0 for a reward of 2.
    /// * `Move::Right` advances one state for a reward of 0, except on the
    ///   last state, where it stays in place for a reward of 10.
    ///
    /// The result is always `Successor::Continue`. The logger is not used.
    fn step(
        &self,
        state: Self::State,
        action: &Self::Action,
        rng: &mut Prng,
        _: &mut dyn StatsLogger,
    ) -> (Successor<Self::State>, Self::Feedback) {
        let requested = *action;
        // Exactly one random draw per step decides whether the move slips.
        let executed = if rng.gen::<f32>() < 0.2 {
            requested.invert()
        } else {
            requested
        };
        let (next_state, reward) = match executed {
            Move::Left => (0, 2.0),
            // `state + 1 >= self.size` instead of `state == self.size - 1`:
            // same result for every in-range state when `size >= 1`, but it
            // cannot underflow when `size == 0` and never moves past the end.
            Move::Right if state + 1 >= self.size => (state, 10.0),
            Move::Right => (state + 1, 0.0),
        };
        (Successor::Continue(next_state), reward.into())
    }
}
/// A move along the chain; the action type of the `Chain` environment.
///
/// NOTE(review): the derived `Indexed` impl presumably numbers the variants in
/// declaration order — confirm before reordering them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Indexed)]
pub enum Move {
/// In `Chain::step`: jump back to state 0 for a reward of 2.
Left,
/// In `Chain::step`: advance one state for a reward of 0, or stay on the
/// last state for a reward of 10.
Right,
}
impl Move {
    /// Returns the opposite move: `Left` becomes `Right` and vice versa.
    const fn invert(self) -> Self {
        if matches!(self, Self::Left) {
            Self::Right
        } else {
            Self::Left
        }
    }
}
#[cfg(test)]
mod tests {
    use super::super::testing;
    use super::*;

    /// Runs the shared structured-environment check on the default chain.
    // NOTE(review): the trailing arguments (1000, 0) look like a step count
    // and a seed — confirm against `testing::check_structured_env`.
    #[test]
    fn run_default() {
        let env = Chain::default();
        testing::check_structured_env(&env, 1000, 0);
    }
}