1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
//! Configuration of [SimpleReplayBuffer](super::SimpleReplayBuffer).
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::{
    default::Default,
    fs::File,
    io::{BufReader, Write},
    path::Path,
};
use super::{WeightNormalizer, WeightNormalizer::All};

/// Configuration for prioritized experience replay.
#[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
pub struct PerConfig {
    pub(super) alpha: f32,
    /// Initial value of $\beta$.
    pub(super) beta_0: f32,
    /// Final value of $\beta$.
    pub(super) beta_final: f32,
    /// Optimization step when beta reaches its final value.
    pub(super) n_opts_final: usize,
    /// How to normalize the weights.
    pub(super) normalize: WeightNormalizer,
}

impl Default for PerConfig {
    fn default() -> Self {
        Self {
            alpha: 0.6,
            beta_0: 0.4,
            beta_final: 1.0,
            n_opts_final: 500_000,
            normalize: All,
        }
    }
}

impl PerConfig {
    /// Sets alpha, the exponent of sampling probability.
    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    /// Sets beta_0, the initial value of the exponent of importance weights.
    pub fn beta_0(mut self, beta_0: f32) -> Self {
        self.beta_0 = beta_0;
        self
    }

    /// Sets beta_final, the final value of the exponent of importance weights.
    pub fn beta_final(mut self, beta_final: f32) -> Self {
        self.beta_final = beta_final;
        self
    }

    /// Sets the optimization step when beta reaches the final value.
    pub fn n_opts_final(mut self, n_opts_final: usize) -> Self {
        self.n_opts_final = n_opts_final;
        self
    }

    /// Sets how to normalize the importance weight.
    pub fn normalize(mut self, normalize: WeightNormalizer) -> Self {
        self.normalize = normalize;
        self
    }
}

/// Configuration of [SimpleReplayBuffer](super::SimpleReplayBuffer).
#[derive(Debug, Deserialize, Serialize, PartialEq, Clone)]
pub struct SimpleReplayBufferConfig {
    pub(super) capacity: usize,
    pub(super) seed: u64,
    pub(super) per_config: Option<PerConfig>,
}

impl Default for SimpleReplayBufferConfig {
    fn default() -> Self {
        Self {
            capacity: 10000,
            seed: 42,
            per_config: None,
        }
    }
}

impl SimpleReplayBufferConfig {
    /// Sets the capacity of the replay buffer.
    pub fn capacity(mut self, capacity: usize) -> Self {
        self.capacity = capacity;
        self
    }

    /// Sets configuration of PER.
    pub fn per_config(mut self, per_config: Option<PerConfig>) -> Self {
        self.per_config = per_config;
        self
    }

    /// Constructs [SimpleReplayBufferConfig] from YAML file.
    pub fn load(path: impl AsRef<Path>) -> Result<Self> {
        let file = File::open(path)?;
        let rdr = BufReader::new(file);
        let b = serde_yaml::from_reader(rdr)?;
        Ok(b)
    }

    /// Saves [SimpleReplayBufferConfig].
    pub fn save(&self, path: impl AsRef<Path>) -> Result<()> {
        let mut file = File::create(path)?;
        file.write_all(serde_yaml::to_string(&self)?.as_bytes())?;
        Ok(())
    }
}