border_core/base/
replay_buffer.rs

1//! Replay buffer interface for reinforcement learning.
2//!
3//! This module defines the core interfaces for experience replay buffers in reinforcement learning.
4//! Replay buffers are essential components that store and sample experiences (transitions)
5//! for training agents, enabling more efficient learning through experience replay.
6
7use anyhow::Result;
8
/// Interface for buffers that store experiences from environments.
///
/// An implementor accepts experiences one at a time through
/// [`ExperienceBufferBase::push`] and reports how many it currently holds
/// through [`ExperienceBufferBase::len`]. The trait says nothing about how
/// the stored experiences are later turned into training batches.
///
/// # Type Parameters
///
/// * `Item` - The type of experience stored in the buffer
///
/// # Examples
///
/// ```ignore
/// struct SimpleBuffer<T> {
///     items: Vec<T>,
/// }
///
/// impl<T> ExperienceBufferBase for SimpleBuffer<T> {
///     type Item = T;
///
///     fn push(&mut self, tr: T) -> Result<()> {
///         self.items.push(tr);
///         Ok(())
///     }
///
///     fn len(&self) -> usize {
///         self.items.len()
///     }
/// }
///
/// let mut buffer = SimpleBuffer { items: Vec::new() };
/// assert!(buffer.is_empty());
/// buffer.push(1)?;
/// assert_eq!(buffer.len(), 1);
/// assert!(!buffer.is_empty());
/// ```
pub trait ExperienceBufferBase {
    /// The type of items stored in the buffer.
    ///
    /// This can be any type that represents an experience or transition
    /// from the environment.
    type Item;

    /// Pushes a new experience into the buffer.
    ///
    /// # Arguments
    ///
    /// * `tr` - The experience to store; ownership moves into the buffer
    ///
    /// # Errors
    ///
    /// Returns an error if the implementation fails to store `tr`.
    fn push(&mut self, tr: Self::Item) -> Result<()>;

    /// Returns the number of experiences currently stored in the buffer.
    fn len(&self) -> usize;

    /// Returns `true` if the buffer currently holds no experiences.
    ///
    /// The provided implementation simply checks whether
    /// [`ExperienceBufferBase::len`] is zero, so existing implementors get it
    /// for free. Override it only if emptiness can be answered more cheaply
    /// than computing the length.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
63
/// Interface for replay buffers that produce batches for training.
///
/// A type implementing this trait can be constructed from a configuration
/// value, asked for a batch of a requested size, and told to update the
/// priorities of its stored experiences. Storing experiences is not part of
/// this trait; that is covered separately by [`ExperienceBufferBase`].
///
/// # Associated Types
///
/// * `Config` - Parameters from which the buffer is built
/// * `Batch` - The value handed back by [`ReplayBufferBase::batch`]
pub trait ReplayBufferBase {
    /// Parameters used to construct the replay buffer.
    ///
    /// The `Clone` bound allows a single configuration value to be duplicated,
    /// for example to build several buffers from the same settings.
    type Config: Clone;

    /// The batch type produced for training.
    ///
    /// Typically a collection of experiences in a form that a training step
    /// can consume directly.
    type Batch;

    /// Creates a replay buffer from the given configuration.
    ///
    /// # Arguments
    ///
    /// * `config` - Borrowed configuration describing the buffer to create
    ///
    /// # Returns
    ///
    /// The newly built buffer.
    fn build(config: &Self::Config) -> Self;

    /// Samples experiences from the buffer and assembles them into a batch.
    ///
    /// # Arguments
    ///
    /// * `size` - How many experiences the batch should contain
    ///
    /// # Errors
    ///
    /// Returns an error if the implementation cannot produce the batch.
    fn batch(&mut self, size: usize) -> Result<Self::Batch>;

    /// Updates the priorities of experiences held in the buffer.
    ///
    /// Intended for prioritized experience replay, where sampling
    /// probabilities are adjusted according to TD errors.
    ///
    /// # Arguments
    ///
    /// * `ixs` - Indices of the experiences to update, if any
    /// * `td_err` - TD errors associated with those experiences, if any
    ///
    /// # Note
    ///
    /// Not every replay buffer is prioritized, so this method may move to a
    /// dedicated trait in a future version.
    fn update_priority(&mut self, ixs: &Option<Vec<usize>>, td_err: &Option<Vec<f32>>);
}
128
/// A dummy replay buffer that does nothing.
///
/// This unit struct carries no data and serves as a placeholder type
/// wherever a replay buffer type is required but no buffer is actually needed.
///
/// The common traits are derived so the placeholder can be printed, copied
/// and default-constructed like any other value.
#[derive(Debug, Clone, Copy, Default)]
pub struct NullReplayBuffer;
133
134impl ReplayBufferBase for NullReplayBuffer {
135    type Batch = ();
136    type Config = ();
137
138    #[allow(unused_variables)]
139    fn build(config: &Self::Config) -> Self {
140        Self
141    }
142
143    #[allow(unused_variables)]
144    fn batch(&mut self, size: usize) -> Result<Self::Batch> {
145        unimplemented!();
146    }
147
148    #[allow(unused_variables)]
149    fn update_priority(&mut self, ixs: &Option<Vec<usize>>, td_err: &Option<Vec<f32>>) {
150        unimplemented!();
151    }
152}