border_core/base/replay_buffer.rs
1//! Replay buffer interface for reinforcement learning.
2//!
3//! This module defines the core interfaces for experience replay buffers in reinforcement learning.
4//! Replay buffers are essential components that store and sample experiences (transitions)
5//! for training agents, enabling more efficient learning through experience replay.
6
7use anyhow::Result;
8
9/// Interface for buffers that store experiences from environments.
10///
11/// This trait defines the basic operations for storing experiences in a buffer.
12/// It is typically used by processes that need to sample experiences for training.
13///
14/// # Type Parameters
15///
16/// * `Item` - The type of experience stored in the buffer
17///
18/// # Examples
19///
20/// ```ignore
21/// struct SimpleBuffer<T> {
22/// items: Vec<T>,
23/// }
24///
25/// impl<T> ExperienceBufferBase for SimpleBuffer<T> {
26/// type Item = T;
27///
28/// fn push(&mut self, tr: T) -> Result<()> {
29/// self.items.push(tr);
30/// Ok(())
31/// }
32///
33/// fn len(&self) -> usize {
34/// self.items.len()
35/// }
36/// }
37/// ```
38pub trait ExperienceBufferBase {
39 /// The type of items stored in the buffer.
40 ///
41 /// This can be any type that represents an experience or transition
42 /// from the environment.
43 type Item;
44
45 /// Pushes a new experience into the buffer.
46 ///
47 /// # Arguments
48 ///
49 /// * `tr` - The experience to store
50 ///
51 /// # Returns
52 ///
53 /// `Ok(())` if the push was successful, or an error if it failed
54 fn push(&mut self, tr: Self::Item) -> Result<()>;
55
56 /// Returns the current number of experiences in the buffer.
57 ///
58 /// # Returns
59 ///
60 /// The number of experiences currently stored
61 fn len(&self) -> usize;
62}
63
64/// Interface for replay buffers that generate batches for training.
65///
66/// This trait provides functionality for sampling batches of experiences
67/// for training agents. It is independent of [`ExperienceBufferBase`] and
68/// focuses solely on the batch generation process.
69///
70/// # Associated Types
71///
72/// * `Config` - Configuration parameters for the buffer
73/// * `Batch` - The type of batch generated for training
74pub trait ReplayBufferBase {
75 /// Configuration parameters for the replay buffer.
76 ///
77 /// This type must implement `Clone` to support building multiple instances
78 /// with the same configuration.
79 type Config: Clone;
80
81 /// The type of batch generated for training.
82 ///
83 /// This is typically a collection of experiences that can be used
84 /// directly for training an agent.
85 type Batch;
86
87 /// Builds a new replay buffer from the given configuration.
88 ///
89 /// # Arguments
90 ///
91 /// * `config` - The configuration parameters
92 ///
93 /// # Returns
94 ///
95 /// A new instance of the replay buffer
96 fn build(config: &Self::Config) -> Self;
97
98 /// Constructs a batch of experiences for training.
99 ///
100 /// This method samples experiences from the buffer and returns them
101 /// in a format suitable for training.
102 ///
103 /// # Arguments
104 ///
105 /// * `size` - The number of experiences to include in the batch
106 ///
107 /// # Returns
108 ///
109 /// A batch of experiences or an error if sampling failed
110 fn batch(&mut self, size: usize) -> Result<Self::Batch>;
111
112 /// Updates the priorities of experiences in the buffer.
113 ///
114 /// This method is used in prioritized experience replay to adjust
115 /// the sampling probabilities of experiences based on their TD errors.
116 ///
117 /// # Arguments
118 ///
119 /// * `ixs` - Optional indices of experiences to update
120 /// * `td_err` - Optional TD errors for the experiences
121 ///
122 /// # Note
123 ///
124 /// This method is optional and may be moved to a separate trait
125 /// in future versions to better support non-prioritized replay buffers.
126 fn update_priority(&mut self, ixs: &Option<Vec<usize>>, td_err: &Option<Vec<f32>>);
127}
128
129/// A dummy replay buffer that does nothing.
130///
131/// This struct is used as a placeholder when a replay buffer is not needed.
132pub struct NullReplayBuffer;
133
134impl ReplayBufferBase for NullReplayBuffer {
135 type Batch = ();
136 type Config = ();
137
138 #[allow(unused_variables)]
139 fn build(config: &Self::Config) -> Self {
140 Self
141 }
142
143 #[allow(unused_variables)]
144 fn batch(&mut self, size: usize) -> Result<Self::Batch> {
145 unimplemented!();
146 }
147
148 #[allow(unused_variables)]
149 fn update_priority(&mut self, ixs: &Option<Vec<usize>>, td_err: &Option<Vec<f32>>) {
150 unimplemented!();
151 }
152}