mbarc_map/minimally_blocking_atomic_reference_counted_map.rs
1use std::collections::{HashMap, VecDeque};
2use std::hash::Hash;
3use std::marker::PhantomData;
4use std::ptr::NonNull;
5use std::sync::atomic::{AtomicBool, AtomicUsize};
6use std::sync::{Arc, Mutex, MutexGuard};
7
8use crate::data_holder::{DataHolder, SharedDataContainerType, DATA_HOLDER_BLOCK_SIZE_INTERNAL};
9use crate::data_reference::DataReference;
10use crate::fixed_address_continuous_allocation::{FaVec, FaVecIndex};
11
12type HashType<T, U> = HashMap<T, U>;
13
14/// The heart of this crate, a map which is safe to use in threading contexts without wrapping in a Mutex.
15///
16/// This map does not hold any locks beyond the duration of its member functions, it is completely safe to hold the results of get indefinitely, regardless of what happens in other threads, and this map will not cause deadlocks
17/// This means it is safe to:
18/// - Hold multiple [DataReference]s to the same value across multiple threads
19/// - Remove elements while a [DataReference] to that value is held elsewhere
20/// - Drop the Map itself while [DataReference]s exist elsewhere
21/// - Iterate over elements in one thread without taking a lock to the map (iterators hold a reference to each element at time of creation)
22///
23/// Additionally:
24/// - Values are stored in mostly-continuous blocks of memory, and they remain in a fixed address until dropped, making it safe to take references/pointers.
25/// - Values are implicitly wrapped in their own Mutex, requiring only that locks are taken on a per-element basis for data access
26/// - Values are only dropped when all [DataReference]s to them have been dropped
27///
28/// This map is not quite a true HashMap, implementing many non-constant (though still sublinear) operations for managing metadata.
29/// The theory behind this approach, however, is that by keeping mutex lock duration to a minimum and everything else "fast enough", any potential performance losses elsewhere should be more than made up accounted for in practice by allowing more saturated thread usage.
30pub struct MbarcMap<T: Hash + Eq, U> {
31 data: SharedDataContainerType<U>,
32 data_refs: Arc<Mutex<HashType<T, DataReference<U>>>>,
33}
34
35impl<T: Hash + Eq, U> Default for MbarcMap<T, U> {
36 fn default() -> Self {
37 Self::new()
38 }
39}
40
41impl<T: Hash + Eq, U> MbarcMap<T, U> {
42 /// Create a new, empty MbarcMap
43 ///
44 /// # Example
45 /// ```
46 /// use std::sync::Arc;
47 /// use mbarc_map::MbarcMap;
48 ///
49 /// let concurrent_map = Arc::new(MbarcMap::<u64,String>::new());
50 /// ```
51 pub fn new() -> Self {
52 Self {
53 data: Arc::new(Mutex::new(FaVec::new())),
54 data_refs: Arc::new(Mutex::new(HashType::new())),
55 }
56 }
57
58 /// Inserts a key-value pair into the map.
59 ///
60 /// The return of this function is identical to the insert function of the underlying map type used internally to store references.
61 /// Currently, this is std::collections::HashMap
62 pub fn insert(&self, key: T, value: U) -> Option<DataReference<U>> {
63 let mut refs_lock = self.data_refs.lock().unwrap();
64 let mut data_lock = self.data.lock().unwrap();
65
66 let new_holder = DataHolder {
67 ref_count: AtomicUsize::new(1),
68 pending_removal: AtomicBool::new(false),
69 data: Mutex::new(value),
70 owner: self.data.clone(),
71 owning_key: FaVecIndex::from_absolute_index(0),
72 };
73
74 let new_key = data_lock.push(new_holder);
75 let inserted_item = data_lock.get_mut(&new_key).unwrap();
76 inserted_item.owning_key = new_key;
77
78 refs_lock.insert(
79 key,
80 DataReference {
81 ptr: NonNull::new(inserted_item as *mut DataHolder<U>).unwrap(),
82 phantom: PhantomData,
83 },
84 )
85 }
86
87 /// Returns `true` if the map contains a value for the specified key.
88 ///
89 /// Note that, in threaded contexts, this is only correct at the moment this function is called.
90 /// It is possible that another thread could add or remove the requested key before you are able to use the return value.
91 ///
92 /// If you intend to use the pattern "if contains then get", then using get alone is sufficient
93 pub fn contains(&self, key: &T) -> bool {
94 self.data_refs.lock().unwrap().contains_key(key)
95 }
96
97 /// Returns a [DataReference] to the value corresponding to the key. If the key is not present, None will be returned
98 ///
99 /// Note that, in threaded contexts, it is possible for another thread to potentially remove the value you get before you can use it.
100 /// In cases like this, the value referenced by the returned [DataReference] will not be dropped until all remaining [DataReference] have been dropped
101 pub fn get(&self, key: &T) -> Option<DataReference<U>> {
102 self.data_refs.lock().unwrap().get(key).cloned()
103 }
104
105 /// Returns a [DataReference] to the value corresponding to the key and removes the key/value from this map. If the key is not present, None will be returned
106 ///
107 /// The value referenced by the returned [DataReference] will not be dropped until all remaining [DataReference] have been dropped
108 pub fn remove(&self, key: &T) -> Option<DataReference<U>> {
109 match self.data_refs.lock().unwrap().remove(key) {
110 Some(value_ref) => {
111 value_ref.raw_data().set_deleted();
112 Some(value_ref)
113 }
114 None => None,
115 }
116 }
117
118 /// Returns the number of elements in the map.
119 pub fn len(&self) -> usize {
120 self.data_refs.lock().unwrap().len()
121 }
122
123 ///Returns `true` if the map contains no elements.
124 pub fn is_empty(&self) -> bool {
125 self.data_refs.lock().unwrap().is_empty()
126 }
127
128 /// An iterator visiting all values in arbitrary order
129 ///
130 /// Important concurrency note: This iterator will represent the state of the map at creation time.
131 /// Adding or removing elements during iteration (in this thread or others) will not have any impact on iteration order, and creation of this iterator has a cost.
132 //TODO: make all iterators generated from macro, to ensure they really are "identical" (????)
133 pub fn iter(
134 &self,
135 ) -> crate::minimally_blocking_atomic_reference_counted_map::Iter<DataReference<U>> {
136 let ref_lock = self.data_refs.lock().unwrap();
137 let mut vals = VecDeque::with_capacity(ref_lock.len());
138
139 for value in ref_lock.values() {
140 vals.push_back(value.clone());
141 }
142
143 Iter { items: vals }
144 }
145
146 /// Value only iterator representing the values in this map at the time it is called. The order of iteration will be the in-memory order of values, and this should be preferred if keys are not needed
147 pub fn iter_copied_values_ordered(&self) -> Iter<DataReference<U>> {
148 let data_lock = self.data.lock().unwrap();
149
150 Iter {
151 items: data_lock.iter().map(|a| a.make_new_ref()).collect(),
152 }
153 }
154
155 /// Exclusive lock on the map for iteration. This does not clone any element references, therefore requiring a lock is taken on the map.
156 /// This returns a [LockedContainer], which only provides an iter() function, returning the iterator you want.
157 ///
158 /// Usage: given some `my_hash: MbarcMap<T,U>`, lock and iterate over it via
159 ///
160 /// `for (k,v) in my_hash.iter_exclusive().iter()`
161 pub fn iter_exclusive(&self) -> LockedContainer<'_, T, U> {
162 LockedContainer {
163 items: self.data_refs.lock().unwrap(),
164 }
165 }
166
167 /// Exclusive lock on the map for value iteration. This does not clone any element references, therefore requiring a lock is taken on the map.
168 /// This returns a [LockedValuesContainer], which only provides an iter() function, returning the iterator you want.
169 ///
170 /// Usage: given some `my_hash: MbarcMap<T,U>`, lock and iterate over it via
171 ///
172 /// `for (k,v) in my_hash.iter_values_exclusive().iter()`
173 pub fn iter_values_exclusive(&self) -> LockedValuesContainer<'_, U> {
174 LockedValuesContainer {
175 items: self.data.lock().unwrap(),
176 }
177 }
178}
179
180impl<T: Hash + Eq + Clone, U> MbarcMap<T, U> {
181 /// An iterator visiting all key-value pairs in arbitrary order, only for keys which implement Clone
182 ///
183 /// Comments regarding concurrency and performance are the same as in [MbarcMap::iter]
184 pub fn iter_cloned_keys(&self) -> Iter<(T, DataReference<U>)> {
185 let ref_lock = self.data_refs.lock().unwrap();
186 let mut vals = VecDeque::with_capacity(ref_lock.len());
187
188 for (key, value) in ref_lock.iter() {
189 vals.push_back((key.clone(), value.clone()));
190 }
191
192 Iter { items: vals }
193 }
194}
195
196impl<T: Hash + Eq + Copy, U> MbarcMap<T, U> {
197 /// An iterator visiting all key-value pairs in arbitrary order, only for keys which implement Copy
198 ///
199 /// Comments regarding concurrency and performance are the same as in [MbarcMap::iter]
200 pub fn iter_copied_keys(&self) -> Iter<(T, DataReference<U>)> {
201 let ref_lock = self.data_refs.lock().unwrap();
202 let mut vals = VecDeque::with_capacity(ref_lock.len());
203
204 for (key, value) in ref_lock.iter() {
205 vals.push_back((*key, value.clone()));
206 }
207
208 Iter { items: vals }
209 }
210}
211
212unsafe impl<T: Hash + Eq, U> Send for MbarcMap<T, U> {}
213unsafe impl<T: Hash + Eq, U> Sync for MbarcMap<T, U> {}
214
215impl<T: Hash + Eq, U> Drop for MbarcMap<T, U> {
216 fn drop(&mut self) {
217 let ref_lock = self.data_refs.lock().unwrap();
218
219 for value in ref_lock.values() {
220 value.raw_data().set_deleted();
221 }
222 }
223}
224
225impl<K, V, const N: usize> From<[(K, V); N]> for MbarcMap<K, V>
226where
227 K: Eq + Hash,
228{
229 fn from(arr: [(K, V); N]) -> Self {
230 let map = Self::new();
231
232 for (k, v) in arr {
233 map.insert(k, v);
234 }
235
236 map
237 }
238}
239
240/// An iterator over the entries of a `MbarcMap`.
241///
242/// This `struct` is created by the various [`iter`] methods on [`MbarcMap`]. See its
243/// documentation for more.
244///
245/// [`iter`]: MbarcMap::iter
246pub struct Iter<U> {
247 items: VecDeque<U>,
248}
249
250impl<U> Iterator for Iter<U> {
251 type Item = U;
252
253 fn next(&mut self) -> Option<Self::Item> {
254 self.items.pop_front()
255 }
256}
257
258/// Represents a lock to an [MbarcMap]'s internal map, used exclusively for locked, by-reference iteration
259/// see [`iter_exclusive`]
260///
261/// [`iter_exclusive`]: MbarcMap::iter_exclusive
262pub struct LockedContainer<'a, T, U> {
263 items: MutexGuard<'a, HashType<T, DataReference<U>>>,
264}
265
266impl<'a, T, U> LockedContainer<'a, T, U> {
267 pub fn iter(&self) -> impl Iterator<Item = (&T, &DataReference<U>)> {
268 self.items.iter()
269 }
270}
271
272/// Represents a lock to an [MbarcMap]'s internal map, used exclusively for locked, value-only iteration
273/// see [`iter_values_exclusive`]
274///
275/// [`iter_values_exclusive`]: MbarcMap::iter_values_exclusive
276pub struct LockedValuesContainer<'a, U> {
277 items: MutexGuard<'a, FaVec<DataHolder<U>, DATA_HOLDER_BLOCK_SIZE_INTERNAL>>,
278}
279
280impl<'a, U> LockedValuesContainer<'a, U> {
281 pub fn iter(&'a self) -> impl Iterator<Item = &'a Mutex<U>> {
282 self.items.iter().map(|value| &value.data)
283 }
284}