mbarc_map/
minimally_blocking_atomic_reference_counted_map.rs

1use std::collections::{HashMap, VecDeque};
2use std::hash::Hash;
3use std::marker::PhantomData;
4use std::ptr::NonNull;
5use std::sync::atomic::{AtomicBool, AtomicUsize};
6use std::sync::{Arc, Mutex, MutexGuard};
7
8use crate::data_holder::{DataHolder, SharedDataContainerType, DATA_HOLDER_BLOCK_SIZE_INTERNAL};
9use crate::data_reference::DataReference;
10use crate::fixed_address_continuous_allocation::{FaVec, FaVecIndex};
11
12type HashType<T, U> = HashMap<T, U>;
13
14/// The heart of this crate, a map which is safe to use in threading contexts without wrapping in a Mutex.
15///
16/// This map does not hold any locks beyond the duration of its member functions, it is completely safe to hold the results of get indefinitely, regardless of what happens in other threads, and this map will not cause deadlocks
17/// This means it is safe to:
18/// - Hold multiple [DataReference]s to the same value across multiple threads
19/// - Remove elements while a [DataReference] to that value is held elsewhere
20/// - Drop the Map itself while [DataReference]s exist elsewhere
21/// - Iterate over elements in one thread without taking a lock to the map (iterators hold a reference to each element at time of creation)
22///
23/// Additionally:
24/// - Values are stored in mostly-continuous blocks of memory, and they remain in a fixed address until dropped, making it safe to take references/pointers.
25/// - Values are implicitly wrapped in their own Mutex, requiring only that locks are taken on a per-element basis for data access
26/// - Values are only dropped when all [DataReference]s to them have been dropped
27///
28/// This map is not quite a true HashMap, implementing many non-constant (though still sublinear) operations for managing metadata.
29/// The theory behind this approach, however, is that by keeping mutex lock duration to a minimum and everything else "fast enough", any potential performance losses elsewhere should be more than made up accounted for in practice by allowing more saturated thread usage.
30pub struct MbarcMap<T: Hash + Eq, U> {
31	data: SharedDataContainerType<U>,
32	data_refs: Arc<Mutex<HashType<T, DataReference<U>>>>,
33}
34
35impl<T: Hash + Eq, U> Default for MbarcMap<T, U> {
36	fn default() -> Self {
37		Self::new()
38	}
39}
40
41impl<T: Hash + Eq, U> MbarcMap<T, U> {
42	/// Create a new, empty MbarcMap
43	///
44	/// # Example
45	/// ```
46	/// use std::sync::Arc;
47	/// use mbarc_map::MbarcMap;
48	///
49	/// let concurrent_map = Arc::new(MbarcMap::<u64,String>::new());
50	/// ```
51	pub fn new() -> Self {
52		Self {
53			data: Arc::new(Mutex::new(FaVec::new())),
54			data_refs: Arc::new(Mutex::new(HashType::new())),
55		}
56	}
57
58	/// Inserts a key-value pair into the map.
59	///
60	/// The return of this function is identical to the insert function of the underlying map type used internally to store references.
61	/// Currently, this is std::collections::HashMap
62	pub fn insert(&self, key: T, value: U) -> Option<DataReference<U>> {
63		let mut refs_lock = self.data_refs.lock().unwrap();
64		let mut data_lock = self.data.lock().unwrap();
65
66		let new_holder = DataHolder {
67			ref_count: AtomicUsize::new(1),
68			pending_removal: AtomicBool::new(false),
69			data: Mutex::new(value),
70			owner: self.data.clone(),
71			owning_key: FaVecIndex::from_absolute_index(0),
72		};
73
74		let new_key = data_lock.push(new_holder);
75		let inserted_item = data_lock.get_mut(&new_key).unwrap();
76		inserted_item.owning_key = new_key;
77
78		refs_lock.insert(
79			key,
80			DataReference {
81				ptr: NonNull::new(inserted_item as *mut DataHolder<U>).unwrap(),
82				phantom: PhantomData,
83			},
84		)
85	}
86
87	/// Returns `true` if the map contains a value for the specified key.
88	///
89	/// Note that, in threaded contexts, this is only correct at the moment this function is called.
90	/// It is possible that another thread could add or remove the requested key before you are able to use the return value.
91	///
92	/// If you intend to use the pattern "if contains then get", then using get alone is sufficient
93	pub fn contains(&self, key: &T) -> bool {
94		self.data_refs.lock().unwrap().contains_key(key)
95	}
96
97	/// Returns a [DataReference] to the value corresponding to the key.  If the key is not present, None will be returned
98	///
99	/// Note that, in threaded contexts, it is possible for another thread to potentially remove the value you get before you can use it.
100	/// In cases like this, the value referenced by the returned [DataReference] will not be dropped until all remaining [DataReference] have been dropped
101	pub fn get(&self, key: &T) -> Option<DataReference<U>> {
102		self.data_refs.lock().unwrap().get(key).cloned()
103	}
104
105	/// Returns a [DataReference] to the value corresponding to the key and removes the key/value from this map.  If the key is not present, None will be returned
106	///
107	/// The value referenced by the returned [DataReference] will not be dropped until all remaining [DataReference] have been dropped
108	pub fn remove(&self, key: &T) -> Option<DataReference<U>> {
109		match self.data_refs.lock().unwrap().remove(key) {
110			Some(value_ref) => {
111				value_ref.raw_data().set_deleted();
112				Some(value_ref)
113			}
114			None => None,
115		}
116	}
117
118	/// Returns the number of elements in the map.
119	pub fn len(&self) -> usize {
120		self.data_refs.lock().unwrap().len()
121	}
122
123	///Returns `true` if the map contains no elements.
124	pub fn is_empty(&self) -> bool {
125		self.data_refs.lock().unwrap().is_empty()
126	}
127
128	/// An iterator visiting all values in arbitrary order
129	///
130	/// Important concurrency note: This iterator will represent the state of the map at creation time.
131	/// Adding or removing elements during iteration (in this thread or others) will not have any impact on iteration order, and creation of this iterator has a cost.
132	//TODO: make all iterators generated from macro, to ensure they really are "identical" (????)
133	pub fn iter(
134		&self,
135	) -> crate::minimally_blocking_atomic_reference_counted_map::Iter<DataReference<U>> {
136		let ref_lock = self.data_refs.lock().unwrap();
137		let mut vals = VecDeque::with_capacity(ref_lock.len());
138
139		for value in ref_lock.values() {
140			vals.push_back(value.clone());
141		}
142
143		Iter { items: vals }
144	}
145
146	/// Value only iterator representing the values in this map at the time it is called.  The order of iteration will be the in-memory order of values, and this should be preferred if keys are not needed
147	pub fn iter_copied_values_ordered(&self) -> Iter<DataReference<U>> {
148		let data_lock = self.data.lock().unwrap();
149
150		Iter {
151			items: data_lock.iter().map(|a| a.make_new_ref()).collect(),
152		}
153	}
154
155	/// Exclusive lock on the map for iteration.  This does not clone any element references, therefore requiring a lock is taken on the map.
156	/// This returns a [LockedContainer], which only provides an iter() function, returning the iterator you want.
157	///
158	/// Usage: given some `my_hash: MbarcMap<T,U>`, lock and iterate over it via
159	///
160	/// `for (k,v) in my_hash.iter_exclusive().iter()`
161	pub fn iter_exclusive(&self) -> LockedContainer<'_, T, U> {
162		LockedContainer {
163			items: self.data_refs.lock().unwrap(),
164		}
165	}
166
167	/// Exclusive lock on the map for value iteration.  This does not clone any element references, therefore requiring a lock is taken on the map.
168	/// This returns a [LockedValuesContainer], which only provides an iter() function, returning the iterator you want.
169	///
170	/// Usage: given some `my_hash: MbarcMap<T,U>`, lock and iterate over it via
171	///
172	/// `for (k,v) in my_hash.iter_values_exclusive().iter()`
173	pub fn iter_values_exclusive(&self) -> LockedValuesContainer<'_, U> {
174		LockedValuesContainer {
175			items: self.data.lock().unwrap(),
176		}
177	}
178}
179
180impl<T: Hash + Eq + Clone, U> MbarcMap<T, U> {
181	/// An iterator visiting all key-value pairs in arbitrary order, only for keys which implement Clone
182	///
183	/// Comments regarding concurrency and performance are the same as in [MbarcMap::iter]
184	pub fn iter_cloned_keys(&self) -> Iter<(T, DataReference<U>)> {
185		let ref_lock = self.data_refs.lock().unwrap();
186		let mut vals = VecDeque::with_capacity(ref_lock.len());
187
188		for (key, value) in ref_lock.iter() {
189			vals.push_back((key.clone(), value.clone()));
190		}
191
192		Iter { items: vals }
193	}
194}
195
196impl<T: Hash + Eq + Copy, U> MbarcMap<T, U> {
197	/// An iterator visiting all key-value pairs in arbitrary order, only for keys which implement Copy
198	///
199	/// Comments regarding concurrency and performance are the same as in [MbarcMap::iter]
200	pub fn iter_copied_keys(&self) -> Iter<(T, DataReference<U>)> {
201		let ref_lock = self.data_refs.lock().unwrap();
202		let mut vals = VecDeque::with_capacity(ref_lock.len());
203
204		for (key, value) in ref_lock.iter() {
205			vals.push_back((*key, value.clone()));
206		}
207
208		Iter { items: vals }
209	}
210}
211
212unsafe impl<T: Hash + Eq, U> Send for MbarcMap<T, U> {}
213unsafe impl<T: Hash + Eq, U> Sync for MbarcMap<T, U> {}
214
215impl<T: Hash + Eq, U> Drop for MbarcMap<T, U> {
216	fn drop(&mut self) {
217		let ref_lock = self.data_refs.lock().unwrap();
218
219		for value in ref_lock.values() {
220			value.raw_data().set_deleted();
221		}
222	}
223}
224
225impl<K, V, const N: usize> From<[(K, V); N]> for MbarcMap<K, V>
226where
227	K: Eq + Hash,
228{
229	fn from(arr: [(K, V); N]) -> Self {
230		let map = Self::new();
231
232		for (k, v) in arr {
233			map.insert(k, v);
234		}
235
236		map
237	}
238}
239
240/// An iterator over the entries of a `MbarcMap`.
241///
242/// This `struct` is created by the various [`iter`] methods on [`MbarcMap`]. See its
243/// documentation for more.
244///
245/// [`iter`]: MbarcMap::iter
246pub struct Iter<U> {
247	items: VecDeque<U>,
248}
249
250impl<U> Iterator for Iter<U> {
251	type Item = U;
252
253	fn next(&mut self) -> Option<Self::Item> {
254		self.items.pop_front()
255	}
256}
257
258/// Represents a lock to an [MbarcMap]'s internal map, used exclusively for locked, by-reference iteration
259/// see [`iter_exclusive`]
260///
261/// [`iter_exclusive`]: MbarcMap::iter_exclusive
262pub struct LockedContainer<'a, T, U> {
263	items: MutexGuard<'a, HashType<T, DataReference<U>>>,
264}
265
266impl<'a, T, U> LockedContainer<'a, T, U> {
267	pub fn iter(&self) -> impl Iterator<Item = (&T, &DataReference<U>)> {
268		self.items.iter()
269	}
270}
271
272/// Represents a lock to an [MbarcMap]'s internal map, used exclusively for locked, value-only iteration
273/// see [`iter_values_exclusive`]
274///
275/// [`iter_values_exclusive`]: MbarcMap::iter_values_exclusive
276pub struct LockedValuesContainer<'a, U> {
277	items: MutexGuard<'a, FaVec<DataHolder<U>, DATA_HOLDER_BLOCK_SIZE_INTERNAL>>,
278}
279
280impl<'a, U> LockedValuesContainer<'a, U> {
281	pub fn iter(&'a self) -> impl Iterator<Item = &'a Mutex<U>> {
282		self.items.iter().map(|value| &value.data)
283	}
284}