mbarc_map/
fixed_address_continuous_allocation.rs

1use std::collections::btree_map::Entry;
2use std::collections::{BTreeMap, BTreeSet};
3
/// A fixed-capacity storage block holding up to `BLOCK_SIZE` elements.
///
/// Elements never move within `data` once stored; this is what gives the
/// containing `FaVec` its stable-address guarantee.
struct DataBlock<T, const BLOCK_SIZE: usize> {
	//number of `None` slots currently left in `data`
	free_space: usize,
	data: [Option<T>; BLOCK_SIZE],
}

impl<T, const BLOCK_SIZE: usize> DataBlock<T, BLOCK_SIZE> {
	//workaround for array init: `[None; N]` requires `T: Copy`, but repeating a const is allowed
	const EMPTY_ELEMENT: Option<T> = None;

	/// Creates a block with all `BLOCK_SIZE` slots vacant.
	fn new() -> Self {
		Self {
			free_space: BLOCK_SIZE,
			data: [Self::EMPTY_ELEMENT; BLOCK_SIZE],
		}
	}

	/// Stores `val` in the first empty slot and returns that slot's offset.
	///
	/// # Panics
	/// Panics if the block is full, or if `free_space` claims space remains
	/// but no empty slot actually exists (corrupt accounting); previously the
	/// latter case silently returned offset 0, clobbering nothing but
	/// reporting a wrong, already-occupied address.
	fn insert(&mut self, val: T) -> usize {
		assert!(self.free_space > 0);

		//free_space > 0 guarantees a vacant slot; if the scan finds none, the
		//bookkeeping is broken and continuing would corrupt the structure
		let stored_offset = self
			.data
			.iter()
			.position(Option::is_none)
			.expect("free_space > 0 but no empty slot; free_space accounting is corrupt");
		self.data[stored_offset] = Some(val);
		self.free_space -= 1;

		stored_offset
	}
}
38
//#[derive(Copy, Clone)]
/// An opaque handle addressing one slot inside a `FaVec<_, BLOCK_SIZE>`.
///
/// Internally the owning block's index and the offset within that block are
/// packed into a single absolute position.
pub(crate) struct FaVecIndex<const BLOCK_SIZE: usize> {
	absolute_index: usize,
	//TODO: can store pieces individually, determine if this is worth doing
	//block_index:usize,
	//offset:usize,
}

impl<const BLOCK_SIZE: usize> FaVecIndex<BLOCK_SIZE> {
	/// Packs a (block, offset) pair into one absolute position.
	fn index_from_block_offset(block_index: usize, offset: usize) -> Self {
		let absolute_index = block_index * BLOCK_SIZE + offset;
		Self { absolute_index }
	}

	/// Splits the absolute position back into its (block, offset) pair.
	fn index_to_block_offset(&self) -> (usize, usize) {
		let block_index = self.absolute_index / BLOCK_SIZE;
		let offset = self.absolute_index % BLOCK_SIZE;
		(block_index, offset)
	}

	/// Returns the raw packed position.
	fn as_absolute_index(&self) -> usize {
		self.absolute_index
	}

	/// Rebuilds a handle from a raw packed position.
	pub(crate) fn from_absolute_index(absolute_index: usize) -> Self {
		Self { absolute_index }
	}
}
68
/// A vector-like container that never moves stored elements in memory.
///
/// Data lives in fixed-size, individually boxed `DataBlock`s, so an element's
/// address stays stable for its whole lifetime even as other elements are
/// pushed or removed around it.
pub(crate) struct FaVec<T, const BLOCK_SIZE: usize> {
	//TODO: we use Box to keep data_blocks small and allow efficient resize, however we also only add to/remove from data_blocks at the end index.
	//TODO: Determine if it's most efficient to a) keep as-is, b) remove the Box, or c) remove empty blocks from the middle as well, rather than just the end
	//TODO: keep in mind that, if Box is removed here, we still must ensure that individual elements are pointer safe, ie never moved in memory
	data_blocks: Vec<Box<DataBlock<T, BLOCK_SIZE>>>,

	//maps free space to a set of indexes (in data)
	//invariant: every key present maps to a NON-empty set of block indices
	free_space_map: BTreeMap<usize, BTreeSet<usize>>,
}
78
79impl<T, const BLOCK_SIZE: usize> FaVec<T, BLOCK_SIZE> {
80	//need to ensure block*blocksize+offset<=usize::MAX -1
81	//max for offset is blocksize-1
82	//block*blocksize+blocksize-1 <= usize::max - 1
83	//block*(blocksize+1) <= usize::max
84	//block <= usize::max / (blocksize+1)
85	//if block = usize::max/(blocksize+1) and we are full, push should panic instead of adding new data
86	const MAX_BLOCK_COUNT: usize = usize::MAX / (BLOCK_SIZE + 1);
87
88	pub fn new() -> Self {
89		let mut initial_set = BTreeSet::new();
90		initial_set.insert(0usize);
91		let mut initial_map = BTreeMap::new();
92		initial_map.insert(BLOCK_SIZE, initial_set);
93
94		Self {
95			data_blocks: vec![Box::new(DataBlock::new())],
96			free_space_map: initial_map,
97		}
98	}
99
100	fn update_block_free_spaces(&mut self, block_index: usize, new_free_space: usize) {
101		assert!(block_index < self.data_blocks.len());
102		assert!(new_free_space <= BLOCK_SIZE);
103
104		//println!("updating block {} free space to {}", block_index, new_free_space);
105
106		assert!(block_index < self.data_blocks.len());
107		let block = self.data_blocks[block_index].as_mut();
108		let old_free_space = block.free_space;
109
110		//println!("block {} previously had {} free indices", block_index, old_free_space);
111
112		assert!(self.free_space_map.contains_key(&old_free_space));
113		let old_free_blocks = self.free_space_map.get_mut(&old_free_space).unwrap();
114		assert!(!old_free_blocks.is_empty());
115		assert!(old_free_blocks.contains(&block_index));
116		old_free_blocks.remove(&block_index);
117
118		if old_free_blocks.is_empty() {
119			self.free_space_map.remove(&old_free_space);
120
121			//println!("block {} was the last block with {} free indices, removing", block_index, old_free_space);
122		}
123
124		match self.free_space_map.entry(new_free_space) {
125			Entry::Occupied(mut entry) => {
126				entry.get_mut().insert(block_index);
127
128				//println!("block {} was added to set with {} free indices", block_index, new_free_space);
129			}
130			Entry::Vacant(entry) => {
131				let mut new_set = BTreeSet::new();
132				new_set.insert(block_index);
133				entry.insert(new_set);
134
135				//println!("block {} was added to new set with {} free indices", block_index, new_free_space);
136			}
137		}
138
139		block.free_space = new_free_space;
140	}
141
142	fn get_or_create_block_for_insert(&mut self) -> usize {
143		//the current strategy here is to first, find the block(s) with the least amount of non-zero free space remaining
144		//we want to concentrate data to "hot" blocks, where there's already lots of other data, so we have fewer gaps
145		//from these blocks, we then select the lowest-index block, so that high index blocks are more likely to empty, and thus be dropped later during remove()
146
147		let lowest_free_space = self.free_space_map.keys().find(|key| **key > 0);
148
149		//if no blocks with more than 0 free space, need to make a new block
150		let lowest_free_space = match lowest_free_space {
151			Some(lowest_free_space) => {
152				//println!("lowest has {} free blocks", lowest_free_space);
153				*lowest_free_space
154			}
155			None => {
156				if self.data_blocks.len() == Self::MAX_BLOCK_COUNT {
157					//TODO: technically we can avoid this
158					//consider instead using a struct that wraps the block index + block number, rather than packing them together at the end
159					//this, however, comes at a cost of having a more complex type for hashing purposes
160					std::process::abort(); //TODO: is abort the right exit here?
161				}
162
163				//add new data block
164				let new_index = self.data_blocks.len();
165				self.data_blocks.push(Box::new(DataBlock::new()));
166
167				//track the new block's free space in our map
168				let mut new_set = BTreeSet::new();
169				new_set.insert(new_index);
170				self.free_space_map.insert(BLOCK_SIZE, new_set);
171
172				//we now have a block that's completely empty
173				BLOCK_SIZE
174			}
175		};
176
177		//pick the lowest-indexed block (with the least free space)
178		//println!("{:?}", self.free_space_map);
179		let possible_blocks = self.free_space_map.get_mut(&lowest_free_space).unwrap();
180		assert!(!possible_blocks.is_empty());
181		let block_index = possible_blocks.pop_first().unwrap();
182
183		//if no more blocks have lowest_free_space left, then remove the index
184		if possible_blocks.is_empty() {
185			self.free_space_map.remove(&lowest_free_space);
186		}
187
188		//println!("{:?}", self.free_space_map);
189		assert!(block_index < self.data_blocks.len());
190		block_index
191	}
192
193	//TODO: insert?
194	pub fn push(&mut self, val: T) -> FaVecIndex<BLOCK_SIZE> {
195		let block_index = self.get_or_create_block_for_insert();
196
197		//find an index in our block to insert the new data
198		let data_block = self.data_blocks.get_mut(block_index).unwrap();
199		let stored_offset = data_block.insert(val);
200
201		//store the block's index back in its proper place in the free space map
202		match self.free_space_map.get_mut(&data_block.free_space) {
203			Some(block_indexes) => {
204				block_indexes.insert(block_index);
205			}
206			None => {
207				let mut new_indexes = BTreeSet::new();
208				new_indexes.insert(block_index);
209
210				self.free_space_map
211					.insert(data_block.free_space, new_indexes);
212			}
213		}
214
215		//println!("added item, free space map is now: {:?}", self.free_space_map);
216
217		FaVecIndex::index_from_block_offset(block_index, stored_offset)
218	}
219
220	pub fn get(&self, index: &FaVecIndex<BLOCK_SIZE>) -> Option<&T> {
221		let (block_index, offset) = FaVecIndex::index_to_block_offset(index);
222		//let (block_index, offset) = (index.block_index,index.offset);
223
224		if block_index >= self.data_blocks.len() || offset >= BLOCK_SIZE {
225			return None;
226		}
227
228		match self.data_blocks.get(block_index) {
229			Some(block) => block.data[offset].as_ref(),
230			None => None,
231		}
232	}
233
234	//todo: evaluate necessity of mut
235	pub fn get_mut(&mut self, index: &FaVecIndex<BLOCK_SIZE>) -> Option<&mut T> {
236		let (block_index, offset) = FaVecIndex::index_to_block_offset(index);
237
238		if block_index >= self.data_blocks.len() || offset >= BLOCK_SIZE {
239			return None;
240		}
241
242		match self.data_blocks.get_mut(block_index) {
243			Some(block) => block.data[offset].as_mut(),
244			None => None,
245		}
246	}
247
248	fn prune_end_blocks(&mut self) {
249		if let Some(empty_blocks) = self.free_space_map.get_mut(&BLOCK_SIZE) {
250			//keep at least one block allocation, but otherwise we can remove empty blocks from the end
251			while self.data_blocks.len() > 1
252				&& self.data_blocks.last().unwrap().free_space == BLOCK_SIZE
253			{
254				self.data_blocks.pop();
255
256				//new length is the index of the popped element
257				//equivalent to using len()-1 before the above pop as long as len>0, len cannot be zero due to loop conditions
258				let old_end_index = self.data_blocks.len();
259
260				assert!(!empty_blocks.is_empty());
261				empty_blocks.remove(&old_end_index);
262			}
263
264			if empty_blocks.is_empty() {
265				self.free_space_map.remove(&BLOCK_SIZE);
266			}
267		};
268	}
269
270	//todo: evaluate necessity of mut
271	pub fn remove(&mut self, index: &FaVecIndex<BLOCK_SIZE>) -> Option<T> {
272		let (block_index, offset) = FaVecIndex::index_to_block_offset(index);
273
274		assert!(block_index < self.data_blocks.len());
275		assert!(offset < BLOCK_SIZE);
276
277		if block_index >= self.data_blocks.len() || offset >= BLOCK_SIZE {
278			//TODO: consider panic instead
279			return None;
280		}
281
282		let removed_item = match self.data_blocks.get_mut(block_index) {
283			Some(block) => {
284				let removed = block.data.get_mut(offset).unwrap().take();
285
286				if removed.is_some() {
287					let _old_free_space = block.free_space; //for debugging
288
289					let new_free_space = block.free_space + 1;
290					self.update_block_free_spaces(block_index, new_free_space);
291
292					assert!(self.free_space_map.contains_key(&new_free_space));
293					assert!(!self.free_space_map[&new_free_space].is_empty());
294
295					//TODO: while this should keep the map more compact, it also may cause more allocation/deallocation, which can have a cost
296					//testing seems to indicate within-margin runtime, but keep a close eye on this one
297					self.prune_end_blocks();
298				}
299
300				removed
301			}
302			None => None,
303		};
304
305		//println!("removed item, free space map is now: {:?}", self.free_space_map);
306		removed_item
307	}
308
309	pub fn capacity(&self) -> usize {
310		self.data_blocks.len() * BLOCK_SIZE
311	}
312
313	pub fn len(&self) -> usize {
314		self.free_space_map
315			.iter()
316			.fold(0, |acc, (free_space, items)| {
317				let used_space = BLOCK_SIZE - free_space;
318				let block_count = items.len();
319
320				acc + used_space * block_count
321			})
322	}
323
324	pub fn iter(&self) -> FaVecIter<T, BLOCK_SIZE> {
325		FaVecIter {
326			block_index: 0,
327			offset: 0,
328			real_count: 0,
329			fa_vec: self,
330		}
331	}
332}
333
/// Shared-reference iterator over the occupied slots of a `FaVec`, in
/// ascending address order.
pub struct FaVecIter<'a, T, const BLOCK_SIZE: usize> {
	//cursor: position of the NEXT slot to examine
	block_index: usize,
	offset: usize,
	//number of items yielded so far; used by size_hint
	real_count: usize,

	fa_vec: &'a FaVec<T, BLOCK_SIZE>,
}
341
342impl<'a, T, const BLOCK_SIZE: usize> FaVecIter<'a, T, BLOCK_SIZE> {
343	fn increment(&mut self) {
344		self.offset += 1;
345		if self.offset >= BLOCK_SIZE {
346			self.offset = 0;
347			self.block_index += 1;
348		}
349	}
350}
351
352impl<'a, T, const BLOCK_SIZE: usize> Iterator for FaVecIter<'a, T, BLOCK_SIZE> {
353	type Item = &'a T;
354
355	fn next(&mut self) -> Option<Self::Item> {
356		while self.block_index < self.fa_vec.data_blocks.len() {
357			let block_ref = self.fa_vec.data_blocks.get(self.block_index).unwrap();
358			let item = &block_ref.data[self.offset];
359
360			self.increment();
361
362			if item.is_some() {
363				self.real_count += 1;
364				return item.as_ref();
365			}
366		}
367		None
368	}
369
370	fn size_hint(&self) -> (usize, Option<usize>) {
371		//TODO: cache len?
372		let size = self.fa_vec.len() - self.real_count;
373		(size, Some(size))
374	}
375}
376
#[cfg(test)]
mod tests {
	use std::collections::BTreeMap;

	use rand::prelude::*;
	use rand_chacha::ChaCha8Rng;

	use crate::fixed_address_continuous_allocation::{FaVec, FaVecIndex};

	const TEST_BLOCK_SIZE: usize = 512;

	//all tests seed the RNG with a fixed value, so failures are reproducible
	#[test]
	fn fa_vec_properties_test_random_io() {
		//randomly add and remove elements in a 2:1 ratio until data reaches a certain count

		const ITEM_COUNT: usize = 100000;

		let mut rng = ChaCha8Rng::seed_from_u64(0xDEADBEEF);
		let mut vec = FaVec::<i64, TEST_BLOCK_SIZE>::new();
		let mut keys = Vec::<FaVecIndex<TEST_BLOCK_SIZE>>::new();

		//note, this access pattern is pretty basic, as it is very unlikely to leave holes in the vec
		while keys.len() < ITEM_COUNT {
			let remove_element = rng.random_ratio(1, 3);

			if remove_element && !keys.is_empty() {
				//remove a random tracked key; swap_remove keeps the key list compact
				let key_index = rng.random_range(0..keys.len());
				let key = keys.swap_remove(key_index);
				vec.remove(&key);
			} else {
				let new_value = rng.random::<i64>();
				let new_key = vec.push(new_value);
				keys.push(new_key);
			}
		}

		//this output should visually prove there's a random-ish (But generally increasing) order to inserted indexes
		//it's noisy, but worth looking at if there's an issue
		//println!("indices inserted: {:?}",keys);

		assert_eq!(vec.len(), keys.len());
		validate_vec_properties(&mut vec);
	}

	//grow to 2N, shrink by 1.5N, regrow by 0.5N: exercises block pruning and reuse
	#[test]
	fn fa_vec_properties_test_add_remove_add() {
		const ITEM_COUNT: usize = 100000;

		let mut rng = ChaCha8Rng::seed_from_u64(0xDEADBEEF);
		let mut vec = FaVec::<i64, TEST_BLOCK_SIZE>::new();
		let mut keys = Vec::<FaVecIndex<TEST_BLOCK_SIZE>>::new();

		const INITIAL_ITEM_COUNT: usize = ITEM_COUNT * 2;
		for total_items in 0..INITIAL_ITEM_COUNT {
			let new_value = rng.random::<i64>();
			let new_key = vec.push(new_value);
			keys.push(new_key);

			//during pure growth, capacity must track item count exactly, one block at a time
			assert_eq!(
				vec.capacity(),
				TEST_BLOCK_SIZE * (1 + total_items / TEST_BLOCK_SIZE)
			);
		}

		const HALF_ITEM_COUNT: usize = ITEM_COUNT / 2;
		for _ in 0..(ITEM_COUNT + HALF_ITEM_COUNT) {
			let key_index = rng.random_range(0..keys.len());
			let key = keys.swap_remove(key_index);
			vec.remove(&key);

			//TODO: it's impossible to assess capacity in-loop, due to the random remove order.  Adjust/implement test(s) to account for this
		}

		for _ in 0..HALF_ITEM_COUNT {
			let new_value = rng.random::<i64>();
			let new_key = vec.push(new_value);
			keys.push(new_key);
		}

		//this output should visually prove there's a random-ish (But generally increasing) order to inserted indexes
		//it's noisy, but worth looking at if there's an issue
		//println!("indices inserted: {:?}",keys);

		assert_eq!(vec.len(), keys.len());
		validate_vec_properties(&mut vec);
	}

	//repeated full churn cycles: remove N then re-add N, ten times over
	#[test]
	fn fa_vec_properties_test_add_remove_repeat() {
		const ITEM_COUNT: usize = 100000;

		let mut rng = ChaCha8Rng::seed_from_u64(0xDEADBEEF);
		let mut vec = FaVec::<i64, TEST_BLOCK_SIZE>::new();
		let mut keys = Vec::<FaVecIndex<TEST_BLOCK_SIZE>>::new();

		for _ in 0..2 * ITEM_COUNT {
			let new_value = rng.random::<i64>();
			let new_key = vec.push(new_value);
			keys.push(new_key);
		}

		for _ in 0..10 {
			for _ in 0..ITEM_COUNT {
				let key_index = rng.random_range(0..keys.len());
				let key = keys.swap_remove(key_index);
				vec.remove(&key);
			}

			for _ in 0..ITEM_COUNT {
				let new_value = rng.random::<i64>();
				let new_key = vec.push(new_value);
				keys.push(new_key);
			}
		}

		/*
		//Using this final remove step to end with N values causes a bell curve distribution
		//This is valid as a result, but useless from a testing perspective
		//this is because I cannot choose what address to remove from, only which one I pick when pushing new data
		for i in 0..ITEM_COUNT{
			let key_index=rng.gen_range(0..keys.len());
			let key= keys.swap_remove(key_index);
			vec.remove(&key);
		}
		// */

		//this output should visually prove there's a random-ish (But generally increasing) order to inserted indexes
		//it's noisy, but worth looking at if there's an issue
		//println!("indices inserted: {:?}",keys);

		assert_eq!(vec.len(), keys.len());
		validate_vec_properties(&mut vec);
	}

	//smoke test: iteration yields surviving elements in address order, skipping removed slots
	#[test]
	fn iterator_basic() {
		let mut vec = FaVec::<i64, TEST_BLOCK_SIZE>::new();

		let _first = vec.push(5);
		let _second = vec.push(7);
		let _third = vec.push(2);

		vec.remove(&_second);

		let vals = vec.iter().copied().collect::<Vec<i64>>();

		assert_eq!(vals, vec![5, 2]);
		assert_eq!(vec.len(), 2);
		validate_vec_properties(&mut vec);
	}

	//regression test: remove the only element of the second block, then push again
	#[test]
	fn ensure_correct_prune_on_block_edge() {
		let mut vec = FaVec::<i64, TEST_BLOCK_SIZE>::new();

		let mut last_block = FaVecIndex::from_absolute_index(0);
		for i in 0..(TEST_BLOCK_SIZE + 1) {
			last_block = vec.push(i as i64);
		}

		//previously, pruning was not correctly occurring when the end block became empty and no other blocks were vacant
		vec.remove(&last_block);
		//the free space map index tracking "completely vacant" blocks was being left as an empty set, instead of removed entirely, by the prune pass
		//the application would then panic when trying to insert, due to not expecting a state where the set existed but was empty when later trying to insert
		//the logic for finding where to insert expects that all tracked vacancy counts have at least one associated block, and becomes more complicated if it has to handle empty sets
		//TODO: consider explicit test for auditing this assumption in the validate function below, though not sure it's relevant to other cases
		vec.push((TEST_BLOCK_SIZE + 1) as i64);

		assert_eq!(vec.len(), TEST_BLOCK_SIZE + 1);
		validate_vec_properties(&mut vec);
	}

	//shared invariant checker: cross-validates per-block data against free_space
	//counters and the free_space_map, then checks the block-fullness distribution
	fn validate_vec_properties(vec: &mut FaVec<i64, TEST_BLOCK_SIZE>) {
		//assert that actual data matches free_space
		//assert that free space map matches free_space
		//statistically, most blocks should be pretty full, or pretty empty
		//statistically, full blocks should be mostly low-index blocks

		let mut free_space_buckets = BTreeMap::new();
		for (block_index, block) in vec.data_blocks.iter().enumerate() {
			//count the actual vacant slots and compare against the cached counter
			let total_free_blocks = block
				.data
				.iter()
				.filter(|item| -> bool { item.is_none() })
				.count();
			assert_eq!(total_free_blocks, block.free_space);

			//the block must be registered in the map under its exact free-space count
			assert!(vec.free_space_map.contains_key(&block.free_space));
			assert!(vec.free_space_map[&block.free_space].contains(&block_index));

			match free_space_buckets.get_mut(&block.free_space) {
				Some(count) => {
					*count += 1;
				}
				None => {
					free_space_buckets.insert(block.free_space, 1usize);
				}
			}
		}

		//there should be lots of free or empty blocks, but not as many in between
		//this vec should have a bathtub curve
		//TODO: this is just a basic guess approach, but we should ideally investigate how to determine this more rigorously
		//actually, it seems like rather than forming a bathtub, we do aggressively push towards keeping blocks full
		//barring pathologic bad remove behavior, we see a decreasing trend of free space vs count
		//TODO: reassess metrics
		let free_space_counts: Vec<usize> = free_space_buckets.values().copied().collect();
		let least_free_block_count = *free_space_counts.first().unwrap();
		let most_free_block_count = *free_space_counts.last().unwrap();
		let middle_free_block_count = free_space_counts[free_space_counts.len() / 2];

		println!("free space map: {:?}", vec.free_space_map);
		println!("free count vec: {:?}", free_space_counts);
		println!(
			"least/middle/most free: {} - {} - {}",
			least_free_block_count, middle_free_block_count, most_free_block_count
		);
		assert!(least_free_block_count >= middle_free_block_count);
		//assert!(most_free_block_count >= middle_free_block_count);

		//also, we should have more full blocks than empty ones
		//TODO: linear regression over the whole list should give a negative slope, instead of this basic comparison
		assert!(least_free_block_count >= most_free_block_count);
	}
}
601}