use crate::{
    allocator::{Allocator, Kind},
    block::Block,
    mapping::MappedRange,
    memory::Memory,
    AtomSize, Size,
};

use bit_set::BitSet;
use hal::{device::Device as _, Backend};
use slab::Slab;

use std::{
    collections::{BTreeSet, HashMap},
    hash::BuildHasherDefault,
    mem,
    ops::Range,
    ptr::NonNull,
    sync::Arc,
    thread,
};

/// Upper bound on the number of chunk slots a single size entry may occupy.
fn max_chunks_per_size() -> usize {
    (mem::size_of::<usize>() * 8).pow(4)
}

/// Memory block allocated from `GeneralAllocator`.
#[derive(Debug)]
pub struct GeneralBlock<B: Backend> {
    block_index: u32,
    chunk_index: u32,
    count: u32,
    memory: Arc<Memory<B>>,
    ptr: Option<NonNull<u8>>,
    range: Range<Size>,
}

unsafe impl<B: Backend> Send for GeneralBlock<B> {}
unsafe impl<B: Backend> Sync for GeneralBlock<B> {}

impl<B: Backend> GeneralBlock<B> {
    /// Size of this block in bytes.
    pub fn size(&self) -> Size {
        self.range.end - self.range.start
    }
}

impl<B: Backend> Block<B> for GeneralBlock<B> {
    fn properties(&self) -> hal::memory::Properties {
        self.memory.properties()
    }

    fn memory(&self) -> &B::Memory {
        self.memory.raw()
    }

    fn segment(&self) -> hal::memory::Segment {
        hal::memory::Segment {
            offset: self.range.start,
            size: Some(self.range.end - self.range.start),
        }
    }

    fn map<'a>(
        &'a mut self,
        _device: &B::Device,
        segment: hal::memory::Segment,
    ) -> Result<MappedRange<'a, B>, hal::device::MapError> {
        let requested_range = crate::segment_to_sub_range(segment, &self.range)?;
        let mapping_range = match self.memory.non_coherent_atom_size {
            Some(atom) => crate::align_range(&requested_range, atom),
            None => requested_range.clone(),
        };

        Ok(unsafe {
            MappedRange::from_raw(
                &*self.memory,
                self.ptr
                    .ok_or(hal::device::MapError::MappingFailed)?
                    .as_ptr()
                    .offset((mapping_range.start - self.range.start) as isize),
                mapping_range,
                requested_range,
            )
        })
    }
}

/// Configuration for `GeneralAllocator`.
#[derive(Clone, Copy, Debug)]
pub struct GeneralConfig {
    /// Granularity that all requested sizes are rounded up to; must be a power of two.
    pub block_size_granularity: Size,

    /// Maximum chunk size; must be a power of two.
    pub max_chunk_size: Size,

    /// Minimum size of an allocation made directly from the device; must be a
    /// power of two and at most `max_chunk_size`.
    pub min_device_allocation: Size,
}

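// A hedged usage sketch: the test name and the concrete values below are illustrative
// assumptions, not recommended defaults. Any powers of two that satisfy the assertions
// in `GeneralAllocator::new` would do just as well.
#[test]
fn test_example_config_invariants() {
    let config = GeneralConfig {
        block_size_granularity: 256,
        max_chunk_size: 32 * 1024 * 1024,
        min_device_allocation: 64 * 1024,
    };
    assert!(config.block_size_granularity.is_power_of_two());
    assert!(config.max_chunk_size.is_power_of_two());
    assert!(config.min_device_allocation.is_power_of_two());
    assert!(config.min_device_allocation <= config.max_chunk_size);
}
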
/// General-purpose allocator that sub-allocates blocks of many sizes from chunks,
/// which are either dedicated device allocations or blocks from larger size entries.
#[derive(Debug)]
pub struct GeneralAllocator<B: Backend> {
    /// Memory type that this allocator allocates from.
    memory_type: hal::MemoryTypeId,

    /// Properties of the memory type.
    memory_properties: hal::memory::Properties,

    /// Granularity that all requested sizes are rounded up to.
    block_size_granularity: Size,

    /// Maximum chunk size.
    max_chunk_size: Size,

    /// Minimum size of an allocation made directly from the device.
    min_device_allocation: Size,

    /// Size entries, keyed by block size.
    sizes: HashMap<Size, SizeEntry<B>, BuildHasherDefault<fxhash::FxHasher>>,

    /// Ordered set of block sizes with entries, used when picking a size to carve
    /// new chunks from.
    chunks: BTreeSet<Size>,

    /// Non-coherent atom size, if the memory is host-visible and non-coherent.
    non_coherent_atom_size: Option<AtomSize>,
}

unsafe impl<B: Backend> Send for GeneralAllocator<B> {}
unsafe impl<B: Backend> Sync for GeneralAllocator<B> {}

#[derive(Debug)]
struct SizeEntry<B: Backend> {
    /// Indices of chunks that still have free blocks.
    ready_chunks: BitSet,

    /// Chunks of this block size.
    chunks: Slab<Chunk<B>>,
}

impl<B: Backend> Default for SizeEntry<B> {
    fn default() -> Self {
        SizeEntry {
            chunks: Slab::new(),
            ready_chunks: BitSet::new(),
        }
    }
}

impl<B: Backend> SizeEntry<B> {
    /// Block count to request for the next chunk: twice the largest existing
    /// chunk's block count, and at least 2.
    fn next_block_count(&self, block_size: Size) -> u32 {
        self.chunks.iter().fold(1u32, |max, (_, chunk)| {
            (chunk.num_blocks(block_size) as u32).max(max)
        }) * 2
    }
}

type BlockMask = u64;

const MIN_BLOCKS_PER_CHUNK: u32 = 8;
const MAX_BLOCKS_PER_CHUNK: u32 = mem::size_of::<BlockMask>() as u32 * 8;
const LARGE_BLOCK_THRESHOLD: Size = 0x10000;

#[test]
fn test_constants() {
    assert!(MIN_BLOCKS_PER_CHUNK < MAX_BLOCKS_PER_CHUNK);
    assert!(LARGE_BLOCK_THRESHOLD * 2 >= MIN_BLOCKS_PER_CHUNK as Size);
}

impl<B: Backend> GeneralAllocator<B> {
    /// Create a new general allocator for the given memory type.
    ///
    /// Panics if the sizes in `config` are not powers of two or if
    /// `min_device_allocation` exceeds `max_chunk_size`.
    pub fn new(
        memory_type: hal::MemoryTypeId,
        memory_properties: hal::memory::Properties,
        config: GeneralConfig,
        non_coherent_atom_size: Size,
    ) -> Self {
        log::trace!(
            "Create new allocator: type: '{:?}', properties: '{:#?}', config: '{:#?}'",
            memory_type,
            memory_properties,
            config
        );

        assert!(
            config.block_size_granularity.is_power_of_two(),
            "Allocation granularity must be a power of two"
        );
        assert!(
            config.max_chunk_size.is_power_of_two(),
            "Max chunk size must be a power of two"
        );

        assert!(
            config.min_device_allocation.is_power_of_two(),
            "Min device allocation must be a power of two"
        );

        assert!(
            config.min_device_allocation <= config.max_chunk_size,
            "Min device allocation must be less than or equal to max chunk size"
        );

        let (block_size_granularity, non_coherent_atom_size) =
            if crate::is_non_coherent_visible(memory_properties) {
                let granularity = non_coherent_atom_size
                    .max(config.block_size_granularity)
                    .next_power_of_two();
                (granularity, AtomSize::new(non_coherent_atom_size))
            } else {
                (config.block_size_granularity, None)
            };

        GeneralAllocator {
            memory_type,
            memory_properties,
            block_size_granularity,
            max_chunk_size: config.max_chunk_size,
            min_device_allocation: config.min_device_allocation,
            sizes: HashMap::default(),
            chunks: BTreeSet::new(),
            non_coherent_atom_size,
        }
    }

    /// Maximum size of a block that can be sub-allocated from a chunk
    /// (`max_chunk_size / MIN_BLOCKS_PER_CHUNK`).
    pub fn max_sub_block_size(&self) -> Size {
        self.max_chunk_size / MIN_BLOCKS_PER_CHUNK as Size
    }

    /// Allocate a dedicated chunk of `count` blocks of `block_size` directly from the device.
    fn alloc_chunk_from_device(
        &self,
        device: &B::Device,
        block_size: Size,
        count: u32,
    ) -> Result<Chunk<B>, hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk with {} blocks of size {} from device",
            count,
            block_size
        );

        let (memory, ptr) = unsafe {
            super::allocate_memory_helper(
                device,
                self.memory_type,
                block_size * count as Size,
                self.memory_properties,
                self.non_coherent_atom_size,
            )?
        };

        Ok(Chunk::from_memory(block_size, memory, ptr))
    }

    /// Allocate a chunk able to hold roughly `requested_count` blocks of `block_size`,
    /// carving it out of a larger size entry when possible and allocating from the
    /// device otherwise. Returns the chunk and the amount of newly allocated device memory.
    fn alloc_chunk(
        &mut self,
        device: &B::Device,
        block_size: Size,
        requested_count: u32,
    ) -> Result<(Chunk<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk for roughly {} blocks of size {}",
            requested_count,
            block_size
        );

        let min_chunk_size = MIN_BLOCKS_PER_CHUNK as Size * block_size;
        let max_chunk_size = MAX_BLOCKS_PER_CHUNK as Size * block_size;
        let clamped_count = requested_count
            .next_power_of_two()
            .max(MIN_BLOCKS_PER_CHUNK)
            .min(MAX_BLOCKS_PER_CHUNK);
        let requested_chunk_size = clamped_count as Size * block_size;

        // Even the smallest chunk for this block size is too large to be carved out of
        // another size entry, so allocate it from the device directly.
        if min_chunk_size > self.max_sub_block_size() {
            let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
            return Ok((chunk, requested_chunk_size));
        }

        let (block, allocated) = match self
            .chunks
            .range(min_chunk_size..=max_chunk_size)
            .rfind(|&size| size % block_size == 0)
        {
            Some(&chunk_size) => self.alloc_from_entry(device, chunk_size, 1, block_size)?,
            None if requested_chunk_size > self.min_device_allocation => {
                let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
                return Ok((chunk, requested_chunk_size));
            }
            None => self.alloc_block(device, requested_chunk_size, block_size)?,
        };

        Ok((Chunk::from_block(block_size, block), allocated))
    }

    /// Allocate `count` consecutive blocks of `block_size` from the chunk at `chunk_index`,
    /// if it has a suitably aligned free run.
    fn alloc_from_chunk(
        chunks: &mut Slab<Chunk<B>>,
        chunk_index: usize,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Option<GeneralBlock<B>> {
        log::trace!(
            "Allocate {} consecutive blocks of size {} from chunk {}",
            count,
            block_size,
            chunk_index
        );

        let chunk = &mut chunks[chunk_index];
        let block_index = chunk.acquire_blocks(count, block_size, align)?;
        let block_range = chunk.blocks_range(block_size, block_index, count);

        let block_start = block_range.start;
        debug_assert_eq!((block_range.end - block_start) % count as Size, 0);

        Some(GeneralBlock {
            range: block_range,
            memory: Arc::clone(chunk.shared_memory()),
            block_index,
            chunk_index: chunk_index as u32,
            count,
            ptr: chunk.mapping_ptr().map(|ptr| unsafe {
                let offset = (block_start - chunk.range().start) as isize;
                NonNull::new_unchecked(ptr.as_ptr().offset(offset))
            }),
        })
    }

    /// Allocate `count` consecutive blocks of `block_size` from this size's entry,
    /// creating a new chunk if no ready chunk can serve the request.
    /// Returns the block and the amount of newly allocated device memory.
    fn alloc_from_entry(
        &mut self,
        device: &B::Device,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate {} consecutive blocks of size {} from the size entry",
            count,
            block_size
        );

        debug_assert!(count < MIN_BLOCKS_PER_CHUNK);
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested entry block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

        for chunk_index in (&size_entry.ready_chunks).iter() {
            if let Some(block) = Self::alloc_from_chunk(
                &mut size_entry.chunks,
                chunk_index,
                block_size,
                count,
                align,
            ) {
                return Ok((block, 0));
            }
        }

        if size_entry.chunks.vacant_entry().key() > max_chunks_per_size() {
            return Err(hal::device::OutOfMemory::Host.into());
        }

        let estimated_block_count = size_entry.next_block_count(block_size);
        let (chunk, allocated) = self.alloc_chunk(device, block_size, estimated_block_count)?;
        log::trace!("\tChunk init mask: 0x{:x}", chunk.blocks);
        let size_entry = self.sizes.entry(block_size).or_default();
        let chunk_index = size_entry.chunks.insert(chunk);

        let block = Self::alloc_from_chunk(
            &mut size_entry.chunks,
            chunk_index,
            block_size,
            count,
            align,
        )
        .expect("New chunk should yield blocks");

        if !size_entry.chunks[chunk_index].is_exhausted() {
            size_entry.ready_chunks.insert(chunk_index);
        }

        Ok((block, allocated))
    }

    /// Allocate a block of `block_size` aligned to `align`. Sufficiently large blocks
    /// may be assembled from consecutive blocks of a smaller size entry.
    fn alloc_block(
        &mut self,
        device: &B::Device,
        block_size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!("Allocate block of size {}", block_size);

        debug_assert_eq!(
            block_size & (self.block_size_granularity - 1),
            0,
            "Requested block size {} is not aligned to the size granularity {}",
            block_size,
            self.block_size_granularity
        );
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

        let overhead = (MIN_BLOCKS_PER_CHUNK - 1) / size_entry.next_block_count(block_size);
        if overhead >= 1 && block_size >= LARGE_BLOCK_THRESHOLD {
            let ideal_chunk_size = crate::align_size(
                block_size * 2 / MIN_BLOCKS_PER_CHUNK as Size,
                crate::AtomSize::new(align).unwrap(),
            );
            let chunk_size = match self
                .chunks
                .range(ideal_chunk_size..block_size * overhead as Size)
                .find(|&size| size % align == 0)
            {
                Some(&size) => size,
                None => {
                    self.chunks.insert(ideal_chunk_size);
                    ideal_chunk_size
                }
            };

            self.alloc_from_entry(
                device,
                chunk_size,
                ((block_size - 1) / chunk_size + 1) as u32,
                align,
            )
        } else {
            self.chunks.insert(block_size);
            self.alloc_from_entry(device, block_size, 1, align)
        }
    }

    /// Free a chunk, returning the amount of device memory released.
    fn free_chunk(&mut self, device: &B::Device, chunk: Chunk<B>, block_size: Size) -> Size {
        log::trace!("Free chunk: {:#?}", chunk);
        assert!(chunk.is_unused(block_size));
        match chunk.flavor {
            ChunkFlavor::Dedicated { memory, .. } => {
                let size = memory.size();
                match Arc::try_unwrap(memory) {
                    Ok(mem) => unsafe {
                        if mem.is_mappable() {
                            device.unmap_memory(mem.raw());
                        }
                        device.free_memory(mem.into_raw());
                    },
                    Err(_) => {
                        log::error!("Allocated `Chunk` was freed, but its memory is still shared and will never be destroyed");
                    }
                }
                size
            }
            ChunkFlavor::General(block) => self.free(device, block),
        }
    }

    /// Free a block, releasing its chunk when the chunk becomes unused.
    /// Returns the amount of device memory released.
    fn free_block(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        log::trace!("Free block: {:#?}", block);

        let block_size = block.size() / block.count as Size;
        let size_entry = self
            .sizes
            .get_mut(&block_size)
            .expect("Unable to get size entry from which block was allocated");
        let chunk_index = block.chunk_index as usize;
        let chunk = &mut size_entry.chunks[chunk_index];
        let block_index = block.block_index;
        let count = block.count;

        chunk.release_blocks(block_index, count);
        if chunk.is_unused(block_size) {
            size_entry.ready_chunks.remove(chunk_index);
            let chunk = size_entry.chunks.remove(chunk_index);
            // Drop the block's `Arc<Memory>` clone first so the chunk's memory can be unwrapped.
            drop(block);
            self.free_chunk(device, chunk, block_size)
        } else {
            size_entry.ready_chunks.insert(chunk_index);
            0
        }
    }

    /// Free unused memory held by the allocator. Currently a no-op that returns 0.
    pub fn clear(&mut self, _device: &B::Device) -> Size {
        0
    }
}

impl<B: Backend> Allocator<B> for GeneralAllocator<B> {
    type Block = GeneralBlock<B>;

    const KIND: Kind = Kind::General;

    fn alloc(
        &mut self,
        device: &B::Device,
        size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        debug_assert!(align.is_power_of_two());
        let aligned_size = ((size - 1) | (align - 1) | (self.block_size_granularity - 1)) + 1;
        let map_aligned_size = match self.non_coherent_atom_size {
            Some(atom) => crate::align_size(aligned_size, atom),
            None => aligned_size,
        };

        log::trace!(
            "Allocate general block: size: {}, align: {}, aligned size: {}, type: {}",
            size,
            align,
            map_aligned_size,
            self.memory_type.0
        );

        self.alloc_block(device, map_aligned_size, align)
    }

    fn free(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        self.free_block(device, block)
    }
}

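// A small sketch of the size-rounding formula in `Allocator::alloc` above: for power-of-two
// `align` and `granularity`, OR-ing in the `(x - 1)` masks rounds `size` up to a multiple of
// both. The `round_up` helper and the values here are illustrative only.
#[test]
fn test_alloc_size_rounding() {
    fn round_up(size: Size, align: Size, granularity: Size) -> Size {
        ((size - 1) | (align - 1) | (granularity - 1)) + 1
    }
    assert_eq!(round_up(1, 1, 1), 1);
    assert_eq!(round_up(13, 4, 1), 16);
    assert_eq!(round_up(13, 4, 32), 32);
    assert_eq!(round_up(64, 16, 32), 64);
}
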
impl<B: Backend> Drop for GeneralAllocator<B> {
    fn drop(&mut self) {
        for (size, entry) in self.sizes.drain() {
            if !thread::panicking() {
                assert_eq!(entry.chunks.len(), 0, "SizeEntry({}) is still used", size);
            } else {
                log::error!("Memory leak: SizeEntry({}) is still used", size);
            }
        }
    }
}

#[derive(Debug)]
enum ChunkFlavor<B: Backend> {
    /// Chunk backed by its own device memory allocation.
    Dedicated {
        memory: Arc<Memory<B>>,
        ptr: Option<NonNull<u8>>,
    },
    /// Chunk carved out of a block from another size entry.
    General(GeneralBlock<B>),
}

#[derive(Debug)]
struct Chunk<B: Backend> {
    flavor: ChunkFlavor<B>,
    /// One bit per block; a set bit means the block is free.
    blocks: BlockMask,
}

impl<B: Backend> Chunk<B> {
    fn from_memory(block_size: Size, memory: Memory<B>, ptr: Option<NonNull<u8>>) -> Self {
        let blocks = memory.size() / block_size;
        debug_assert!(blocks <= MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::Dedicated {
                memory: Arc::new(memory),
                ptr,
            },
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn from_block(block_size: Size, chunk_block: GeneralBlock<B>) -> Self {
        let blocks = (chunk_block.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::General(chunk_block),
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn shared_memory(&self) -> &Arc<Memory<B>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => memory,
            ChunkFlavor::General(ref block) => &block.memory,
        }
    }

    fn range(&self) -> Range<Size> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => 0..memory.size(),
            ChunkFlavor::General(ref block) => block.range.clone(),
        }
    }

    fn blocks_range(&self, block_size: Size, block_index: u32, count: u32) -> Range<Size> {
        let range = self.range();
        let start = range.start + block_size * block_index as Size;
        let end = start + block_size * count as Size;
        debug_assert!(end <= range.end);
        start..end
    }

    fn num_blocks(&self, block_size: Size) -> Size {
        let range = self.range();
        ((range.end - range.start) / block_size).min(MAX_BLOCKS_PER_CHUNK as Size)
    }

    fn is_unused(&self, block_size: Size) -> bool {
        let blocks = self.num_blocks(block_size);

        let high_bit = 1 << (blocks - 1);
        let mask = (high_bit - 1) | high_bit;

        debug_assert!(self.blocks <= mask);
        self.blocks == mask
    }

    fn is_exhausted(&self) -> bool {
        self.blocks == 0
    }

    fn acquire_blocks(&mut self, count: u32, block_size: Size, align: Size) -> Option<u32> {
        debug_assert!(count > 0 && count <= MAX_BLOCKS_PER_CHUNK);

        // Bit `i` of `blocks` ends up set when blocks `i..i + count` are all free.
        let mut blocks: BlockMask = !0;
        for i in 0..count {
            blocks &= self.blocks >> i;
        }
        // Pick the first candidate start whose offset satisfies the requested alignment.
        while blocks != 0 {
            let index = blocks.trailing_zeros();
            blocks ^= 1 << index;

            if (index as Size * block_size) & (align - 1) == 0 {
                let mask = ((1 << count) - 1) << index;
                debug_assert_eq!(self.blocks & mask, mask);
                self.blocks ^= mask;
                log::trace!(
                    "Chunk acquired at {}, mask: 0x{:x} -> 0x{:x}",
                    index,
                    mask,
                    self.blocks
                );
                return Some(index);
            }
        }
        None
    }

    fn release_blocks(&mut self, index: u32, count: u32) {
        debug_assert!(index + count <= MAX_BLOCKS_PER_CHUNK);
        let mask = ((1 << count) - 1) << index;
        debug_assert_eq!(self.blocks & mask, 0);
        self.blocks |= mask;
        log::trace!(
            "Chunk released at {}, mask: 0x{:x} -> 0x{:x}",
            index,
            mask,
            self.blocks
        );
    }

    fn mapping_ptr(&self) -> Option<NonNull<u8>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ptr, .. } => ptr,
            ChunkFlavor::General(ref block) => block.ptr,
        }
    }
}
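
// A small sketch of the `BlockMask` arithmetic used by `Chunk::from_memory`/`from_block`
// and `Chunk::acquire_blocks`/`release_blocks`: a fresh chunk has its lowest `blocks` bits
// set (all free), acquiring a run of `count` blocks clears a contiguous bit range, and
// releasing sets the same range again. The concrete values below are illustrative only.
#[test]
fn test_block_mask_roundtrip() {
    // Initial mask for a chunk of `blocks` blocks, as built in `from_memory`/`from_block`.
    let blocks = 10u32;
    let high_bit: BlockMask = 1 << (blocks - 1);
    let initial = (high_bit - 1) | high_bit;
    assert_eq!(initial, (1 << blocks) - 1);

    // Acquire `count` consecutive blocks starting at `index`, then release them.
    let (index, count) = (5u32, 3u32);
    let mask: BlockMask = ((1 << count) - 1) << index;
    let acquired = initial ^ mask;
    assert_eq!(acquired & mask, 0);
    assert_eq!(acquired | mask, initial);
}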