use crate::{
    allocator::{Allocator, Kind},
    block::Block,
    mapping::MappedRange,
    memory::Memory,
    AtomSize, Size,
};
use hal::{device::Device as _, Backend};
use hibitset::{BitSet, BitSetLike as _};
use std::{
    collections::{BTreeSet, HashMap},
    hash::BuildHasherDefault,
    mem,
    ops::Range,
    ptr::NonNull,
    sync::Arc,
    thread,
};

/// Upper bound on the number of chunks tracked per block size.
fn max_chunks_per_size() -> usize {
    (mem::size_of::<usize>() * 8).pow(4)
}

/// Memory block allocated from a [`GeneralAllocator`].
#[derive(Debug)]
pub struct GeneralBlock<B: Backend> {
    block_index: u32,
    chunk_index: u32,
    count: u32,
    memory: Arc<Memory<B>>,
    ptr: Option<NonNull<u8>>,
    range: Range<Size>,
}

unsafe impl<B: Backend> Send for GeneralBlock<B> {}
unsafe impl<B: Backend> Sync for GeneralBlock<B> {}

impl<B: Backend> GeneralBlock<B> {
    /// Size of this block in bytes.
    pub fn size(&self) -> Size {
        self.range.end - self.range.start
    }
}

impl<B: Backend> Block<B> for GeneralBlock<B> {
    fn properties(&self) -> hal::memory::Properties {
        self.memory.properties()
    }

    fn memory(&self) -> &B::Memory {
        self.memory.raw()
    }

    fn segment(&self) -> hal::memory::Segment {
        hal::memory::Segment {
            offset: self.range.start,
            size: Some(self.range.end - self.range.start),
        }
    }

    fn map<'a>(
        &'a mut self,
        _device: &B::Device,
        segment: hal::memory::Segment,
    ) -> Result<MappedRange<'a, B>, hal::device::MapError> {
        let requested_range = crate::segment_to_sub_range(segment, &self.range)?;
        let mapping_range = match self.memory.non_coherent_atom_size {
            Some(atom) => crate::align_range(&requested_range, atom),
            None => requested_range.clone(),
        };

        Ok(unsafe {
            MappedRange::from_raw(
                &*self.memory,
                self.ptr
                    .ok_or(hal::device::MapError::MappingFailed)?
                    .as_ptr()
                    .offset((mapping_range.start - self.range.start) as isize),
                mapping_range,
                requested_range,
            )
        })
    }
}

/// Configuration for [`GeneralAllocator`].
#[derive(Clone, Copy, Debug)]
pub struct GeneralConfig {
    /// All allocation requests are rounded up to a multiple of this value.
    pub block_size_granularity: Size,

    /// Maximum size of a chunk of blocks, in bytes.
    pub max_chunk_size: Size,

    /// Minimum size of a device memory allocation.
    pub min_device_allocation: Size,
}

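// A minimal sketch (assumption: not part of the original test suite) of a
// `GeneralConfig` with illustrative values; it only checks the invariants that
// `GeneralAllocator::new` asserts. The numbers themselves are not recommendations.
#[test]
fn test_example_config_invariants() {
    let config = GeneralConfig {
        block_size_granularity: 256,
        max_chunk_size: 32 * 1024 * 1024,
        min_device_allocation: 64 * 1024,
    };
    assert!(config.block_size_granularity.is_power_of_two());
    assert!(config.max_chunk_size.is_power_of_two());
    assert!(config.min_device_allocation.is_power_of_two());
    assert!(config.min_device_allocation <= config.max_chunk_size);
}
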
/// General-purpose allocator.
///
/// Sub-allocates blocks of one memory type from chunks, reusing freed blocks
/// and falling back to dedicated device allocations for large requests.
#[derive(Debug)]
pub struct GeneralAllocator<B: Backend> {
    /// Memory type that this allocator allocates from.
    memory_type: hal::MemoryTypeId,

    /// Properties of that memory type.
    memory_properties: hal::memory::Properties,

    /// All requests are rounded up to a multiple of this value.
    block_size_granularity: Size,

    /// Maximum chunk size in bytes.
    max_chunk_size: Size,

    /// Minimum size of a device memory allocation.
    min_device_allocation: Size,

    /// Chunks and bookkeeping, grouped by block size.
    sizes: HashMap<Size, SizeEntry<B>, BuildHasherDefault<fxhash::FxHasher>>,

    /// Ordered set of block sizes for which chunks have been allocated.
    chunks: BTreeSet<Size>,

    non_coherent_atom_size: Option<AtomSize>,
}

unsafe impl<B: Backend> Send for GeneralAllocator<B> {}
unsafe impl<B: Backend> Sync for GeneralAllocator<B> {}

#[derive(Debug)]
struct SizeEntry<B: Backend> {
    /// Total number of blocks of this size allocated so far.
    total_blocks: Size,

    /// Indices of chunks that still have free blocks.
    ready_chunks: BitSet,

    /// Chunks subdivided into blocks of this size.
    chunks: slab::Slab<Chunk<B>>,
}

impl<B: Backend> Default for SizeEntry<B> {
    fn default() -> Self {
        SizeEntry {
            chunks: Default::default(),
            total_blocks: 0,
            ready_chunks: Default::default(),
        }
    }
}

/// Bitmask of blocks within a chunk; a set bit marks a free block.
type BlockMask = u64;

const MIN_BLOCKS_PER_CHUNK: u32 = 8;
const MAX_BLOCKS_PER_CHUNK: u32 = mem::size_of::<BlockMask>() as u32 * 8;
const LARGE_BLOCK_THRESHOLD: Size = 0x10000;

#[test]
fn test_constants() {
    assert!(MIN_BLOCKS_PER_CHUNK < MAX_BLOCKS_PER_CHUNK);
    assert!(LARGE_BLOCK_THRESHOLD * 2 >= MIN_BLOCKS_PER_CHUNK as Size);
}
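
// A minimal sketch (assumption: not part of the original test suite) checking the
// full-mask convention used by `Chunk::from_memory`/`Chunk::from_block`: for `n`
// blocks, `(high_bit - 1) | high_bit` with `high_bit = 1 << (n - 1)` sets exactly
// the low `n` bits, which `Chunk::is_unused` later compares against.
#[test]
fn test_full_block_mask() {
    for blocks in 1..=MAX_BLOCKS_PER_CHUNK as Size {
        let high_bit: BlockMask = 1 << (blocks - 1);
        let mask = (high_bit - 1) | high_bit;
        assert_eq!(mask.count_ones() as Size, blocks);
        assert_eq!(mask.trailing_zeros(), 0);
    }
}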

impl<B: Backend> GeneralAllocator<B> {
    /// Create a new general allocator for the given memory type.
    pub fn new(
        memory_type: hal::MemoryTypeId,
        memory_properties: hal::memory::Properties,
        config: GeneralConfig,
        non_coherent_atom_size: Size,
    ) -> Self {
        log::trace!(
            "Create new allocator: type: '{:?}', properties: '{:#?}' config: '{:#?}'",
            memory_type,
            memory_properties,
            config
        );

        assert!(
            config.block_size_granularity.is_power_of_two(),
            "Allocation granularity must be a power of two"
        );
        assert!(
            config.max_chunk_size.is_power_of_two(),
            "Max chunk size must be a power of two"
        );
        assert!(
            config.min_device_allocation.is_power_of_two(),
            "Min device allocation must be a power of two"
        );
        assert!(
            config.min_device_allocation <= config.max_chunk_size,
            "Min device allocation must be less than or equal to max chunk size"
        );

        let (block_size_granularity, non_coherent_atom_size) =
            if crate::is_non_coherent_visible(memory_properties) {
                let granularity = non_coherent_atom_size
                    .max(config.block_size_granularity)
                    .next_power_of_two();
                (granularity, AtomSize::new(non_coherent_atom_size))
            } else {
                (config.block_size_granularity, None)
            };

        GeneralAllocator {
            memory_type,
            memory_properties,
            block_size_granularity,
            max_chunk_size: config.max_chunk_size,
            min_device_allocation: config.min_device_allocation,
            sizes: HashMap::default(),
            chunks: BTreeSet::new(),
            non_coherent_atom_size,
        }
    }

    /// Maximum size of a sub-allocated block, `max_chunk_size / MIN_BLOCKS_PER_CHUNK`.
    pub fn max_sub_block_size(&self) -> Size {
        self.max_chunk_size / MIN_BLOCKS_PER_CHUNK as Size
    }

    /// Allocate a new chunk backed by a dedicated device allocation of
    /// `block_size * count` bytes.
    fn alloc_chunk_from_device(
        &self,
        device: &B::Device,
        block_size: Size,
        count: u32,
    ) -> Result<Chunk<B>, hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk with {} blocks size {} from device",
            count,
            block_size
        );

        let (memory, ptr) = unsafe {
            super::allocate_memory_helper(
                device,
                self.memory_type,
                block_size * count as Size,
                self.memory_properties,
                self.non_coherent_atom_size,
            )?
        };

        Ok(Chunk::from_memory(block_size, memory, ptr))
    }

    /// Allocate a chunk able to hold roughly `requested_count` blocks of `block_size`.
    ///
    /// Returns the chunk and the amount of memory newly allocated from the device.
    fn alloc_chunk(
        &mut self,
        device: &B::Device,
        block_size: Size,
        requested_count: u32,
    ) -> Result<(Chunk<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate chunk for roughly {} blocks of size {}",
            requested_count,
            block_size
        );

        let min_chunk_size = MIN_BLOCKS_PER_CHUNK as Size * block_size;
        let max_chunk_size = MAX_BLOCKS_PER_CHUNK as Size * block_size;
        let clamped_count = requested_count
            .next_power_of_two()
            .max(MIN_BLOCKS_PER_CHUNK)
            .min(MAX_BLOCKS_PER_CHUNK);
        let requested_chunk_size = clamped_count as Size * block_size;

        // If even the smallest possible chunk exceeds the sub-allocation limit,
        // back the chunk with a dedicated device allocation.
        if min_chunk_size > self.max_sub_block_size() {
            let chunk = self.alloc_chunk_from_device(device, block_size, clamped_count)?;
            return Ok((chunk, requested_chunk_size));
        }

        // Otherwise carve the chunk out of a block: reuse an existing chunk size
        // that fits, or allocate a new block of the requested chunk size.
        let (block, allocated) = match self
            .chunks
            .range(min_chunk_size..=max_chunk_size)
            .rfind(|&size| size % block_size == 0)
        {
            Some(&chunk_size) => self.alloc_from_entry(device, chunk_size, 1, block_size)?,
            None => self.alloc_block(device, requested_chunk_size, block_size)?,
        };

        Ok((Chunk::from_block(block_size, block), allocated))
    }

    /// Allocate `count` consecutive blocks of `block_size` from the chunk at `chunk_index`.
    fn alloc_from_chunk(
        chunks: &mut slab::Slab<Chunk<B>>,
        chunk_index: u32,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Option<GeneralBlock<B>> {
        log::trace!(
            "Allocate {} consecutive blocks of size {} from chunk {}",
            count,
            block_size,
            chunk_index
        );

        let chunk = &mut chunks[chunk_index as usize];
        let block_index = chunk.acquire_blocks(count, block_size, align)?;
        let block_range = chunk.blocks_range(block_size, block_index, count);

        let block_start = block_range.start;
        debug_assert_eq!((block_range.end - block_start) % count as Size, 0);

        Some(GeneralBlock {
            range: block_range,
            memory: Arc::clone(chunk.shared_memory()),
            block_index,
            chunk_index,
            count,
            ptr: chunk.mapping_ptr().map(|ptr| unsafe {
                let offset = (block_start - chunk.range().start) as isize;
                NonNull::new_unchecked(ptr.as_ptr().offset(offset))
            }),
        })
    }

    /// Allocate `count` consecutive blocks of `block_size` from the size entry,
    /// creating a new chunk if no ready chunk can satisfy the request.
    ///
    /// Returns the block and the amount of memory newly allocated from the device
    /// (0 if an existing chunk served the request).
    fn alloc_from_entry(
        &mut self,
        device: &B::Device,
        block_size: Size,
        count: u32,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!(
            "Allocate {} consecutive blocks for size {} from the entry",
            count,
            block_size
        );

        debug_assert!(count < MIN_BLOCKS_PER_CHUNK);
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested entry block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();

        // Try chunks that are known to have free blocks first.
        for chunk_index in (&size_entry.ready_chunks).iter() {
            if let Some(block) = Self::alloc_from_chunk(
                &mut size_entry.chunks,
                chunk_index,
                block_size,
                count,
                align,
            ) {
                return Ok((block, 0));
            }
        }

        if size_entry.chunks.vacant_entry().key() > max_chunks_per_size() {
            return Err(hal::device::OutOfMemory::Host.into());
        }

        // No ready chunk could serve the request; allocate a new chunk sized from
        // the number of blocks of this size allocated so far.
        let estimated_block_count = size_entry.total_blocks as u32;
        let (chunk, allocated) = self.alloc_chunk(device, block_size, estimated_block_count)?;
        log::trace!("\tChunk init mask: 0x{:x}", chunk.blocks);
        let size_entry = self.sizes.entry(block_size).or_default();
        let chunk_index = size_entry.chunks.insert(chunk) as u32;

        let block = Self::alloc_from_chunk(
            &mut size_entry.chunks,
            chunk_index,
            block_size,
            count,
            align,
        )
        .expect("New chunk should yield blocks");

        if !size_entry.chunks[chunk_index as usize].is_exhausted() {
            size_entry.ready_chunks.add(chunk_index);
        }

        Ok((block, allocated))
    }

    /// Allocate a single block of `block_size`, growing chunks as necessary.
    ///
    /// Returns the block and the amount of memory newly allocated from the device.
    fn alloc_block(
        &mut self,
        device: &B::Device,
        block_size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        log::trace!("Allocate block of size {}", block_size);

        debug_assert_eq!(
            block_size & (self.block_size_granularity - 1),
            0,
            "Requested block size {} is not aligned to the size granularity {}",
            block_size,
            self.block_size_granularity
        );
        debug_assert_eq!(
            block_size % align,
            0,
            "Requested block size {} is not aligned to {}",
            block_size,
            align
        );
        let size_entry = self.sizes.entry(block_size).or_default();
        size_entry.total_blocks += 1;

        let overhead = (MIN_BLOCKS_PER_CHUNK as Size - 1) / size_entry.total_blocks;
        if overhead >= 1 && block_size >= LARGE_BLOCK_THRESHOLD {
            // Large block size with few allocations so far: build the block from
            // consecutive smaller blocks instead of dedicating a whole chunk to this size.
            let ideal_chunk_size = crate::align_size(
                block_size * 2 / MIN_BLOCKS_PER_CHUNK as Size,
                crate::AtomSize::new(align).unwrap(),
            );
            let chunk_size = match self
                .chunks
                .range(ideal_chunk_size..block_size * overhead)
                .find(|&size| size % align == 0)
            {
                Some(&size) => size,
                None => {
                    self.chunks.insert(ideal_chunk_size);
                    ideal_chunk_size
                }
            };

            self.alloc_from_entry(
                device,
                chunk_size,
                ((block_size - 1) / chunk_size + 1) as u32,
                align,
            )
        } else {
            self.chunks.insert(block_size);
            self.alloc_from_entry(device, block_size, 1, align)
        }
    }

    /// Free the given chunk, returning the number of bytes released back to the device.
    fn free_chunk(&mut self, device: &B::Device, chunk: Chunk<B>, block_size: Size) -> Size {
        log::trace!("Free chunk: {:#?}", chunk);
        assert!(chunk.is_unused(block_size));
        match chunk.flavor {
            ChunkFlavor::Dedicated { memory, .. } => {
                let size = memory.size();
                match Arc::try_unwrap(memory) {
                    Ok(mem) => unsafe {
                        if mem.is_mappable() {
                            device.unmap_memory(mem.raw());
                        }
                        device.free_memory(mem.into_raw());
                    },
                    Err(_) => {
                        log::error!("Allocated `Chunk` was freed, but its memory is still shared and will never be destroyed");
                    }
                }
                size
            }
            ChunkFlavor::General(block) => self.free(device, block),
        }
    }

    /// Free the given block, returning the number of bytes released back to the device.
    fn free_block(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        log::trace!("Free block: {:#?}", block);

        let block_size = block.size() / block.count as Size;
        let size_entry = self
            .sizes
            .get_mut(&block_size)
            .expect("Unable to get size entry from which block was allocated");
        let chunk_index = block.chunk_index;
        let chunk = &mut size_entry.chunks[chunk_index as usize];
        let block_index = block.block_index;
        let count = block.count;

        chunk.release_blocks(block_index, count);
        if chunk.is_unused(block_size) {
            size_entry.ready_chunks.remove(chunk_index);
            let chunk = size_entry.chunks.remove(chunk_index as usize);
            // The block keeps an `Arc` to the chunk's memory; drop it before freeing the chunk.
            drop(block);
            self.free_chunk(device, chunk, block_size)
        } else {
            size_entry.ready_chunks.add(chunk_index);
            0
        }
    }

    pub fn clear(&mut self, _device: &B::Device) -> Size {
        0
    }
}

impl<B: Backend> Allocator<B> for GeneralAllocator<B> {
    type Block = GeneralBlock<B>;

    const KIND: Kind = Kind::General;

    fn alloc(
        &mut self,
        device: &B::Device,
        size: Size,
        align: Size,
    ) -> Result<(GeneralBlock<B>, Size), hal::device::AllocationError> {
        debug_assert!(align.is_power_of_two());
        // Round up to both the requested alignment and the allocator's granularity
        // (all powers of two), then to the non-coherent atom size if needed.
        let aligned_size = ((size - 1) | (align - 1) | (self.block_size_granularity - 1)) + 1;
        let map_aligned_size = match self.non_coherent_atom_size {
            Some(atom) => crate::align_size(aligned_size, atom),
            None => aligned_size,
        };

        log::trace!(
            "Allocate general block: size: {}, align: {}, aligned size: {}, type: {}",
            size,
            align,
            map_aligned_size,
            self.memory_type.0
        );

        self.alloc_block(device, map_aligned_size, align)
    }

    fn free(&mut self, device: &B::Device, block: GeneralBlock<B>) -> Size {
        self.free_block(device, block)
    }
}
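
// A minimal sketch (assumption: not part of the original test suite) of the size
// rounding used in `alloc` above: `((size - 1) | (align - 1) | (granularity - 1)) + 1`
// rounds `size` up to a multiple of the larger of the two powers of two.
#[test]
fn test_alloc_size_rounding() {
    fn round(size: Size, align: Size, granularity: Size) -> Size {
        ((size - 1) | (align - 1) | (granularity - 1)) + 1
    }
    assert_eq!(round(1, 1, 1), 1);
    assert_eq!(round(13, 16, 4), 16);
    assert_eq!(round(16, 16, 4), 16);
    assert_eq!(round(17, 16, 64), 64);
    assert_eq!(round(100, 4, 256), 256);
}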

impl<B: Backend> Drop for GeneralAllocator<B> {
    fn drop(&mut self) {
        for (index, size) in self.sizes.drain() {
            if !thread::panicking() {
                assert_eq!(size.chunks.len(), 0, "SizeEntry({}) is still used", index);
            } else {
                log::error!("Memory leak: SizeEntry({}) is still used", index);
            }
        }
    }
}

/// Backing storage of a [`Chunk`].
#[derive(Debug)]
enum ChunkFlavor<B: Backend> {
    /// A dedicated device allocation.
    Dedicated {
        memory: Arc<Memory<B>>,
        ptr: Option<NonNull<u8>>,
    },
    /// A block sub-allocated from another chunk.
    General(GeneralBlock<B>),
}

#[derive(Debug)]
struct Chunk<B: Backend> {
    flavor: ChunkFlavor<B>,
    /// Bitmask of free blocks: a set bit means the corresponding block is free.
    blocks: BlockMask,
}

impl<B: Backend> Chunk<B> {
    /// Create a chunk backed by a dedicated device allocation.
    fn from_memory(block_size: Size, memory: Memory<B>, ptr: Option<NonNull<u8>>) -> Self {
        let blocks = memory.size() / block_size;
        debug_assert!(blocks <= MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::Dedicated {
                memory: Arc::new(memory),
                ptr,
            },
            // Mark all blocks as free.
            blocks: (high_bit - 1) | high_bit,
        }
    }

    /// Create a chunk backed by a block allocated from another chunk.
    fn from_block(block_size: Size, chunk_block: GeneralBlock<B>) -> Self {
        let blocks = (chunk_block.size() / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);

        Chunk {
            flavor: ChunkFlavor::General(chunk_block),
            // Mark all blocks as free.
            blocks: (high_bit - 1) | high_bit,
        }
    }

    fn shared_memory(&self) -> &Arc<Memory<B>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => memory,
            ChunkFlavor::General(ref block) => &block.memory,
        }
    }

    fn range(&self) -> Range<Size> {
        match self.flavor {
            ChunkFlavor::Dedicated { ref memory, .. } => 0..memory.size(),
            ChunkFlavor::General(ref block) => block.range.clone(),
        }
    }

    /// Byte range covered by `count` blocks starting at `block_index`.
    fn blocks_range(&self, block_size: Size, block_index: u32, count: u32) -> Range<Size> {
        let range = self.range();
        let start = range.start + block_size * block_index as Size;
        let end = start + block_size * count as Size;
        debug_assert!(end <= range.end);
        start..end
    }

    /// Check if all blocks in this chunk are free.
    fn is_unused(&self, block_size: Size) -> bool {
        let range = self.range();
        let blocks = ((range.end - range.start) / block_size).min(MAX_BLOCKS_PER_CHUNK as Size);

        let high_bit = 1 << (blocks - 1);
        let mask = (high_bit - 1) | high_bit;

        debug_assert!(self.blocks <= mask);
        self.blocks == mask
    }

    /// Check if there are no free blocks left in this chunk.
    fn is_exhausted(&self) -> bool {
        self.blocks == 0
    }

    /// Acquire `count` consecutive blocks satisfying `align`, returning the index
    /// of the first block, or `None` if no suitable run of free blocks exists.
    fn acquire_blocks(&mut self, count: u32, block_size: Size, align: Size) -> Option<u32> {
        debug_assert!(count > 0 && count <= MAX_BLOCKS_PER_CHUNK);

        // A bit stays set in `blocks` only if `count` consecutive blocks starting there are free.
        let mut blocks: BlockMask = !0;
        for i in 0..count {
            blocks &= self.blocks >> i;
        }
        // Scan candidate start positions until one is properly aligned.
        while blocks != 0 {
            let index = blocks.trailing_zeros();
            blocks ^= 1 << index;

            if (index as Size * block_size) & (align - 1) == 0 {
                let mask = ((1 << count) - 1) << index;
                debug_assert_eq!(self.blocks & mask, mask);
                self.blocks ^= mask;
                log::trace!(
                    "Chunk acquired at {}, mask: 0x{:x} -> 0x{:x}",
                    index,
                    mask,
                    self.blocks
                );
                return Some(index);
            }
        }
        None
    }

    /// Release `count` consecutive blocks starting at `index`.
    fn release_blocks(&mut self, index: u32, count: u32) {
        debug_assert!(index + count <= MAX_BLOCKS_PER_CHUNK);
        let mask = ((1 << count) - 1) << index;
        debug_assert_eq!(self.blocks & mask, 0);
        self.blocks |= mask;
        log::trace!(
            "Chunk released at {}, mask: 0x{:x} -> 0x{:x}",
            index,
            mask,
            self.blocks
        );
    }

    fn mapping_ptr(&self) -> Option<NonNull<u8>> {
        match self.flavor {
            ChunkFlavor::Dedicated { ptr, .. } => ptr,
            ChunkFlavor::General(ref block) => block.ptr,
        }
    }
}
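
// A minimal sketch (assumption: not part of the original test suite) of the block-count
// clamping performed by `alloc_chunk`: the requested count is rounded up to a power of
// two and clamped to `[MIN_BLOCKS_PER_CHUNK, MAX_BLOCKS_PER_CHUNK]`.
#[test]
fn test_chunk_block_count_clamping() {
    fn clamp(requested: u32) -> u32 {
        requested
            .next_power_of_two()
            .max(MIN_BLOCKS_PER_CHUNK)
            .min(MAX_BLOCKS_PER_CHUNK)
    }
    assert_eq!(clamp(0), MIN_BLOCKS_PER_CHUNK);
    assert_eq!(clamp(3), MIN_BLOCKS_PER_CHUNK);
    assert_eq!(clamp(9), 16);
    assert_eq!(clamp(1000), MAX_BLOCKS_PER_CHUNK);
}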